This commit is contained in:
Felix
2026-01-18 21:33:56 +08:00
parent 9904be7893
commit 3728ed54d1
6 changed files with 272 additions and 19 deletions

View File

@@ -14,7 +14,7 @@ class AuditLog(Base):
__tablename__ = 'audit_log'
id: Mapped[snowflake_id_key] = mapped_column(init=False, primary_key=True)
api_type: Mapped[str] = mapped_column(String(20), nullable=False, comment="API类型: recognition embedding assessment")
api_type: Mapped[str] = mapped_column(String(50), nullable=False, comment="API类型: recognition embedding assessment")
model_name: Mapped[str] = mapped_column(String(50), nullable=False, comment="模型名称")
request_data: Mapped[Optional[dict]] = mapped_column(MySQLJSON, comment="请求数据")
response_data: Mapped[Optional[dict]] = mapped_column(MySQLJSON, comment="响应数据")

View File

@@ -1,7 +1,19 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from fastapi import APIRouter, Request, Query
from backend.app.ai.schema.qa import CreateQaExerciseRequest, CreateQaExerciseTaskResponse, QaExerciseSchema, QaExerciseWithQuestionsSchema, QaQuestionSchema, QaSessionSchema, CreateAttemptRequest, TaskStatusResponse, QuestionLatestResultResponse
from backend.app.ai.schema.qa import (
CreateQaExerciseRequest,
CreateQaExerciseTaskResponse,
QaExerciseSchema,
QaExerciseWithQuestionsSchema,
QaQuestionSchema,
QaSessionSchema,
CreateAttemptRequest,
TaskStatusResponse,
QuestionLatestResultResponse,
ImageConversationInitRequest,
ImageConversationInitResponse,
)
from backend.common.response.response_schema import response_base, ResponseSchemaModel
from backend.common.security.jwt import DependsJwtAuth
from backend.app.ai.service.qa_service import qa_service
@@ -15,6 +27,19 @@ async def create_exercise_task(request: Request, obj: CreateQaExerciseRequest) -
return response_base.success(data=CreateQaExerciseTaskResponse(**res))
@router.post('/conversations/setting', summary='获取图片自由对话配置', dependencies=[DependsJwtAuth])
async def get_conversation_setting(request: Request, obj: ImageConversationInitRequest) -> ResponseSchemaModel[ImageConversationInitResponse | None]:
    """Return the free-conversation setting for an image, or None when the
    service has no stored analysis for it."""
    info = await qa_service.get_conversation_setting(image_id=obj.image_id, user_id=request.user.id)
    if info:
        payload = ImageConversationInitResponse(
            image_id=info["image_id"],
            setting=info["setting"],
            latest_session=info.get("latest_session"),
        )
        return response_base.success(data=payload)
    # No analysis stored yet — success with an empty payload.
    return response_base.success(data=None)
@router.get('/exercises/tasks/{task_id}/status', summary='查询练习任务状态', dependencies=[DependsJwtAuth])
async def get_exercise_task_status(task_id: int) -> ResponseSchemaModel[TaskStatusResponse]:
res = await qa_service.get_task_status(task_id)

View File

@@ -134,6 +134,23 @@ class QaPracticeSessionCRUD(CRUDPlus[QaPracticeSession]):
result = await db.execute(stmt)
return result.scalars().first()
async def get_latest_session_by_image_user(self, db: AsyncSession, user_id: int, image_id: int, exercise_type: Optional[str] = None) -> Optional[QaPracticeSession]:
    """Return the most recent practice session the user started for an image.

    Optionally restricts the lookup to exercises of a given type. Returns
    None when the user has no matching session.
    """
    filters = [
        QaPracticeSession.starter_user_id == user_id,
        QaExercise.image_id == image_id,
    ]
    if exercise_type:
        filters.append(QaExercise.type == exercise_type)
    # "Latest" is defined by the highest session id (snowflake ids are monotonic here).
    query = (
        select(QaPracticeSession)
        .join(QaExercise, QaPracticeSession.exercise_id == QaExercise.id)
        .where(and_(*filters))
        .order_by(QaPracticeSession.id.desc())
        .limit(1)
    )
    rows = await db.execute(query)
    return rows.scalars().first()
# Module-level singleton DAO instances shared by the service layer.
qa_session_dao = QaPracticeSessionCRUD(QaPracticeSession)
qa_attempt_dao = QaQuestionAttemptCRUD(QaQuestionAttempt)

View File

@@ -121,10 +121,12 @@ class IncorrectSelectionItem(SchemaBase):
error_type: Optional[str] = None
error_reason: Optional[str] = None
class SelectedDetail(SchemaBase):
    """Breakdown of a user's selected options into correct and incorrect picks."""
    # Options the user selected that were correct.
    correct: List[str] = []
    # Incorrect selections, each carrying its error type/reason annotation.
    incorrect: List[IncorrectSelectionItem] = []
class EvaluationSchema(SchemaBase):
type: Optional[str] = None
result: Optional[str] = None
@@ -133,7 +135,40 @@ class EvaluationSchema(SchemaBase):
missing_correct: Optional[List[str]] = None
feedback: Optional[str] = None
# Pydantic forward references resolution
class ImageConversationInitRequest(SchemaBase):
    """Request body for fetching an image's free-conversation configuration."""
    # ID of the image whose conversation analysis is requested.
    image_id: int
class ImageConversationEventSchema(SchemaBase):
    """One candidate conversation event for an image, bilingual (EN/ZH)."""
    event_en: str  # English event name
    event_zh: str  # Chinese event name
    conversation_direction_en: str  # English description of the conversation focus
    conversation_direction_zh: str  # Chinese description of the conversation focus
class ImageConversationObjectSchema(SchemaBase):
    """A core object identified in the image, in English and Chinese."""
    object_en: str  # English object name
    object_zh: str  # Chinese object name
class ImageConversationSceneSchema(SchemaBase):
    """A possible scene for the image, in English and Chinese."""
    scene_en: str  # English scene name
    scene_zh: str  # Chinese scene name
class ImageConversationAnalysisSchema(SchemaBase):
    """LLM image analysis: core objects plus independent candidate scenes/events.

    Mirrors the "image_analysis" JSON object requested from the model.
    """
    core_objects: List[ImageConversationObjectSchema] = []
    all_possible_scenes: List[ImageConversationSceneSchema] = []
    all_possible_events: List[ImageConversationEventSchema] = []
class ImageConversationInitResponse(SchemaBase):
    """Response carrying the stored analysis and the user's latest session."""
    image_id: int
    # The persisted conversation analysis for this image.
    setting: ImageConversationAnalysisSchema
    # Summary of the user's most recent free-conversation session, if any
    # (keys: session_id, status, updated_at, exercise_id).
    latest_session: Optional[Dict[str, Any]] = None
# Resolve Pydantic forward references now that all dependent schemas exist.
CreateAttemptTaskResponse.model_rebuild()
AttemptResultResponse.model_rebuild()
QuestionEvaluationResponse.model_rebuild()

View File

@@ -34,6 +34,7 @@ from backend.app.ai.model.image_task import ImageProcessingTask
from backend.app.ai.model.qa import QaQuestion
from backend.core.prompts.qa_exercise import get_qa_exercise_prompt
from backend.core.prompts.recognition import get_conversation_prompt_for_image_dialogue
from backend.app.ai.tools.qa_tool import SceneVariationGenerator, Illustrator
class QaExerciseProcessor(TaskProcessor):
@@ -164,16 +165,140 @@ class SceneVariationProcessor(TaskProcessor):
return {'count': count, 'token_usage': token_usage}, token_usage
class ConversationInitProcessor(TaskProcessor):
    """Task processor that produces a conversation analysis for an image.

    Reads the image's stored recognition result, asks the LLM for a bilingual
    scene/object/event analysis, and persists the parsed result under
    details["conversation_analysis"].
    """

    async def process(self, db: AsyncSession, task: ImageProcessingTask) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        """Analyze the task's image and store the conversation analysis.

        Returns (result, token_usage); raises NotFoundError when the image is
        missing and Exception on LLM failure or an unparseable response.
        """
        image = await image_dao.get(db, task.image_id)
        if not image:
            raise errors.NotFoundError(msg="Image not found")
        details = dict(image.details or {})
        # recognition_result comes from an earlier pipeline step; may be absent.
        rr = (details.get("recognition_result") or {}) if details else {}
        description = ""
        scene_tags: List[str] = []
        try:
            # description may be stored as a string or a list of strings.
            d = rr.get("description")
            if isinstance(d, str):
                description = d
            elif isinstance(d, list) and d:
                description = d[0] if isinstance(d[0], str) else ""
        except Exception:
            description = ""
        try:
            # scene_tag may be a list or a single string; normalize to list[str].
            tags = rr.get("scene_tag")
            if isinstance(tags, list):
                scene_tags = [str(t) for t in tags]
            elif isinstance(tags, str):
                scene_tags = [tags]
        except Exception:
            scene_tags = []
        payload = {
            "description": description,
            "scene_tags": scene_tags,
        }
        prompt = get_conversation_prompt_for_image_dialogue(payload)
        res = await self._call_llm_chat(prompt=prompt, image_id=image.id, user_id=task.user_id, chat_type="image_conversation_analysis")
        if not res.get("success"):
            raise Exception(res.get("error") or "LLM call failed")
        token_usage = res.get("token_usage") or {}
        try:
            # The model is instructed to return a JSON string; tolerate dicts too.
            parsed = json.loads(res.get("result")) if isinstance(res.get("result"), str) else res.get("result")
        except Exception:
            parsed = {}
        image_analysis = parsed.get("image_analysis") if isinstance(parsed, dict) else None
        if not isinstance(image_analysis, dict):
            raise Exception("Invalid image_analysis structure")
        # Assign a fresh dict so the ORM registers the attribute change.
        new_details = dict(details)
        new_details["conversation_analysis"] = {
            "image_analysis": image_analysis,
        }
        image.details = new_details
        try:
            # Explicitly mark the JSON column dirty; best-effort, never fatal.
            from sqlalchemy.orm.attributes import flag_modified
            flag_modified(image, "details")
        except Exception:
            pass
        await db.flush()
        result = {"image_analysis": image_analysis, "token_usage": token_usage}
        return result, token_usage

    async def _call_llm_chat(self, prompt: str, image_id: int, user_id: int, chat_type: str) -> Dict[str, Any]:
        """Invoke the configured LLM once and return a success/error dict.

        Returns {"success": True, "result": str, "token_usage": dict} on
        success or {"success": False, "error": str} on failure; never raises.
        """
        messages = [
            SystemMessage(content="You are a helpful assistant."),
            HumanMessage(content=prompt)
        ]
        # Forwarded to the audit-log callback for request bookkeeping.
        metadata = {
            "image_id": image_id,
            "user_id": user_id,
            "api_type": chat_type,
            "model_name": settings.LLM_MODEL_TYPE
        }
        try:
            llm = LLMFactory.create_llm(settings.LLM_MODEL_TYPE)
            res = await llm.ainvoke(
                messages,
                config={"callbacks": [AuditLogCallbackHandler(metadata=metadata)]}
            )
            content = res.content
            if not isinstance(content, str):
                content = str(content)
            token_usage = {}
            if res.response_metadata:
                # Provider-dependent key: some report "token_usage", others "usage".
                token_usage = res.response_metadata.get("token_usage") or res.response_metadata.get("usage") or {}
            return {
                "success": True,
                "result": content,
                "token_usage": token_usage
            }
        except Exception as e:
            return {"success": False, "error": str(e)}
class QaService:
async def get_conversation_setting(self, image_id: int, user_id: int) -> Optional[Dict[str, Any]]:
    """Return the stored conversation analysis for an image plus the user's
    latest free-conversation session, or None when no analysis exists.

    Raises ForbiddenError when the image's task belongs to another user and
    NotFoundError when the image record is missing.
    """
    async with async_db_session() as db:
        owning_task = await image_task_dao.get_by_image_id(db, image_id)
        # Only the user who owns the image's processing task may read it.
        if owning_task is None or owning_task.user_id != user_id:
            raise errors.ForbiddenError(msg="Forbidden")
        image = await image_dao.get(db, image_id)
        if image is None:
            raise errors.NotFoundError(msg="Image not found")
        stored = dict(image.details or {}).get("conversation_analysis") or {}
        analysis = stored.get("image_analysis")
        if not isinstance(analysis, dict):
            return None
        # Attach the most recent free-conversation session, when one exists.
        session = await qa_session_dao.get_latest_session_by_image_user(
            db, user_id, image_id, exercise_type='free_conversation'
        )
        session_info = None
        if session is not None:
            if session.completed_at:
                updated = session.completed_at.isoformat()
            elif session.started_at:
                updated = session.started_at.isoformat()
            else:
                updated = None
            session_info = {
                'session_id': str(session.id),
                'status': session.status,
                'updated_at': updated,
                'exercise_id': str(session.exercise_id),
            }
        return {
            "image_id": image_id,
            "setting": analysis,
            "latest_session": session_info,
        }
async def create_exercise_task(self, image_id: int, user_id: int, type: Optional[str] = "scene_basic") -> Dict[str, Any]:
is_conversation_init = type == 'init_conversion'
async with async_db_session.begin() as db:
# Check for existing active task
latest_task = await image_task_dao.get_latest_active_task(db, user_id, image_id, 'qa_exercise')
ref_type_for_lookup = 'image_conversation_analysis' if is_conversation_init else 'qa_exercise'
latest_task = await image_task_dao.get_latest_active_task(db, user_id, image_id, ref_type_for_lookup)
if latest_task:
# existing_exercise = await qa_exercise_dao.get(db, latest_task.ref_id)
# if existing_exercise and existing_exercise.type != type:
# raise errors.ForbiddenError(msg='当前正在进行其他类型的任务,请等待完成后再试')
return {'task_id': str(latest_task.id), 'status': latest_task.status}
if not await points_service.check_sufficient_points(user_id, LLM_CHAT_COST):
@@ -187,21 +312,27 @@ class QaService:
image = await image_dao.get(db, image_id)
if not image:
raise errors.NotFoundError(msg='Image not found')
exercise = await qa_exercise_dao.create(db, {
'image_id': image_id,
'created_by': user_id,
'type': type,
'description': None,
'status': 'draft',
'ext': None
})
await db.flush()
if is_conversation_init:
ref_type = 'image_conversation_analysis'
ref_id = image_id
else:
exercise = await qa_exercise_dao.create(db, {
'image_id': image_id,
'created_by': user_id,
'type': type,
'description': None,
'status': 'draft',
'ext': None
})
await db.flush()
ref_type = 'qa_exercise'
ref_id = exercise.id
task = await image_task_dao.create_task(db, CreateImageTaskParam(
image_id=image_id,
user_id=user_id,
dict_level=(getattr(getattr(image, 'dict_level', None), 'name', None) or 'LEVEL1'),
ref_type='qa_exercise',
ref_id=exercise.id,
ref_type=ref_type,
ref_id=ref_id,
status=ImageTaskStatus.PENDING,
))
await db.flush()
@@ -210,6 +341,8 @@ class QaService:
if type == 'scene_variation':
processor = SceneVariationProcessor()
elif is_conversation_init:
processor = ConversationInitProcessor()
else:
processor = QaExerciseProcessor()

View File

@@ -125,6 +125,49 @@ level2 (Intermediate):
{"functionTags":["询问","索要物品"],"sceneExplanation":"该句型适用于咖啡厅/餐厅场景向服务人员礼貌索要菜单比直接说“Give me the menu”更得体适配所有餐饮消费场景的基础沟通。"}
"""
)
else:
return ""
def get_conversation_prompt_for_image_dialogue(payload: dict) -> str:
    """Build the LLM prompt for bilingual image-conversation analysis.

    Args:
        payload: dict with optional "description" (str) and "scene_tags"
            (list[str]) taken from the image's recognition result.

    Returns:
        A prompt string instructing the model to emit a single JSON object
        with core objects, possible scenes, and possible events (bilingual).
    """
    description = payload.get("description") or ""
    scene_tags = payload.get("scene_tags") or []
    scene_str = ", ".join(scene_tags) if scene_tags else ""
    # Bug fix: scene_str was previously computed but never interpolated into
    # the prompt, so recognition scene tags were silently dropped. Surface
    # them on their own line when present.
    scene_line = f"\nScene Tags: {scene_str}." if scene_str else ""
    # NOTE(review): the rule-text ranges (3-5 objects, 3-6 scenes, 5-8 events)
    # disagree with the JSON-structure comments below (4-7 each) — confirm
    # intended counts before tightening either side.
    base = f"""
Refer to the description of the picture. Analyze the uploaded image to comprehensively identify all possible scene types and all possible events that are logically feasible in daily life, without binding scenes to events (i.e., one event can match multiple scenes, and one scene can correspond to multiple events). All results must include both English and Chinese to serve as flexible optional tags for users to start English conversations, ensuring relevance to the image content and practicality for daily communication practice.
Picture Description: {description}.{scene_line}
// Analysis Rules (Must Follow Strictly)
Core Object Identification Rules:
Extract 3-5 core objects from the image (the most prominent and representative objects, e.g., menu, laptop, shopping bag, cake).
Provide both English name and Chinese translation for each core object (format: object_enobject_zh), which serves as the basis for inferring scenes and events.
Scene Identification Rules:
Identify 3-6 possible scenes based on the core objects and visual elements of the image; scenes can be general or specific (e.g., if core objects include "menu, steak", scenes can cover restaurant, café, food court, home kitchen).
Scenes must be common daily/office scenarios (avoid rare or abstract scenes like "space station").
Provide both English name and Chinese translation for each scene (format: scene_enscene_zh), and do not limit the number of events matching each scene.
Event Identification Rules:
Identify 5-8 possible events that are logically feasible in daily life; events can be loosely associated with the images core objects (e.g., even if the image shows a restaurant, events can include dining with friends, blind date, working remotely, celebrating a promotion).
Events must be specific and actionable (avoid vague descriptions like "doing something").
For each event, provide English name + Chinese translation + bilingual brief conversation direction (10-20 words per direction, explaining the focus of the conversation for this event).
No need to bind events to specific scenes; prioritize enriching event diversity to expand users' conversation options.
Output Constraints
Only return a JSON string (no explanatory text, no extra comments).
Ensure the JSON can be directly parsed by JSON.parse.
Strictly control the quantity of scenes and events within the specified range to avoid overwhelming users with options.
Output JSON Structure:
{{
"image_analysis": {{
"core_objects": [ {{"object_en": "xxx", "object_zh": "xxx"}}, ...], // 4-7 core objects, bilingual
"all_possible_scenes": [{{"scene_en": "xxx", "scene_zh": "xxx"}}, ...], // 4-7 scenes, bilingual, independent
"all_possible_events": [
{{
"event_en": "string", // English event name (e.g., "dining with friends")
"event_zh": "string", // Chinese event name (e.g., "和朋友聚餐")
"conversation_direction_en": "string", // English conversation focus (e.g., "talking about food taste and restaurant recommendations")
"conversation_direction_zh": "string" // Chinese conversation focus (e.g., "谈论食物口味和餐厅推荐")
}}, ...// 4-7 events in total, independent of scenes
]
}}
}}
"""
    return base