This commit is contained in:
Felix
2026-01-18 21:33:56 +08:00
parent 9904be7893
commit 3728ed54d1
6 changed files with 272 additions and 19 deletions

View File

@@ -14,7 +14,7 @@ class AuditLog(Base):
__tablename__ = 'audit_log'
id: Mapped[snowflake_id_key] = mapped_column(init=False, primary_key=True)
api_type: Mapped[str] = mapped_column(String(20), nullable=False, comment="API类型: recognition embedding assessment")
api_type: Mapped[str] = mapped_column(String(50), nullable=False, comment="API类型: recognition embedding assessment")
model_name: Mapped[str] = mapped_column(String(50), nullable=False, comment="模型名称")
request_data: Mapped[Optional[dict]] = mapped_column(MySQLJSON, comment="请求数据")
response_data: Mapped[Optional[dict]] = mapped_column(MySQLJSON, comment="响应数据")

View File

@@ -1,7 +1,19 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from fastapi import APIRouter, Request, Query
from backend.app.ai.schema.qa import CreateQaExerciseRequest, CreateQaExerciseTaskResponse, QaExerciseSchema, QaExerciseWithQuestionsSchema, QaQuestionSchema, QaSessionSchema, CreateAttemptRequest, TaskStatusResponse, QuestionLatestResultResponse
from backend.app.ai.schema.qa import (
CreateQaExerciseRequest,
CreateQaExerciseTaskResponse,
QaExerciseSchema,
QaExerciseWithQuestionsSchema,
QaQuestionSchema,
QaSessionSchema,
CreateAttemptRequest,
TaskStatusResponse,
QuestionLatestResultResponse,
ImageConversationInitRequest,
ImageConversationInitResponse,
)
from backend.common.response.response_schema import response_base, ResponseSchemaModel
from backend.common.security.jwt import DependsJwtAuth
from backend.app.ai.service.qa_service import qa_service
@@ -15,6 +27,19 @@ async def create_exercise_task(request: Request, obj: CreateQaExerciseRequest) -
return response_base.success(data=CreateQaExerciseTaskResponse(**res))
@router.post('/conversations/setting', summary='获取图片自由对话配置', dependencies=[DependsJwtAuth])
async def get_conversation_setting(request: Request, obj: ImageConversationInitRequest) -> ResponseSchemaModel[ImageConversationInitResponse | None]:
    """Return the free-conversation setting for an image, or None when the
    service has no stored analysis for it."""
    info = await qa_service.get_conversation_setting(image_id=obj.image_id, user_id=request.user.id)
    if info:
        payload = ImageConversationInitResponse(
            image_id=info["image_id"],
            setting=info["setting"],
            latest_session=info.get("latest_session"),
        )
        return response_base.success(data=payload)
    # No analysis stored yet — success with an empty payload.
    return response_base.success(data=None)
@router.get('/exercises/tasks/{task_id}/status', summary='查询练习任务状态', dependencies=[DependsJwtAuth])
async def get_exercise_task_status(task_id: int) -> ResponseSchemaModel[TaskStatusResponse]:
res = await qa_service.get_task_status(task_id)

View File

@@ -134,6 +134,23 @@ class QaPracticeSessionCRUD(CRUDPlus[QaPracticeSession]):
result = await db.execute(stmt)
return result.scalars().first()
async def get_latest_session_by_image_user(self, db: AsyncSession, user_id: int, image_id: int, exercise_type: Optional[str] = None) -> Optional[QaPracticeSession]:
    """Return the most recent practice session the user started for an image.

    Optionally restricts the lookup to exercises of a given type. Returns
    None when the user has no matching session.
    """
    filters = [
        QaPracticeSession.starter_user_id == user_id,
        QaExercise.image_id == image_id,
    ]
    if exercise_type:
        filters.append(QaExercise.type == exercise_type)
    # "Latest" is defined by the highest session id (snowflake ids are monotonic here).
    query = (
        select(QaPracticeSession)
        .join(QaExercise, QaPracticeSession.exercise_id == QaExercise.id)
        .where(and_(*filters))
        .order_by(QaPracticeSession.id.desc())
        .limit(1)
    )
    rows = await db.execute(query)
    return rows.scalars().first()
# Module-level singleton DAO instances shared by the service layer.
qa_session_dao = QaPracticeSessionCRUD(QaPracticeSession)
qa_attempt_dao = QaQuestionAttemptCRUD(QaQuestionAttempt)

View File

@@ -121,10 +121,12 @@ class IncorrectSelectionItem(SchemaBase):
error_type: Optional[str] = None
error_reason: Optional[str] = None
class SelectedDetail(SchemaBase):
    """Breakdown of a user's selected options into correct and incorrect picks."""
    # Options the user selected that were correct.
    correct: List[str] = []
    # Incorrect selections, each carrying its error type/reason annotation.
    incorrect: List[IncorrectSelectionItem] = []
class EvaluationSchema(SchemaBase):
type: Optional[str] = None
result: Optional[str] = None
@@ -133,7 +135,40 @@ class EvaluationSchema(SchemaBase):
missing_correct: Optional[List[str]] = None
feedback: Optional[str] = None
# Pydantic forward references resolution
class ImageConversationInitRequest(SchemaBase):
    """Request body for fetching an image's free-conversation configuration."""
    # ID of the image whose conversation analysis is requested.
    image_id: int
class ImageConversationEventSchema(SchemaBase):
    """One candidate conversation event for an image, bilingual (EN/ZH)."""
    event_en: str  # English event name
    event_zh: str  # Chinese event name
    conversation_direction_en: str  # English description of the conversation focus
    conversation_direction_zh: str  # Chinese description of the conversation focus
class ImageConversationObjectSchema(SchemaBase):
    """A core object identified in the image, in English and Chinese."""
    object_en: str  # English object name
    object_zh: str  # Chinese object name
class ImageConversationSceneSchema(SchemaBase):
    """A possible scene for the image, in English and Chinese."""
    scene_en: str  # English scene name
    scene_zh: str  # Chinese scene name
class ImageConversationAnalysisSchema(SchemaBase):
    """LLM image analysis: core objects plus independent candidate scenes/events.

    Mirrors the "image_analysis" JSON object requested from the model.
    """
    core_objects: List[ImageConversationObjectSchema] = []
    all_possible_scenes: List[ImageConversationSceneSchema] = []
    all_possible_events: List[ImageConversationEventSchema] = []
class ImageConversationInitResponse(SchemaBase):
    """Response carrying the stored analysis and the user's latest session."""
    image_id: int
    # The persisted conversation analysis for this image.
    setting: ImageConversationAnalysisSchema
    # Summary of the user's most recent free-conversation session, if any
    # (keys: session_id, status, updated_at, exercise_id).
    latest_session: Optional[Dict[str, Any]] = None
# Resolve Pydantic forward references now that all dependent schemas exist.
CreateAttemptTaskResponse.model_rebuild()
AttemptResultResponse.model_rebuild()
QuestionEvaluationResponse.model_rebuild()

View File

@@ -34,6 +34,7 @@ from backend.app.ai.model.image_task import ImageProcessingTask
from backend.app.ai.model.qa import QaQuestion
from backend.core.prompts.qa_exercise import get_qa_exercise_prompt
from backend.core.prompts.recognition import get_conversation_prompt_for_image_dialogue
from backend.app.ai.tools.qa_tool import SceneVariationGenerator, Illustrator
class QaExerciseProcessor(TaskProcessor):
@@ -164,16 +165,140 @@ class SceneVariationProcessor(TaskProcessor):
return {'count': count, 'token_usage': token_usage}, token_usage
class ConversationInitProcessor(TaskProcessor):
    """Task processor that produces a conversation analysis for an image.

    Reads the image's stored recognition result, asks the LLM for a bilingual
    scene/object/event analysis, and persists the parsed result under
    details["conversation_analysis"].
    """

    async def process(self, db: AsyncSession, task: ImageProcessingTask) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        """Analyze the task's image and store the conversation analysis.

        Returns (result, token_usage); raises NotFoundError when the image is
        missing and Exception on LLM failure or an unparseable response.
        """
        image = await image_dao.get(db, task.image_id)
        if not image:
            raise errors.NotFoundError(msg="Image not found")
        details = dict(image.details or {})
        # recognition_result comes from an earlier pipeline step; may be absent.
        rr = (details.get("recognition_result") or {}) if details else {}
        description = ""
        scene_tags: List[str] = []
        try:
            # description may be stored as a string or a list of strings.
            d = rr.get("description")
            if isinstance(d, str):
                description = d
            elif isinstance(d, list) and d:
                description = d[0] if isinstance(d[0], str) else ""
        except Exception:
            description = ""
        try:
            # scene_tag may be a list or a single string; normalize to list[str].
            tags = rr.get("scene_tag")
            if isinstance(tags, list):
                scene_tags = [str(t) for t in tags]
            elif isinstance(tags, str):
                scene_tags = [tags]
        except Exception:
            scene_tags = []
        payload = {
            "description": description,
            "scene_tags": scene_tags,
        }
        prompt = get_conversation_prompt_for_image_dialogue(payload)
        res = await self._call_llm_chat(prompt=prompt, image_id=image.id, user_id=task.user_id, chat_type="image_conversation_analysis")
        if not res.get("success"):
            raise Exception(res.get("error") or "LLM call failed")
        token_usage = res.get("token_usage") or {}
        try:
            # The model is instructed to return a JSON string; tolerate dicts too.
            parsed = json.loads(res.get("result")) if isinstance(res.get("result"), str) else res.get("result")
        except Exception:
            parsed = {}
        image_analysis = parsed.get("image_analysis") if isinstance(parsed, dict) else None
        if not isinstance(image_analysis, dict):
            raise Exception("Invalid image_analysis structure")
        # Assign a fresh dict so the ORM registers the attribute change.
        new_details = dict(details)
        new_details["conversation_analysis"] = {
            "image_analysis": image_analysis,
        }
        image.details = new_details
        try:
            # Explicitly mark the JSON column dirty; best-effort, never fatal.
            from sqlalchemy.orm.attributes import flag_modified
            flag_modified(image, "details")
        except Exception:
            pass
        await db.flush()
        result = {"image_analysis": image_analysis, "token_usage": token_usage}
        return result, token_usage

    async def _call_llm_chat(self, prompt: str, image_id: int, user_id: int, chat_type: str) -> Dict[str, Any]:
        """Invoke the configured LLM once and return a success/error dict.

        Returns {"success": True, "result": str, "token_usage": dict} on
        success or {"success": False, "error": str} on failure; never raises.
        """
        messages = [
            SystemMessage(content="You are a helpful assistant."),
            HumanMessage(content=prompt)
        ]
        # Forwarded to the audit-log callback for request bookkeeping.
        metadata = {
            "image_id": image_id,
            "user_id": user_id,
            "api_type": chat_type,
            "model_name": settings.LLM_MODEL_TYPE
        }
        try:
            llm = LLMFactory.create_llm(settings.LLM_MODEL_TYPE)
            res = await llm.ainvoke(
                messages,
                config={"callbacks": [AuditLogCallbackHandler(metadata=metadata)]}
            )
            content = res.content
            if not isinstance(content, str):
                content = str(content)
            token_usage = {}
            if res.response_metadata:
                # Provider-dependent key: some report "token_usage", others "usage".
                token_usage = res.response_metadata.get("token_usage") or res.response_metadata.get("usage") or {}
            return {
                "success": True,
                "result": content,
                "token_usage": token_usage
            }
        except Exception as e:
            return {"success": False, "error": str(e)}
class QaService:
async def get_conversation_setting(self, image_id: int, user_id: int) -> Optional[Dict[str, Any]]:
    """Return the stored conversation analysis for an image plus the user's
    latest free-conversation session, or None when no analysis exists.

    Raises ForbiddenError when the image's task belongs to another user and
    NotFoundError when the image record is missing.
    """
    async with async_db_session() as db:
        owning_task = await image_task_dao.get_by_image_id(db, image_id)
        # Only the user who owns the image's processing task may read it.
        if owning_task is None or owning_task.user_id != user_id:
            raise errors.ForbiddenError(msg="Forbidden")
        image = await image_dao.get(db, image_id)
        if image is None:
            raise errors.NotFoundError(msg="Image not found")
        stored = dict(image.details or {}).get("conversation_analysis") or {}
        analysis = stored.get("image_analysis")
        if not isinstance(analysis, dict):
            return None
        # Attach the most recent free-conversation session, when one exists.
        session = await qa_session_dao.get_latest_session_by_image_user(
            db, user_id, image_id, exercise_type='free_conversation'
        )
        session_info = None
        if session is not None:
            if session.completed_at:
                updated = session.completed_at.isoformat()
            elif session.started_at:
                updated = session.started_at.isoformat()
            else:
                updated = None
            session_info = {
                'session_id': str(session.id),
                'status': session.status,
                'updated_at': updated,
                'exercise_id': str(session.exercise_id),
            }
        return {
            "image_id": image_id,
            "setting": analysis,
            "latest_session": session_info,
        }
async def create_exercise_task(self, image_id: int, user_id: int, type: Optional[str] = "scene_basic") -> Dict[str, Any]:
is_conversation_init = type == 'init_conversion'
async with async_db_session.begin() as db:
# Check for existing active task
latest_task = await image_task_dao.get_latest_active_task(db, user_id, image_id, 'qa_exercise')
ref_type_for_lookup = 'image_conversation_analysis' if is_conversation_init else 'qa_exercise'
latest_task = await image_task_dao.get_latest_active_task(db, user_id, image_id, ref_type_for_lookup)
if latest_task:
# existing_exercise = await qa_exercise_dao.get(db, latest_task.ref_id)
# if existing_exercise and existing_exercise.type != type:
# raise errors.ForbiddenError(msg='当前正在进行其他类型的任务,请等待完成后再试')
return {'task_id': str(latest_task.id), 'status': latest_task.status}
if not await points_service.check_sufficient_points(user_id, LLM_CHAT_COST):
@@ -187,21 +312,27 @@ class QaService:
image = await image_dao.get(db, image_id)
if not image:
raise errors.NotFoundError(msg='Image not found')
exercise = await qa_exercise_dao.create(db, {
'image_id': image_id,
'created_by': user_id,
'type': type,
'description': None,
'status': 'draft',
'ext': None
})
await db.flush()
if is_conversation_init:
ref_type = 'image_conversation_analysis'
ref_id = image_id
else:
exercise = await qa_exercise_dao.create(db, {
'image_id': image_id,
'created_by': user_id,
'type': type,
'description': None,
'status': 'draft',
'ext': None
})
await db.flush()
ref_type = 'qa_exercise'
ref_id = exercise.id
task = await image_task_dao.create_task(db, CreateImageTaskParam(
image_id=image_id,
user_id=user_id,
dict_level=(getattr(getattr(image, 'dict_level', None), 'name', None) or 'LEVEL1'),
ref_type='qa_exercise',
ref_id=exercise.id,
ref_type=ref_type,
ref_id=ref_id,
status=ImageTaskStatus.PENDING,
))
await db.flush()
@@ -210,6 +341,8 @@ class QaService:
if type == 'scene_variation':
processor = SceneVariationProcessor()
elif is_conversation_init:
processor = ConversationInitProcessor()
else:
processor = QaExerciseProcessor()

View File

@@ -125,6 +125,49 @@ level2 (Intermediate):
{"functionTags":["询问","索要物品"],"sceneExplanation":"该句型适用于咖啡厅/餐厅场景向服务人员礼貌索要菜单比直接说“Give me the menu”更得体适配所有餐饮消费场景的基础沟通。"}
"""
)
else:
return ""
def get_conversation_prompt_for_image_dialogue(payload: dict) -> str:
    """Build the LLM prompt for bilingual image-conversation analysis.

    Args:
        payload: dict with optional "description" (str) and "scene_tags"
            (list[str]) taken from the image's recognition result.

    Returns:
        A prompt string instructing the model to emit a single JSON object
        with core objects, possible scenes, and possible events (bilingual).
    """
    description = payload.get("description") or ""
    scene_tags = payload.get("scene_tags") or []
    scene_str = ", ".join(scene_tags) if scene_tags else ""
    # Bug fix: scene_str was previously computed but never interpolated into
    # the prompt, so recognition scene tags were silently dropped. Surface
    # them on their own line when present.
    scene_line = f"\nScene Tags: {scene_str}." if scene_str else ""
    # NOTE(review): the rule-text ranges (3-5 objects, 3-6 scenes, 5-8 events)
    # disagree with the JSON-structure comments below (4-7 each) — confirm
    # intended counts before tightening either side.
    base = f"""
Refer to the description of the picture. Analyze the uploaded image to comprehensively identify all possible scene types and all possible events that are logically feasible in daily life, without binding scenes to events (i.e., one event can match multiple scenes, and one scene can correspond to multiple events). All results must include both English and Chinese to serve as flexible optional tags for users to start English conversations, ensuring relevance to the image content and practicality for daily communication practice.
Picture Description: {description}.{scene_line}
// Analysis Rules (Must Follow Strictly)
Core Object Identification Rules:
Extract 3-5 core objects from the image (the most prominent and representative objects, e.g., menu, laptop, shopping bag, cake).
Provide both English name and Chinese translation for each core object (format: object_enobject_zh), which serves as the basis for inferring scenes and events.
Scene Identification Rules:
Identify 3-6 possible scenes based on the core objects and visual elements of the image; scenes can be general or specific (e.g., if core objects include "menu, steak", scenes can cover restaurant, café, food court, home kitchen).
Scenes must be common daily/office scenarios (avoid rare or abstract scenes like "space station").
Provide both English name and Chinese translation for each scene (format: scene_enscene_zh), and do not limit the number of events matching each scene.
Event Identification Rules:
Identify 5-8 possible events that are logically feasible in daily life; events can be loosely associated with the images core objects (e.g., even if the image shows a restaurant, events can include dining with friends, blind date, working remotely, celebrating a promotion).
Events must be specific and actionable (avoid vague descriptions like "doing something").
For each event, provide English name + Chinese translation + bilingual brief conversation direction (10-20 words per direction, explaining the focus of the conversation for this event).
No need to bind events to specific scenes; prioritize enriching event diversity to expand users' conversation options.
Output Constraints
Only return a JSON string (no explanatory text, no extra comments).
Ensure the JSON can be directly parsed by JSON.parse.
Strictly control the quantity of scenes and events within the specified range to avoid overwhelming users with options.
Output JSON Structure:
{{
"image_analysis": {{
"core_objects": [ {{"object_en": "xxx", "object_zh": "xxx"}}, ...], // 4-7 core objects, bilingual
"all_possible_scenes": [{{"scene_en": "xxx", "scene_zh": "xxx"}}, ...], // 4-7 scenes, bilingual, independent
"all_possible_events": [
{{
"event_en": "string", // English event name (e.g., "dining with friends")
"event_zh": "string", // Chinese event name (e.g., "和朋友聚餐")
"conversation_direction_en": "string", // English conversation focus (e.g., "talking about food taste and restaurant recommendations")
"conversation_direction_zh": "string" // Chinese conversation focus (e.g., "谈论食物口味和餐厅推荐")
}}, ...// 4-7 events in total, independent of scenes
]
}}
}}
"""
    return base