add conversation

This commit is contained in:
Felix
2026-01-20 20:41:23 +08:00
parent 3728ed54d1
commit 7ade571e13
8 changed files with 935 additions and 12 deletions

View File

@@ -13,6 +13,12 @@ from backend.app.ai.schema.qa import (
QuestionLatestResultResponse,
ImageConversationInitRequest,
ImageConversationInitResponse,
ConversationStartRequest,
ConversationStartResponse,
ConversationSessionSchema,
ConversationReplyRequest,
ConversationReplyResponse,
ConversationLatestResponse,
)
from backend.common.response.response_schema import response_base, ResponseSchemaModel
from backend.common.security.jwt import DependsJwtAuth
@@ -40,6 +46,46 @@ async def get_conversation_setting(request: Request, obj: ImageConversationInitR
return response_base.success(data=data)
@router.post('/conversations/start', summary='启动图片自由对话', dependencies=[DependsJwtAuth])
async def start_conversation(request: Request, obj: ConversationStartRequest) -> ResponseSchemaModel[ConversationStartResponse]:
    """Kick off a free-form conversation about an image for the current user.

    Delegates to the service layer, which creates the exercise/session rows and
    dispatches an async LLM task; the response carries the task handle.
    """
    result = await qa_service.start_conversation(
        image_id=obj.image_id,
        user_id=request.user.id,
        scene=obj.scene,
        event=obj.event,
        style=obj.style,
        user_role=obj.user_role,
        assistant_role=obj.assistant_role,
        level=obj.level,
        info=obj.info,
    )
    return response_base.success(data=ConversationStartResponse(**result))
@router.post('/conversations/{session_id}/reply', summary='回复图片自由对话', dependencies=[DependsJwtAuth])
async def reply_conversation(request: Request, session_id: int, obj: ConversationReplyRequest) -> ResponseSchemaModel[ConversationReplyResponse]:
    """Submit the user's reply in an ongoing free conversation session.

    The service records the reply as an attempt and dispatches an async task to
    produce the assistant's next turn.
    """
    result = await qa_service.reply_conversation(
        session_id=session_id,
        user_id=request.user.id,
        input_text=obj.content,
    )
    payload = ConversationReplyResponse(**result)
    return response_base.success(data=payload)
@router.get('/conversations/{session_id}/latest', summary='获取图片自由对话最新消息', dependencies=[DependsJwtAuth])
async def get_conversation_latest(request: Request, session_id: int) -> ResponseSchemaModel[ConversationLatestResponse]:
    """Return only the most recent assistant message for a conversation session."""
    result = await qa_service.get_latest_messages(session_id=session_id, user_id=request.user.id)
    payload = ConversationLatestResponse(**result)
    return response_base.success(data=payload)
@router.get('/conversations/{session_id}', summary='获取图片自由对话会话信息', dependencies=[DependsJwtAuth])
async def get_conversation_session(request: Request, session_id: int) -> ResponseSchemaModel[ConversationSessionSchema]:
    """Return the full conversation session, including the message history."""
    result = await qa_service.get_conversation_session(session_id=session_id, user_id=request.user.id)
    return response_base.success(data=ConversationSessionSchema(**result))
@router.get('/exercises/tasks/{task_id}/status', summary='查询练习任务状态', dependencies=[DependsJwtAuth])
async def get_exercise_task_status(task_id: int) -> ResponseSchemaModel[TaskStatusResponse]:
res = await qa_service.get_task_status(task_id)

View File

@@ -41,10 +41,15 @@ class QaQuestionCRUD(CRUDPlus[QaQuestion]):
return inst
async def get_by_exercise_id(self, db: AsyncSession, exercise_id: int) -> List[QaQuestion]:
    """Return all questions belonging to *exercise_id*, ordered by ascending id.

    Fix: the block contained two consecutive assignments to ``stmt`` (a diff
    artifact); the first, un-ordered query was built and immediately discarded.
    Only the ordered query is kept.
    """
    stmt = select(self.model).where(self.model.exercise_id == exercise_id).order_by(self.model.id)
    result = await db.execute(stmt)
    return list(result.scalars().all())
async def get_latest_by_exercise_id(self, db: AsyncSession, exercise_id: int) -> Optional[QaQuestion]:
    """Fetch the most recently created question for *exercise_id*, or None."""
    query = (
        select(self.model)
        .where(self.model.exercise_id == exercise_id)
        .order_by(self.model.id.desc())
        .limit(1)
    )
    rows = await db.execute(query)
    return rows.scalars().first()
class QaQuestionAttemptCRUD(CRUDPlus[QaQuestionAttempt]):
async def get(self, db: AsyncSession, id: int) -> Optional[QaQuestionAttempt]:

View File

@@ -47,7 +47,7 @@ class QaPracticeSession(Base):
id: Mapped[snowflake_id_key] = mapped_column(BigInteger, init=False, primary_key=True)
exercise_id: Mapped[int] = mapped_column(BigInteger, ForeignKey('qa_exercise.id'), nullable=False)
starter_user_id: Mapped[int] = mapped_column(BigInteger, ForeignKey('wx_user.id'), nullable=False)
share_id: Mapped[Optional[int]] = mapped_column(BigInteger, nullable=True)
share_id: Mapped[Optional[int]] = mapped_column(BigInteger, nullable=True, default=None)
status: Mapped[str] = mapped_column(String(20), default='ongoing')
started_at: Mapped[Optional[DateTime]] = mapped_column(DateTime, default=None)
completed_at: Mapped[Optional[DateTime]] = mapped_column(DateTime, default=None)
@@ -67,8 +67,8 @@ class QaQuestionAttempt(Base):
question_id: Mapped[int] = mapped_column(BigInteger, ForeignKey('qa_question.id'), nullable=False)
exercise_id: Mapped[int] = mapped_column(BigInteger, ForeignKey('qa_exercise.id'), nullable=False)
user_id: Mapped[int] = mapped_column(BigInteger, ForeignKey('wx_user.id'), nullable=False)
task_id: Mapped[Optional[int]] = mapped_column(BigInteger, ForeignKey('image_processing_task.id'), nullable=True)
recording_id: Mapped[Optional[int]] = mapped_column(BigInteger, ForeignKey('recording.id'), nullable=True)
task_id: Mapped[Optional[int]] = mapped_column(BigInteger, ForeignKey('image_processing_task.id'), nullable=True, default=None)
recording_id: Mapped[Optional[int]] = mapped_column(BigInteger, ForeignKey('recording.id'), nullable=True, default=None)
choice_options: Mapped[Optional[list]] = mapped_column(MySQLJSON, default=None)
cloze_options: Mapped[Optional[str]] = mapped_column(String(100), default=None)
input_text: Mapped[Optional[str]] = mapped_column(Text, default=None)

View File

@@ -141,10 +141,13 @@ class ImageConversationInitRequest(SchemaBase):
class ImageConversationEventSchema(SchemaBase):
    """One suggested conversation event derived from image analysis.

    Fix: the block declared ``event_en``/``event_zh``/``conversation_direction_*``
    twice — first as required ``str`` and again as ``Optional[str] = None`` (a
    diff artifact). Only the later, Optional declarations take effect in a
    Python class body, so the dead required duplicates are removed.
    """
    event_en: Optional[str] = None
    event_zh: Optional[str] = None
    conversation_direction_en: Optional[str] = None
    conversation_direction_zh: Optional[str] = None
    style_en: Optional[str] = None
    style_zh: Optional[str] = None
    # NOTE: pydantic deep-copies container defaults per instance, so [] is safe here.
    suggested_roles: Optional[List[Dict[str, str]]] = []
class ImageConversationObjectSchema(SchemaBase):
@@ -169,6 +172,73 @@ class ImageConversationInitResponse(SchemaBase):
latest_session: Optional[Dict[str, Any]] = None
class ConversationStartRequest(SchemaBase):
    """Request body for starting a free image conversation.

    ``scene`` and ``event`` are required lists; the remaining fields are
    optional configuration passed through to the prompt builder.
    """
    image_id: int
    scene: List[str]
    event: List[str]
    style: Optional[str] = None
    user_role: Optional[str] = None
    assistant_role: Optional[str] = None
    level: Optional[str] = None
    info: Optional[str] = None
class ConversationAlternativeItemSchema(SchemaBase):
    """A single suggested user reply, in English with a Chinese translation."""
    alt_en: Optional[str] = None
    alt_zh: Optional[str] = None
class ConversationAlternativeResponsesSchema(SchemaBase):
    """The three categorized reply suggestions the LLM produces per turn."""
    positive: Optional[ConversationAlternativeItemSchema] = None
    neutral: Optional[ConversationAlternativeItemSchema] = None
    negative: Optional[ConversationAlternativeItemSchema] = None
class FreeConversationContentSchema(SchemaBase):
    """Content of one conversation message.

    Assistant turns fill the response/prompt/alternative fields; user turns
    carry ``text`` (and possibly ``correction``). All fields are optional so
    the same schema serves both roles.
    """
    response_en: Optional[str] = None
    response_zh: Optional[str] = None
    prompt_en: Optional[str] = None
    prompt_zh: Optional[str] = None
    alternative_responses: Optional[ConversationAlternativeResponsesSchema] = None
    correction: Optional[str] = None
    text: Optional[str] = None
class ConversationMessageSchema(SchemaBase):
    """One message in a conversation; ``role`` is 'assistant' or 'user'."""
    role: str
    content: FreeConversationContentSchema
class ConversationStartResponse(SchemaBase):
    """Response for conversation start: an async task handle plus ids.

    Ids are serialized as strings (snowflake ids exceed JS safe integers).
    """
    task_id: str
    status: str
    exercise_id: Optional[str] = None
class ConversationReplyRequest(SchemaBase):
    """User reply payload; ``audio_id`` is accepted but unused by the endpoint."""
    content: str
    audio_id: Optional[str] = None
class ConversationReplyResponse(SchemaBase):
    """Response for a reply: async task handle plus the session id as string."""
    task_id: str
    status: str
    session_id: Optional[str] = None
class ConversationLatestResponse(SchemaBase):
    """Latest message(s) for a session; normally at most one assistant turn."""
    session_id: str
    messages: List[ConversationMessageSchema]
class ConversationSessionSchema(SchemaBase):
    """Full session view: status, last-update timestamp and message history."""
    exercise_id: str
    session_id: str
    status: str
    # ISO-8601 string, or None when the exercise has no update timestamp.
    updated_at: Optional[str] = None
    # pydantic copies the [] default per instance, so this is not shared state.
    messages: List[ConversationMessageSchema] = []
# Resolve forward references now that every dependent schema is defined.
CreateAttemptTaskResponse.model_rebuild()
AttemptResultResponse.model_rebuild()
QuestionEvaluationResponse.model_rebuild()

View File

@@ -31,10 +31,11 @@ from backend.common.const import EXERCISE_TYPE_CHOICE, EXERCISE_TYPE_CLOZE, EXER
from backend.app.admin.schema.wx import DictLevel
from backend.app.ai.service.image_task_service import TaskProcessor, image_task_service
from backend.app.ai.model.image_task import ImageProcessingTask
from backend.app.ai.model.qa import QaQuestion
from backend.app.ai.model.qa import QaQuestion, QaPracticeSession
from backend.core.prompts.qa_exercise import get_qa_exercise_prompt
from backend.core.prompts.recognition import get_conversation_prompt_for_image_dialogue
from backend.core.prompts.free_conversation import get_free_conversation_start_prompt, get_free_conversation_reply_prompt
from backend.app.ai.tools.qa_tool import SceneVariationGenerator, Illustrator
class QaExerciseProcessor(TaskProcessor):
@@ -222,6 +223,7 @@ class ConversationInitProcessor(TaskProcessor):
result = {"image_analysis": image_analysis, "token_usage": token_usage}
return result, token_usage
async def _call_llm_chat(self, prompt: str, image_id: int, user_id: int, chat_type: str) -> Dict[str, Any]:
messages = [
SystemMessage(content="You are a helpful assistant."),
@@ -258,6 +260,321 @@ class ConversationInitProcessor(TaskProcessor):
except Exception as e:
return {"success": False, "error": str(e)}
class ConversationStartProcessor(TaskProcessor):
    """Async task processor that generates the opening assistant turn of a
    free image conversation.

    The task's ``ref_id`` points at the ``qa_exercise`` row created by
    ``QaService.start_conversation``; this processor calls the LLM, marks the
    exercise published, ensures a session exists, and stores the assistant's
    first message as a ``qa_question`` row.
    """

    async def process(self, db: AsyncSession, task: ImageProcessingTask) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        """Run the start-turn generation; returns (result, token_usage)."""
        # task.ref_id is the exercise id for 'qa_exercise' tasks.
        exercise_id = task.ref_id
        exercise = await qa_exercise_dao.get(db, exercise_id)
        if not exercise:
            raise errors.NotFoundError(msg="Exercise not found")
        image = await image_dao.get(db, exercise.image_id)
        if not image:
            raise errors.NotFoundError(msg="Image not found")
        # Pull a textual description out of the stored recognition result;
        # it may be a string or a list of strings — fall back to '' otherwise.
        rr = (image.details or {}).get('recognition_result') or {}
        description = ''
        try:
            d = rr.get('description')
            if isinstance(d, str):
                description = d
            elif isinstance(d, list) and d:
                description = d[0] if isinstance(d[0], str) else ''
        except Exception:
            description = ''
        # Conversation configuration was persisted on the exercise's ext column.
        params = exercise.ext or {}
        prompt = get_free_conversation_start_prompt(
            scene=params.get('scene'),
            event=params.get('event'),
            user_role=params.get('user_role'),
            assistant_role=params.get('assistant_role'),
            style=params.get('style'),
            level=params.get('level'),
            info=params.get('info'),
            description=description,
        )
        res = await self._call_llm_chat(prompt=prompt, image_id=image.id, user_id=task.user_id, chat_type='conversation_start')
        if not res.get('success'):
            raise Exception(res.get('error') or "LLM call failed")
        token_usage = res.get('token_usage') or {}
        # The LLM is instructed to emit pure JSON; tolerate pre-parsed dicts.
        try:
            parsed = json.loads(res.get('result')) if isinstance(res.get('result'), str) else res.get('result')
        except Exception:
            parsed = {}
        if not parsed or not isinstance(parsed, dict):
            raise Exception("Invalid LLM response format")
        # Mark the exercise live with its single (growing) question thread.
        exercise.status = 'published'
        exercise.question_count = 1
        # Reuse the session pre-created by the service when present; otherwise
        # create one (e.g. if the service-side pre-creation was skipped).
        session = await qa_session_dao.get_latest_by_user_exercise(db, task.user_id, exercise.id)
        if session:
            session.status = 'ongoing'
            prog = dict(session.progress or {})
            prog['total_questions'] = 1
            session.progress = prog
        else:
            prog = {'current_index': 0, 'answered': 0, 'correct': 0, 'attempts': [], 'total_questions': 1}
            session = await qa_session_dao.create(db, {
                'exercise_id': exercise.id,
                'starter_user_id': task.user_id,
                'status': 'ongoing',
                'started_at': datetime.now(),
                'progress': prog,
                'ext': None,
            })
        # Persist the assistant's opening message as a question row; the
        # structured extras (translation, prompts, alternatives) go into ext.
        question_content = parsed.get('response_en') or ''
        question_ext = {
            'role': 'assistant',
            'response_zh': parsed.get('response_zh'),
            'prompt_en': parsed.get('prompt_en'),
            'prompt_zh': parsed.get('prompt_zh'),
            'alternative_responses': parsed.get('alternative_responses'),
            'correction': parsed.get('correction'),
        }
        await qa_question_dao.create(db, {
            'exercise_id': exercise.id,
            'image_id': image.id,
            'question': question_content,
            'user_id': task.user_id,
            'payload': None,
            'ext': question_ext,
        })
        await db.flush()
        result = {
            'exercise_id': str(exercise.id),
            'session_id': str(session.id),
            'token_usage': token_usage
        }
        return result, token_usage

    async def _call_llm_chat(self, prompt: str, image_id: int, user_id: int, chat_type: str) -> Dict[str, Any]:
        """Invoke the configured LLM with audit logging.

        Returns ``{"success": True, "result": str, "token_usage": dict}`` or
        ``{"success": False, "error": str}`` — never raises.
        NOTE(review): duplicated verbatim in ConversationInitProcessor and
        ConversationReplyProcessor; a shared mixin would remove the copies.
        """
        messages = [
            SystemMessage(content="You are a helpful assistant."),
            HumanMessage(content=prompt)
        ]
        metadata = {
            "image_id": image_id,
            "user_id": user_id,
            "api_type": chat_type,
            "model_name": settings.LLM_MODEL_TYPE
        }
        try:
            llm = LLMFactory.create_llm(settings.LLM_MODEL_TYPE)
            res = await llm.ainvoke(
                messages,
                config={"callbacks": [AuditLogCallbackHandler(metadata=metadata)]}
            )
            content = res.content
            if not isinstance(content, str):
                content = str(content)
            token_usage = {}
            if res.response_metadata:
                # Provider-dependent key: OpenAI-style 'token_usage' or 'usage'.
                token_usage = res.response_metadata.get("token_usage") or res.response_metadata.get("usage") or {}
            return {"success": True, "result": content, "token_usage": token_usage}
        except Exception as e:
            return {"success": False, "error": str(e)}
class ConversationReplyProcessor(TaskProcessor):
    """Async task processor that turns a pending user attempt into the next
    assistant message of a free image conversation.

    The task's ``ref_id`` points at the ``qa_attempt`` row created by
    ``QaService.reply_conversation``.
    """

    async def process(self, db: AsyncSession, task: ImageProcessingTask) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        """Generate the assistant reply; returns (result, token_usage)."""
        # task.ref_id is the attempt id for 'qa_attempt' tasks.
        attempt_id = task.ref_id
        attempt = await qa_attempt_dao.get(db, attempt_id)
        if not attempt:
            raise errors.NotFoundError(msg="Attempt not found")
        exercise = await qa_exercise_dao.get(db, attempt.exercise_id)
        if not exercise:
            raise errors.NotFoundError(msg="Exercise not found")
        image = await image_dao.get(db, exercise.image_id)
        # Load the session so its timestamp/progress can be updated below.
        session = await qa_session_dao.get_latest_by_user_exercise(db, task.user_id, exercise.id)
        if not session:
            # Tolerated: the reply still works without a session row; the
            # session-update step at the end is simply skipped.
            pass
        # Extract the image description (string or first string of a list).
        rr = (image.details or {}).get('recognition_result') or {}
        description = ''
        try:
            d = rr.get('description')
            if isinstance(d, str):
                description = d
            elif isinstance(d, list) and d:
                description = d[0] if isinstance(d[0], str) else ''
        except Exception:
            description = ''
        # Build the dialogue history for the prompt. Every stored question is
        # an assistant turn; each user turn is the latest completed attempt on
        # that question. The last question is the message being replied to: it
        # goes into the history, but the current attempt does NOT — it is
        # passed separately as the "User's New Input" of the reply prompt.
        questions = await qa_question_dao.get_by_exercise_id(db, exercise.id)
        history = []
        for i, q in enumerate(questions):
            # Assistant turn.
            history.append({
                'role': 'assistant',
                'content': q.question
            })
            # Stop before attaching a user turn to the question being answered.
            if i == len(questions) - 1:
                break
            # Earlier questions: pair with the user's completed attempt, if any.
            prev_attempt = await qa_attempt_dao.get_latest_completed_by_user_question(db, task.user_id, q.id)
            if prev_attempt:
                history.append({
                    'role': 'user',
                    'content': prev_attempt.input_text
                })
        user_input = attempt.input_text or ''
        params = exercise.ext or {}
        prompt = get_free_conversation_reply_prompt(
            history=history,
            user_input=user_input,
            scene=params.get('scene'),
            event=params.get('event'),
            user_role=params.get('user_role'),
            assistant_role=params.get('assistant_role'),
            style=params.get('style'),
            level=params.get('level'),
            info=params.get('info'),
            description=description,
        )
        res = await self._call_llm_chat(prompt=prompt, image_id=image.id, user_id=task.user_id, chat_type='conversation_reply')
        if not res.get('success'):
            raise Exception(res.get('error') or "LLM call failed")
        token_usage = res.get('token_usage') or {}
        try:
            parsed = json.loads(res.get('result')) if isinstance(res.get('result'), str) else res.get('result')
        except Exception:
            parsed = {}
        if not parsed or not isinstance(parsed, dict):
            raise Exception("Invalid LLM response format")
        # Attach the grammar correction (if any) to the user's attempt and
        # complete it.
        correction = parsed.get('correction')
        if correction:
            new_ext = dict(attempt.ext or {})
            new_ext['correction'] = correction
            attempt.ext = new_ext
        attempt.evaluation = {'correction': correction}
        attempt.status = 'completed'  # attempt was created in 'pending' state
        # Store the assistant's reply as the next question row.
        question_content = parsed.get('response_en') or ''
        question_ext = {
            'role': 'assistant',
            'response_zh': parsed.get('response_zh'),
            'prompt_en': parsed.get('prompt_en'),
            'prompt_zh': parsed.get('prompt_zh'),
            'alternative_responses': parsed.get('alternative_responses'),
            'correction': correction,
        }
        new_question = await qa_question_dao.create(db, {
            'exercise_id': exercise.id,
            'image_id': image.id,
            'question': question_content,
            'user_id': task.user_id,
            'payload': None,
            'ext': question_ext,
        })
        # Bump the session's activity timestamp and turn counter.
        if session:
            session.updated_at = datetime.now()
            prog = dict(session.progress or {})
            prog['total_questions'] = (prog.get('total_questions') or 0) + 1
            session.progress = prog
        await db.flush()
        result = {
            'session_id': str(session.id) if session else '',
            'new_question_id': str(new_question.id),
            'token_usage': token_usage
        }
        return result, token_usage

    async def _call_llm_chat(self, prompt: str, image_id: int, user_id: int, chat_type: str) -> Dict[str, Any]:
        """Invoke the configured LLM with audit logging; never raises.

        NOTE(review): verbatim copy of the helper in ConversationStartProcessor —
        candidate for extraction into a shared base/mixin.
        """
        messages = [
            SystemMessage(content="You are a helpful assistant."),
            HumanMessage(content=prompt)
        ]
        metadata = {
            "image_id": image_id,
            "user_id": user_id,
            "api_type": chat_type,
            "model_name": settings.LLM_MODEL_TYPE
        }
        try:
            llm = LLMFactory.create_llm(settings.LLM_MODEL_TYPE)
            res = await llm.ainvoke(
                messages,
                config={"callbacks": [AuditLogCallbackHandler(metadata=metadata)]}
            )
            content = res.content
            if not isinstance(content, str):
                content = str(content)
            token_usage = {}
            if res.response_metadata:
                # Provider-dependent key: OpenAI-style 'token_usage' or 'usage'.
                token_usage = res.response_metadata.get("token_usage") or res.response_metadata.get("usage") or {}
            return {"success": True, "result": content, "token_usage": token_usage}
        except Exception as e:
            return {"success": False, "error": str(e)}
class QaService:
async def get_conversation_setting(self, image_id: int, user_id: int) -> Optional[Dict[str, Any]]:
async with async_db_session() as db:
@@ -290,6 +607,223 @@ class QaService:
"latest_session": latest_session_info,
}
async def start_conversation(
    self,
    image_id: int,
    user_id: int,
    scene: List[str],
    event: List[str],
    style: Optional[str] = None,
    user_role: Optional[str] = None,
    assistant_role: Optional[str] = None,
    level: Optional[str] = None,
    info: Optional[str] = None,
) -> Dict[str, Any]:
    """Create a free-conversation exercise + session and dispatch the LLM task.

    Returns a dict with ``task_id``, ``status``, ``exercise_id`` and
    ``session_id`` (ids serialized as strings). Raises ForbiddenError when the
    user lacks points or has exhausted their concurrent-task slots, and
    NotFoundError when the image does not exist.
    """
    # Gate on points balance and per-user concurrent-task limit first.
    if not await points_service.check_sufficient_points(user_id, LLM_CHAT_COST):
        raise errors.ForbiddenError(msg='积分不足,请获取积分后继续使用')
    slot_acquired = await rate_limit_service.acquire_task_slot(user_id)
    if not slot_acquired:
        max_tasks = await rate_limit_service.get_user_task_limit(user_id)
        raise errors.ForbiddenError(msg=f'用户同时最多只能运行 {max_tasks} 个任务,请等待现有任务完成后再试')
    async with async_db_session.begin() as db:
        image = await image_dao.get(db, image_id)
        if not image:
            raise errors.NotFoundError(msg="Image not found")
        # Exercise row carries the conversation configuration in ext so the
        # processor can rebuild the prompt later.
        exercise = await qa_exercise_dao.create(db, {
            "image_id": image_id,
            "created_by": user_id,
            "type": "free_conversation",
            "description": None,
            "status": "ongoing",
            "ext": {
                "scene": scene,
                "event": event,
                "user_role": user_role,
                "assistant_role": assistant_role,
                "style": style,
                "level": level,
                "info": info,
            },
        })
        await db.flush()
        # Pre-create the session so the client gets a session_id immediately;
        # the processor flips it to 'ongoing' once the opening turn is ready.
        prog = {'current_index': 0, 'answered': 0, 'correct': 0, 'attempts': [], 'total_questions': 0}
        session = await qa_session_dao.create(db, {
            'exercise_id': exercise.id,
            'starter_user_id': user_id,
            'status': 'initializing',
            'started_at': datetime.now(),
            'progress': prog,
            'ext': None,
        })
        await db.flush()
        # Task row that the background processor will pick up (ref -> exercise).
        task = await image_task_dao.create_task(db, CreateImageTaskParam(
            image_id=image_id,
            user_id=user_id,
            dict_level=(getattr(getattr(image, 'dict_level', None), 'name', None) or 'LEVEL1'),
            ref_type='qa_exercise',
            ref_id=exercise.id,
            status=ImageTaskStatus.PENDING,
        ))
        await db.flush()
        task_id = task.id
    # Dispatch after the transaction block so the worker's own DB session can
    # see the committed rows.
    asyncio.create_task(image_task_service.process_task(task_id, user_id, ConversationStartProcessor()))
    return {
        "task_id": str(task_id),
        "status": "processing",
        "exercise_id": str(exercise.id),
        "session_id": str(session.id)
    }
async def reply_conversation(
    self,
    session_id: int,
    user_id: int,
    input_text: str,
) -> Dict[str, Any]:
    """Record a user reply and dispatch the LLM task for the assistant's turn.

    Returns a dict with ``task_id``, ``status`` and ``session_id`` (ids as
    strings). Raises ForbiddenError on insufficient points / exhausted task
    slots or foreign sessions, NotFoundError for missing session/exercise, and
    ServerError when there is no assistant message to reply to.

    Fixes vs. original:
    - ``dict_level`` was hard-coded to 'LEVEL1' with a TODO-style comment; it
      is now derived from the image, mirroring ``start_conversation``.
    - Guard against a dangling ``exercise_id`` (previously ``exercise.id``
      would raise AttributeError).
    """
    # Gate on points balance and per-user concurrent-task limit first.
    if not await points_service.check_sufficient_points(user_id, LLM_CHAT_COST):
        raise errors.ForbiddenError(msg='积分不足,请获取积分后继续使用')
    slot_acquired = await rate_limit_service.acquire_task_slot(user_id)
    if not slot_acquired:
        max_tasks = await rate_limit_service.get_user_task_limit(user_id)
        raise errors.ForbiddenError(msg=f'用户同时最多只能运行 {max_tasks} 个任务,请等待现有任务完成后再试')
    async with async_db_session.begin() as db:
        session = await qa_session_dao.get(db, session_id)
        if not session:
            raise errors.NotFoundError(msg="Session not found")
        if session.starter_user_id != user_id:
            raise errors.ForbiddenError(msg="Forbidden")
        exercise = await qa_exercise_dao.get(db, session.exercise_id)
        if not exercise:
            raise errors.NotFoundError(msg="Exercise not found")
        # The user's reply is stored as a pending attempt linked to the last
        # assistant message; the processor completes it.
        last_question = await qa_question_dao.get_latest_by_exercise_id(db, exercise.id)
        if not last_question:
            raise errors.ServerError(msg="No question to reply to")
        attempt = await qa_attempt_dao.create(db, {
            "user_id": user_id,
            "question_id": last_question.id,
            "exercise_id": exercise.id,
            "input_text": input_text,
            "status": "pending",
            "evaluation": None,
            "ext": None
        })
        await db.flush()
        # Derive dict_level from the image, as start_conversation does.
        image = await image_dao.get(db, exercise.image_id)
        dict_level = (getattr(getattr(image, 'dict_level', None), 'name', None) or 'LEVEL1')
        # Task row that the background processor will pick up (ref -> attempt).
        task = await image_task_dao.create_task(db, CreateImageTaskParam(
            image_id=exercise.image_id,
            user_id=user_id,
            dict_level=dict_level,
            ref_type='qa_attempt',
            ref_id=attempt.id,
            status=ImageTaskStatus.PENDING,
        ))
        await db.flush()
        task_id = task.id
    # Dispatch after the transaction block so the worker sees committed rows.
    asyncio.create_task(image_task_service.process_task(task_id, user_id, ConversationReplyProcessor()))
    return {
        "task_id": str(task_id),
        "status": "processing",
        "session_id": str(session.id)
    }
async def _get_messages_for_session(self, db: AsyncSession, exercise_id: int, user_id: int) -> List[Dict[str, Any]]:
    """Rebuild the full message history for an exercise's conversation.

    Each stored question is an assistant turn; the user's latest completed
    attempt on that question (if any) follows it as a user turn. Returns a
    list of ``{"role", "content"}`` dicts matching ConversationMessageSchema.
    """
    questions = await qa_question_dao.get_by_exercise_id(db, exercise_id)
    messages = []
    for q in questions:
        # Assistant turn: text lives on the question row, extras in ext.
        ext = q.ext or {}
        messages.append({
            "role": "assistant",
            "content": {
                "response_en": q.question,
                "response_zh": ext.get("response_zh"),
                "prompt_en": ext.get("prompt_en"),
                "prompt_zh": ext.get("prompt_zh"),
                "alternative_responses": ext.get("alternative_responses"),
                "correction": ext.get("correction"),
            }
        })
        # User turn: the completed attempt, including any grammar correction.
        attempt = await qa_attempt_dao.get_latest_completed_by_user_question(db, user_id, q.id)
        if attempt:
            messages.append({
                "role": "user",
                "content": {
                    "text": attempt.input_text,
                    "correction": (attempt.evaluation or {}).get("correction")
                }
            })
    return messages
async def get_latest_messages(self, session_id: int, user_id: int) -> Dict[str, Any]:
    """Return only the newest assistant message for a session.

    Raises NotFoundError when the session is missing or owned by another
    user (ownership failures are deliberately indistinguishable from missing
    sessions).
    """
    async with async_db_session() as db:
        session = await qa_session_dao.get(db, session_id)
        if not session or session.starter_user_id != user_id:
            raise errors.NotFoundError(msg="Session not found")
        # Optimization: fetch only the latest question instead of replaying
        # the entire history via _get_messages_for_session.
        latest_q = await qa_question_dao.get_latest_by_exercise_id(db, session.exercise_id)
        latest_messages = []
        if latest_q:
            ext = latest_q.ext or {}
            latest_messages.append({
                "role": "assistant",
                "content": {
                    "response_en": latest_q.question,
                    "response_zh": ext.get("response_zh"),
                    "prompt_en": ext.get("prompt_en"),
                    "prompt_zh": ext.get("prompt_zh"),
                    "alternative_responses": ext.get("alternative_responses"),
                    "correction": ext.get("correction"),
                }
            })
        return {
            "session_id": str(session_id),
            "messages": latest_messages
        }
async def get_conversation_session(self, session_id: int, user_id: int) -> Dict[str, Any]:
    """Return the full session view: status, timestamp and message history.

    Raises NotFoundError when the session is missing, owned by another user,
    or references a deleted exercise.

    Fix: the original dereferenced ``exercise.id`` without a None check — a
    session whose exercise row was removed raised AttributeError instead of a
    clean 404.
    """
    async with async_db_session() as db:
        session = await qa_session_dao.get(db, session_id)
        if not session or session.starter_user_id != user_id:
            raise errors.NotFoundError(msg="Session not found")
        exercise = await qa_exercise_dao.get(db, session.exercise_id)
        if not exercise:
            raise errors.NotFoundError(msg="Exercise not found")
        messages = await self._get_messages_for_session(db, session.exercise_id, user_id)
        return {
            "exercise_id": str(exercise.id),
            "session_id": str(session.id),
            "status": session.status,
            # Exercise models may not define updated_time; guard via getattr.
            "updated_at": (exercise.updated_time.isoformat() if getattr(exercise, "updated_time", None) else None),
            "messages": messages,
        }
async def create_exercise_task(self, image_id: int, user_id: int, type: Optional[str] = "scene_basic") -> Dict[str, Any]:
is_conversation_init = type == 'init_conversion'

View File

@@ -0,0 +1,257 @@
from typing import List, Optional
def get_free_conversation_start_prompt(
    scene: List[str],
    event: List[str],
    user_role: Optional[str],
    assistant_role: Optional[str],
    style: Optional[str],
    level: Optional[str],
    info: Optional[str],
    description: str,
) -> str:
    """Build the system prompt that opens a free image conversation.

    ``scene``/``event`` are joined into comma-separated strings; optional
    fields default to '' (level defaults to 'easy', matched case-insensitively).
    Returns the full prompt text instructing the LLM to emit structured JSON.

    Fix: the template interpolated the raw ``assistant_role`` and ``user_role``
    parameters (which may be None) instead of the sanitized ``*_str``
    variables, leaking the literal text "None" into the prompt when roles
    were unset.
    """
    # Normalize every optional input so None never reaches the template.
    scene_str = ", ".join(scene) if scene else ""
    event_str = ", ".join(event) if event else ""
    info_str = info or ""
    conversation_style = style or ""
    user_role_str = user_role or ""
    assistant_role_str = assistant_role or ""
    level_str = level or "easy"
    # Default (easy) language rules; overridden below for medium/hard.
    level_prompt = """
— Basic Communication, Short & Simple (Like a 5-year-old speaking)
- **Vocabulary**: Only high-frequency daily words (≤2-syllable words, e.g., food, drink, table, happy; avoid rare words like "delicious" → use "good", "tasty" max)
- **Grammar**: Limited to 3 basic structures:
1. Simple present tense (I like this.)
2. Present continuous tense (The food is hot.)
3. Basic modal verbs (can/will, e.g., I can eat.)
- **Sentence Length**: ≤10 words per sentence; 1-2 sentences per response (no complex clauses)
- **Expression Goal**: Only complete basic communication (greet, ask simple questions, state likes/dislikes)
- **Style Adaptation**: Even formal style stays simple (e.g., formal → "Can you help me?" instead of "Would you mind assisting me?")
"""
    if level_str.lower() == "medium":
        level_prompt = """
— Detailed Discussion + Emotional Expression
- **Vocabulary**: High-frequency words + scene-specific collocations (e.g., restaurant → "menu, order, signature dish"; meeting → "task, deadline, suggestion")
- **Grammar**: Basic structures + limited complex ones:
1. Present perfect tense (I have tried this before.)
2. Simple conditional sentences (If we order steak, it will be good.)
3. Coordinate clauses (and/but/or, e.g., The food is good but expensive.)
- **Sentence Length**: ≤15 words per sentence; 1-2 sentences per response
- **Expression Goal**: Add details (e.g., "The steak is hot and juicy") + emotional words (e.g., happy, excited, worried, tired)
- **Style Adaptation**: Match style with emotion (e.g., casual → "This is awesome!"; formal → "I am pleased with this plan.")
"""
    elif level_str.lower() == "hard":
        level_prompt = """
Daily Communication + Communication Skills + Extended Expression
- **Vocabulary**: Daily words + collocations + advanced synonyms/extended phrases (e.g., "good""delicious, flavorful, mouth-watering"; "ask""inquire about, seek advice on")
- **Grammar**: Full range of structures + sophisticated usage:
1. Complex conditional sentences (If we had started earlier, we could have finished on time.)
2. Inversion (Rarely do we see such a great plan.)
3. Attributive clauses (The restaurant that we visited yesterday is great.)
- **Sentence Length**: ≤20 words per sentence; 1-2 sentences per response (concise but rich)
- **Expression Goal**:
1. Basic communication + details + emotion
2. Add **communication skills**: euphemism (e.g., "Im not sure if this works" instead of "This is bad"), topic guidance (e.g., "Speaking of which, what do you think about..."), persuasion (e.g., "Considering the deadline, we should prioritize this task")
3. Extended expression: paraphrase, cultural references (e.g., "This steak is as good as the one in New York")
- **Style Adaptation**: Style drives skill usage (e.g., professional → use logical persuasion; friendly → use casual euphemism)
"""
    base = f"""
You are a professional English conversation partner for intermediate English learners. Follow the rules below to conduct natural, targeted multi-round conversations and output structured JSON.
// Mandatory Configuration (Fill in before conversation starts)
- Scene: {scene_str} (e.g., restaurant)
- Event: {event_str} (e.g., dining with friends)
- Your Role: {assistant_role_str} (paired with user's role {user_role_str})
- User Role: {user_role_str}
- The tone and style of the dialogue: {conversation_style}
- English Level: {level_str} (fixed as intermediate)
- Extra Info: {info_str} (supplementary background)
- Image Description: {description} (image details)
// Level-Specific Language Rules (Dynamic & Mandatory)
{level_prompt}
// Conversation Rules (Strictly Follow)
1. **Role & Style Alignment (Critical)**
- Stick to your {assistant_role_str} and strictly match the {conversation_style} requirement:
- Casual: Use colloquial English, contractions (wanna, gonna, dont), short sentences, and friendly tone (fit daily chats/dining with friends).
- Formal: Use complete sentences, polite expressions (would you mind, I would suggest), avoid contractions (fit business meetings/negotiations).
- Professional: Focus on logicality and persuasion, use scene-specific terminology, clear structure (fit work discussions/training).
- Friendly: Warm and approachable, add appropriate emotional words (great, awesome, nice) (fit chatting with acquaintances).
- Use intermediate English: scene-specific vocabulary (no rare words), grammar includes complex clauses/present perfect/conditional sentences (avoid overly simple/advanced structures).
- Naturally integrate {description} and {info_str} into the conversation (e.g., mention "steak" or "newly opened restaurant").
2. **Initiation & Anti-Awkwardness Requirements**
- You speak first to start the conversation; opening is natural and scene-relevant (no abruptness).
- Generate **3 categorized alternative user responses** (1 for each type: positive/neutral/negative) to avoid user awkwardness. Each type must meet:
- Consistent with {user_role_str}, {scene_str}, {event_str} and {conversation_style};
- Intermediate-level English (fit user's ability);
- Short (1 sentence each, easy for user to choose/modify);
- Clear emotional orientation:
- Positive: Agree, approve, show enthusiasm (e.g., "Thats a great idea! I love steak!");
- Neutral: Objective statement, ask factual questions (e.g., "I havent tried it before. Is it expensive?");
- Negative: Polite refusal, express doubts (e.g., "Im not a fan of steak. Do they have seafood?").
3. **Multi-round Coherence**
- This is an incremental conversation: always reference historical dialogue content (never ignore user's previous words).
- Keep your response concise (1-2 English sentences, easy for user to follow).
- Gently correct user's grammar/vocab mistakes without disrupting flow (e.g., "You can say 'I like this restaurant' instead of 'I like this restaurants' 😊").
4. **JSON Output Format (Mandatory, No Extra Text)**
- Only output a valid JSON string (parseable by JSON.parse), no explanations/role labels.
- Fields definition:
{{
"response_en": "Your English conversation content (1-2 sentences, match {conversation_style})",
"response_zh": "Chinese translation of response_en",
"prompt_en": "Friendly guide for user to reply",
"prompt_zh": "Chinese translation of prompt_en",
"alternative_responses": {{
"positive": {{
"alt_en": "Positive user response (English, 1 sentence)",
"alt_zh": "Chinese translation of positive response"
}},
"neutral": {{
"alt_en": "Neutral user response (English, 1 sentence)",
"alt_zh": "Chinese translation of neutral response"
}},
"negative": {{
"alt_en": "Negative user response (English, 1 sentence)",
"alt_zh": "Chinese translation of negative response"
}}
}}
"correction": "Grammar/vocab correction (English, empty string if no mistake in user's last input)"
}}
- When appending new conversations, update the JSON based on full dialogue history and maintain style consistency.
// Output Constraint
- Strictly follow the JSON format; any deviation (extra text/invalid fields) is not allowed.
"""
    return base.strip()
def get_free_conversation_reply_prompt(
    history: List[dict],
    user_input: str,
    scene: List[str],
    event: List[str],
    user_role: Optional[str],
    assistant_role: Optional[str],
    style: Optional[str],
    level: Optional[str],
    info: Optional[str],
    description: str,
) -> str:
    """Build the LLM prompt for one follow-up turn of an image-based free conversation.

    Renders the full conversation context (scene/event/roles/style/level/image
    description), the dialogue history, the user's new input, and level-specific
    language rules into a single instruction string that asks the model to emit
    one JSON object (response, guide prompt, 3 alternative replies, correction).

    Args:
        history: Prior messages; each dict is read via ``.get("role")`` and
            ``.get("content")`` and rendered as ``role: content`` lines.
        user_input: The user's newest utterance to respond to.
        scene: Scene tags inherited from initialization (joined with ", ").
        event: Event tags inherited from initialization (joined with ", ").
        user_role: Role the user plays (e.g. "customer"); empty string if None.
        assistant_role: Role the assistant plays; empty string if None.
        style: Conversation style (casual/formal/...); empty string if None.
        level: Difficulty level ("easy"/"medium"/"hard"); falsy values and
            unknown levels fall back to "easy".
        info: Extra background details; empty string if None.
        description: Textual description of the image grounding the chat.

    Returns:
        The fully rendered prompt string, stripped of surrounding whitespace.
    """
    scene_str = ", ".join(scene) if scene else ""
    event_str = ", ".join(event) if event else ""
    info_str = info or ""
    conversation_style = style or ""
    user_role_str = user_role or ""
    assistant_role_str = assistant_role or ""
    level_str = level or "easy"

    # Level-specific language constraints. "easy" is the default; any
    # unrecognized level string also falls back to it (same behavior as the
    # previous if/elif chain).
    easy_prompt = """
— Short, Simple, Basic Communication
- Vocabulary: Only 1-2 syllable high-frequency words (e.g., food, hot, nice; no complex words)
- Grammar: Limited to simple present/present continuous/basic modals (can/will)
- Sentence Length: ≤10 words per sentence; 1 sentence max for your response
- Correction Style: Direct + simple example (e.g., "Say 'I like it' not 'I like'")
"""
    medium_prompt = """
Detailed, Emotional, Scene-Specific
- Vocabulary: High-frequency words + scene collocations (e.g., restaurant → menu, order)
- Grammar: Basic structures + present perfect/simple conditionals (if...then...)
- Sentence Length: ≤15 words per sentence; 1-2 sentences for your response
- Correction Style: Polite + brief reason (e.g., "Use 'have tried' because its a past experience")
"""
    hard_prompt = """
Sophisticated, Skillful, Extended Expression
- Vocabulary: Daily words + collocations + advanced synonyms (e.g., good → delicious, flavorful)
- Grammar: Complex structures (attributive clauses, inversion) + communication skills (euphemism, persuasion)
- Sentence Length: ≤20 words per sentence; 1-2 sentences for your response
- Correction Style: Polite + optimization suggestion (e.g., "You can say 'Im afraid this may not work' to sound more formal")
"""
    level_prompt = {"medium": medium_prompt, "hard": hard_prompt}.get(level_str, easy_prompt)

    # Render history as "role: content" lines. join() is O(n) overall,
    # unlike repeated string += which is quadratic.
    history_str = "".join(
        f"{msg.get('role')}: {msg.get('content')}\n" for msg in history
    )

    base = f"""
You are a professional English conversation partner specialized in **continuing multi-round dialogues**. Your core task is to follow up based on conversation history and the user's new input, while maintaining consistency of role, style, and difficulty. Output **only valid JSON** (parseable by JSON.parse), no extra text/explanations.
// Mandatory Context (Inherited from Initialization, Do Not Modify)
- Scene: {scene_str} (e.g., restaurant, meeting room)
- Event: {event_str} (e.g., dining with friends, project discussion)
- Your Role: {assistant_role_str} (e.g., friend, project manager)
- User Role: {user_role_str} (e.g., customer, team member)
- Conversation Style: {conversation_style} (e.g., casual/formal/professional; strictly adhere to style norms)
- English Level: {level_str} (beginner/intermediate/advanced; follow level-specific language rules below)
- Image Context: {description} (core elements of the image, integrate naturally)
- Extra Background: {info_str} (supplementary details, reference when relevant)
// Critical Conversation History (Must Reference to Ensure Coherence)
{history_str}
// User's New Input (Core Analysis Object)
User: {user_input}
// Level-Specific Language Rules (Strictly Follow for All Outputs)
{level_prompt}
// Core Instructions (Priority Order: Coherence > Error Correction > Natural Progression)
1. **Analyze User Input & Correct Errors (Critical)**
- Check for grammar, vocabulary, spelling mistakes. If any, write a **level-matched polite correction** in the "correction" field (empty string if no errors).
- If input is empty/unclear/irrelevant to scene/event: Politely ask for clarification (match your role/style/level), and skip alternative responses temporarily if needed.
- If input deviates from the topic: Gently guide back to the scene/event (e.g., "Thats interesting! By the way, what do you think of the steak here?")
2. **Generate Your Response (Strictly Bound to Context)**
- Stay in your role ({assistant_role_str}) and match {conversation_style} (e.g., casual → use contractions; formal → avoid contractions).
- **Must reference the conversation history** (e.g., if user mentioned "I dont like steak" before, dont ask "Do you like steak?").
- Integrate {description} and {info_str} naturally (avoid forced references).
- Keep it concise (follow level-specific sentence rules) and **advance the conversation** (dont repeat the same topic).
3. **Generate Guide Prompt (Help User Continue Talking)**
- Write a **topic-related suggestion** (in English and Chinese) for what the user can say next (e.g., "Talk about your favorite food" / "聊聊你最喜欢的菜").
- Prompt must be closely related to YOUR current response (not the users input alone).
4. **Generate Emotional Alternative Responses (3 Types)**
- Create 3 options (positive/neutral/negative) for the user to reply to **YOUR response** (not the history).
- Each alternative must match {level_str}, {conversation_style}, and the current dialogue context.
- Each alternative is 1 sentence only; translations must be accurate and colloquial.
// Output Format (JSON Only, No Deviation Allowed)
{{
"response_en": "Your level/style-matched English response (1-2 sentences max)",
"response_zh": "Accurate colloquial Chinese translation of your response",
"prompt_en": "English guide prompt (suggest what user can say next)",
"prompt_zh": "Colloquial Chinese translation of the guide prompt",
"alternative_responses": {{
"positive": {{
"alt_en": "Level/style-matched positive response to YOUR reply (1 sentence)",
"alt_zh": "Colloquial Chinese translation"
}},
"neutral": {{
"alt_en": "Level/style-matched neutral response to YOUR reply (1 sentence)",
"alt_zh": "Colloquial Chinese translation"
}},
"negative": {{
"alt_en": "Level/style-matched negative response to YOUR reply (1 sentence)",
"alt_zh": "Colloquial Chinese translation"
}}
}},
"correction": "Level-matched polite correction (empty string if no errors; English only)"
}}
// Forbidden Behaviors
- Do NOT reintroduce the scene/event (history already includes it).
- Do NOT use vocabulary/grammar beyond the specified {level_str}.
- Do NOT generate responses unrelated to the conversation history.
- Do NOT output anything except the required JSON.
"""
    return base.strip()

View File

@@ -150,7 +150,12 @@ Identify 5-8 possible events that are logically feasible in daily life; events c
Events must be specific and actionable (avoid vague descriptions like "doing something").
For each event, provide English name + Chinese translation + bilingual brief conversation direction (10-20 words per direction, explaining the focus of the conversation for this event).
No need to bind events to specific scenes; prioritize enriching event diversity to expand users' conversation options.
Output Constraints
For each event, supplement 3 key attributes (guide targeted dialogue practice):
Conversation Style: Match the events atmosphere (e.g., birthday celebration → casual/cheerful; business negotiation → formal/serious), output as bilingual (style_en/style_zh).
Suggested Roles: 2-3 common role pairs suitable for the event (e.g., blind date → man & woman, stranger & stranger), output as bilingual role items.
Bilingual Conversation Direction: 10-20 words per language, explaining the focus of the conversation for this event (e.g., "talking about hobbies and future plans" / "谈论兴趣爱好和未来规划").
Do not bind events to specific scenes; prioritize enriching event diversity to expand users' conversation options.
Output Constraints:
Only return a JSON string (no explanatory text, no extra comments).
Ensure the JSON can be directly parsed by JSON.parse.
Strictly control the quantity of scenes and events within the specified range to avoid overwhelming users with options.
@@ -164,7 +169,13 @@ Output JSON Structure:
"event_en": "string", // English event name (e.g., "dining with friends")
"event_zh": "string", // Chinese event name (e.g., "和朋友聚餐")
"conversation_direction_en": "string", // English conversation focus (e.g., "talking about food taste and restaurant recommendations")
"conversation_direction_zh": "string" // Chinese conversation focus (e.g., "谈论食物口味和餐厅推荐")
"conversation_direction_zh": "string", // Chinese conversation focus (e.g., "谈论食物口味和餐厅推荐")
"style_en": "string",
"style_zh": "string",
"suggested_roles": [
{{"role1_en": "string", "role1_zh": "string", "role2_en": "string", "role2_zh": "string"}},
...// 2-3 role pairs
]
}}, ...// 4-7 events in total, independent of scenes
]
}}

View File

@@ -19,7 +19,7 @@ app = register_app()
@app.get("/")
async def read_root():
# await wx_user_index_history()
await wx_user_index_history()
# res = await SentenceService()._process_scene_task(2111026809104629760, 2108963527040565248)
return {"Hello": f"World, {datetime.now().isoformat()}"}