This commit is contained in:
Felix
2025-12-30 20:37:49 +08:00
parent 5e2de52da9
commit 904ab008f6
17 changed files with 1270 additions and 116 deletions

View File

@@ -43,41 +43,44 @@ class CRUDAuditLog(CRUDPlus[AuditLog]):
today_start = datetime(today.year, today.month, today.day)
# 使用窗口函数为每个image_id分区内的记录按called_at排序并选择排名为1的记录
# 将关联查询和过滤条件放入子查询中确保筛选逻辑与count_stmt一致
ranked_subquery = (
select(
AuditLog.id,
AuditLog.image_id,
AuditLog.dict_level,
AuditLog.called_at,
Image.id.label('image_id'),
Image.thumbnail_id,
Image.file_id,
Image.created_time,
func.row_number().over(
partition_by=AuditLog.image_id,
order_by=[AuditLog.called_at, AuditLog.id] # 用id作为tie-breaker
).label('rn')
)
.join(Image, AuditLog.image_id == Image.id)
.join(ImageProcessingTask, Image.id == ImageProcessingTask.image_id)
.where(
AuditLog.user_id == user_id,
AuditLog.api_type == API_TYPE_RECOGNITION,
AuditLog.called_at >= today_start
AuditLog.called_at >= today_start,
ImageProcessingTask.dict_level == DictLevel.LEVEL1.value,
ImageProcessingTask.status == ImageTaskStatus.COMPLETED
)
.subquery()
)
# 主查询:关联image表、ranked子查询和image_processing_task表获取所需字段仅当天且任务状态为completed
# 主查询:直接从ranked子查询获取数据
stmt = (
select(
Image.id,
Image.thumbnail_id,
Image.file_id,
Image.created_time,
ranked_subquery.c.image_id.label('id'),
ranked_subquery.c.thumbnail_id,
ranked_subquery.c.file_id,
ranked_subquery.c.created_time,
ranked_subquery.c.dict_level
)
.join(Image, ranked_subquery.c.image_id == Image.id)
.join(ImageProcessingTask, Image.id == ImageProcessingTask.image_id)
.where(
ranked_subquery.c.rn == 1, # 只选择每个image_id的第一条记录
ImageProcessingTask.dict_level == DictLevel.LEVEL1.value,
ImageProcessingTask.status == ImageTaskStatus.COMPLETED # 只选择任务状态为completed的记录
)
.select_from(ranked_subquery)
.where(ranked_subquery.c.rn == 1)
.order_by(ranked_subquery.c.called_at.desc(), ranked_subquery.c.id.desc())
.offset((page - 1) * size)
.limit(size)

62
backend/app/ai/api/qa.py Normal file
View File

@@ -0,0 +1,62 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from fastapi import APIRouter, Request, Query
from backend.app.ai.schema.qa import CreateQaExerciseRequest, CreateQaExerciseTaskResponse, QaExerciseSchema, QaExerciseWithQuestionsSchema, QaQuestionSchema, QaSessionSchema, CreateAttemptRequest, TaskStatusResponse, QuestionLatestResultResponse
from backend.common.response.response_schema import response_base, ResponseSchemaModel
from backend.common.security.jwt import DependsJwtAuth
from backend.app.ai.service.qa_service import qa_service
router = APIRouter()
@router.post('/exercises/tasks', summary='创建练习任务', dependencies=[DependsJwtAuth])
async def create_exercise_task(request: Request, obj: CreateQaExerciseRequest) -> ResponseSchemaModel[CreateQaExerciseTaskResponse]:
    """Create a background task that generates a QA exercise for an image.

    Delegates to qa_service.create_exercise_task; the response carries the
    task id and its initial status.
    """
    res = await qa_service.create_exercise_task(image_id=obj.image_id, user_id=request.user.id, title=obj.title, description=obj.description)
    return response_base.success(data=CreateQaExerciseTaskResponse(**res))
@router.get('/exercises/tasks/{task_id}/status', summary='查询练习任务状态', dependencies=[DependsJwtAuth])
async def get_exercise_task_status(task_id: int) -> ResponseSchemaModel[TaskStatusResponse]:
    """Return the current status of an exercise-generation task."""
    res = await qa_service.get_task_status(task_id)
    return response_base.success(data=TaskStatusResponse(**res))
@router.get('/{image_id}/exercises', summary='根据图片获取练习', dependencies=[DependsJwtAuth])
async def list_exercises(request: Request, image_id: int) -> ResponseSchemaModel[QaExerciseWithQuestionsSchema | None]:
    """Return the latest exercise (with its questions and the caller's session) for an image, or None."""
    item = await qa_service.list_exercises_by_image(image_id, user_id=request.user.id)
    data = None if not item else QaExerciseWithQuestionsSchema(**item)
    return response_base.success(data=data)
@router.post('/questions/{question_id}/attempts', summary='提交题目练习', dependencies=[DependsJwtAuth])
async def submit_attempt(request: Request, question_id: int, obj: CreateAttemptRequest) -> ResponseSchemaModel[QuestionLatestResultResponse]:
    """Submit an answer attempt for a question and return the latest result payload.

    Forwards all mode-specific fields (choice / cloze / free text / audio file)
    to qa_service.submit_attempt; which fields are relevant depends on obj.mode.
    """
    res = await qa_service.submit_attempt(
        question_id=question_id,
        exercise_id=obj.exercise_id,
        user_id=request.user.id,
        mode=obj.mode,
        selected_options=obj.selected_options,
        input_text=obj.input_text,
        cloze_options=obj.cloze_options,
        file_id=obj.file_id,
        session_id=obj.session_id,
    )
    return response_base.success(data=QuestionLatestResultResponse(**res))
@router.get('/question-tasks/{task_id}/status', summary='获取题目任务状态', dependencies=[DependsJwtAuth])
async def get_question_task_status(task_id: int) -> ResponseSchemaModel[TaskStatusResponse]:
    """Return the current status of an attempt-evaluation task."""
    res = await qa_service.get_attempt_task_status(task_id)
    return response_base.success(data=TaskStatusResponse(**res))
@router.get('/questions/{question_id}/result', summary='获取题目最新结果', dependencies=[DependsJwtAuth])
async def get_question_latest_result(request: Request, question_id: int) -> ResponseSchemaModel[QuestionLatestResultResponse]:
    """Return the caller's latest evaluation result for the given question."""
    res = await qa_service.get_question_evaluation(question_id, user_id=request.user.id)
    return response_base.success(data=QuestionLatestResultResponse(**res))
@router.get('/questions/{question_id}/audio', summary='获取题目标准音频', dependencies=[DependsJwtAuth])
async def get_question_audio(request: Request, question_id: int) -> ResponseSchemaModel[dict]:
    """Return the file id of the question's standard (reference) audio."""
    # Local import — presumably to avoid a circular import; TODO confirm.
    from backend.app.ai.service.recording_service import RecordingService
    file_id = await RecordingService.get_question_audio_file_id(question_id=question_id, user_id=request.user.id)
    return response_base.success(data={'file_id': str(file_id)})

View File

@@ -6,6 +6,7 @@ from backend.app.ai.api.image import router as image_router
from backend.app.ai.api.recording import router as recording_router
from backend.app.ai.api.image_text import router as image_text_router
from backend.app.ai.api.scene import router as scene_router
from backend.app.ai.api.qa import router as qa_router
from backend.core.conf import settings
v1 = APIRouter(prefix=settings.FASTAPI_API_V1_PATH)
@@ -14,3 +15,4 @@ v1.include_router(image_router, prefix='/image', tags=['AI图片服务'])
v1.include_router(recording_router, prefix='/recording', tags=['AI录音服务'])
v1.include_router(image_text_router, prefix='/image_text', tags=['AI图片文本服务'])
v1.include_router(scene_router, prefix='/scene', tags=['AI场景服务'])
v1.include_router(qa_router, prefix='/qa', tags=['AI问答服务'])

View File

@@ -0,0 +1,117 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from typing import Optional, List
from sqlalchemy import select, and_
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy_crud_plus import CRUDPlus
from backend.app.ai.model.qa import QaExercise, QaQuestion, QaQuestionAttempt, QaPracticeSession
class QaExerciseCRUD(CRUDPlus[QaExercise]):
    """Data-access helpers for QaExercise rows."""

    async def get(self, db: AsyncSession, id: int) -> Optional[QaExercise]:
        """Fetch a single exercise by primary key, or None."""
        return await self.select_model(db, id)

    async def create(self, db: AsyncSession, obj_in: dict) -> QaExercise:
        """Insert a new exercise and flush so its id is populated."""
        exercise = QaExercise(**obj_in)
        db.add(exercise)
        await db.flush()
        return exercise

    async def get_by_image_id(self, db: AsyncSession, image_id: int) -> List[QaExercise]:
        """Return every exercise attached to the given image."""
        rows = await db.execute(select(self.model).where(self.model.image_id == image_id))
        return list(rows.scalars().all())

    async def get_latest_by_image_id(self, db: AsyncSession, image_id: int) -> Optional[QaExercise]:
        """Return the most recently created exercise for the image, if any."""
        latest_stmt = (
            select(self.model)
            .where(self.model.image_id == image_id)
            .order_by(self.model.created_time.desc(), self.model.id.desc())
            .limit(1)
        )
        rows = await db.execute(latest_stmt)
        return rows.scalars().first()
class QaQuestionCRUD(CRUDPlus[QaQuestion]):
    """Data-access helpers for QaQuestion rows."""

    async def get(self, db: AsyncSession, id: int) -> Optional[QaQuestion]:
        """Fetch a single question by primary key, or None."""
        return await self.select_model(db, id)

    async def create(self, db: AsyncSession, obj_in: dict) -> QaQuestion:
        """Insert a new question and flush so its id is populated."""
        question = QaQuestion(**obj_in)
        db.add(question)
        await db.flush()
        return question

    async def get_by_exercise_id(self, db: AsyncSession, exercise_id: int) -> List[QaQuestion]:
        """Return every question belonging to the given exercise."""
        rows = await db.execute(select(self.model).where(self.model.exercise_id == exercise_id))
        return list(rows.scalars().all())
class QaQuestionAttemptCRUD(CRUDPlus[QaQuestionAttempt]):
    """Data-access helpers for QaQuestionAttempt rows."""

    async def get(self, db: AsyncSession, id: int) -> Optional[QaQuestionAttempt]:
        """Fetch a single attempt by primary key, or None."""
        return await self.select_model(db, id)

    async def create(self, db: AsyncSession, obj_in: dict) -> QaQuestionAttempt:
        """Insert a new attempt and flush so its id is populated."""
        attempt = QaQuestionAttempt(**obj_in)
        db.add(attempt)
        await db.flush()
        return attempt

    async def update_status(self, db: AsyncSession, id: int, status: str, evaluation: Optional[dict] = None) -> int:
        """Set status (and optionally evaluation) on an attempt.

        Returns the number of rows updated: 1 on success, 0 if no such row.
        """
        attempt = await db.get(QaQuestionAttempt, id)
        if not attempt:
            return 0
        attempt.status = status
        if evaluation is not None:
            attempt.evaluation = evaluation
        await db.flush()
        return 1

    async def get_latest_by_user_question(self, db: AsyncSession, user_id: int, question_id: int) -> Optional[QaQuestionAttempt]:
        """Return the user's newest attempt on the question (highest id), if any."""
        latest_stmt = (
            select(QaQuestionAttempt)
            .where(and_(QaQuestionAttempt.user_id == user_id, QaQuestionAttempt.question_id == question_id))
            .order_by(QaQuestionAttempt.id.desc())
            .limit(1)
        )
        rows = await db.execute(latest_stmt)
        return rows.scalars().first()

    async def get_latest_completed_by_user_question(self, db: AsyncSession, user_id: int, question_id: int) -> Optional[QaQuestionAttempt]:
        """Return the user's newest *completed* attempt on the question, if any."""
        latest_stmt = (
            select(QaQuestionAttempt)
            .where(
                and_(
                    QaQuestionAttempt.user_id == user_id,
                    QaQuestionAttempt.question_id == question_id,
                    QaQuestionAttempt.status == 'completed',
                )
            )
            .order_by(QaQuestionAttempt.id.desc())
            .limit(1)
        )
        rows = await db.execute(latest_stmt)
        return rows.scalars().first()
qa_exercise_dao = QaExerciseCRUD(QaExercise)
qa_question_dao = QaQuestionCRUD(QaQuestion)
class QaPracticeSessionCRUD(CRUDPlus[QaPracticeSession]):
    """Data-access helpers for QaPracticeSession rows."""

    async def get(self, db: AsyncSession, id: int) -> Optional[QaPracticeSession]:
        """Fetch a single practice session by primary key, or None."""
        return await self.select_model(db, id)

    async def create(self, db: AsyncSession, obj_in: dict) -> QaPracticeSession:
        """Insert a new practice session and flush so its id is populated."""
        inst = QaPracticeSession(**obj_in)
        db.add(inst)
        await db.flush()
        return inst

    async def get_latest_by_user_exercise(self, db: AsyncSession, user_id: int, exercise_id: int) -> Optional[QaPracticeSession]:
        """Return the most recent session the user started for the exercise, if any.

        Fix: the original query had no ORDER BY, so "latest" depended on
        database row order. Order by descending id (ids are assumed to be
        time-ordered, matching get_latest_by_user_question above) and limit
        the scan to a single row.
        """
        stmt = (
            select(QaPracticeSession)
            .where(and_(QaPracticeSession.starter_user_id == user_id, QaPracticeSession.exercise_id == exercise_id))
            .order_by(QaPracticeSession.id.desc())
            .limit(1)
        )
        result = await db.execute(stmt)
        return result.scalars().first()
qa_session_dao = QaPracticeSessionCRUD(QaPracticeSession)
qa_attempt_dao = QaQuestionAttemptCRUD(QaQuestionAttempt)

View File

@@ -77,5 +77,17 @@ class RecordingCRUD(CRUDPlus[Recording]):
result = await db.execute(stmt)
return result.scalar_one_or_none()
async def get_standard_by_ref(self, db: AsyncSession, ref_type: str, ref_id: int) -> Optional[Recording]:
"""根据通用引用获取标准音频记录"""
stmt = select(self.model).where(
and_(
self.model.ref_type == ref_type,
self.model.ref_id == ref_id,
self.model.is_standard == True
)
).limit(1)
result = await db.execute(stmt)
return result.scalar_one_or_none()
recording_dao: RecordingCRUD = RecordingCRUD(Recording)

View File

@@ -5,3 +5,4 @@ from backend.app.ai.model.image_task import ImageProcessingTask
from backend.app.ai.model.recording import Recording
from backend.app.ai.model.sentence_card import SentenceCard
from backend.app.ai.model.scene_sentence import SceneSentence, SceneSentenceItem
from backend.app.ai.model.qa import QaExercise, QaQuestion, QaQuestionAttempt, QaPracticeSession

View File

@@ -0,0 +1,83 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from datetime import datetime
from typing import Optional

from sqlalchemy import BigInteger, Text, String, Integer, DateTime, ForeignKey, Index, Float
from sqlalchemy.dialects.mysql import JSON as MySQLJSON
from sqlalchemy.orm import mapped_column, Mapped

from backend.common.model import snowflake_id_key, Base
class QaExercise(Base):
    """A QA exercise generated for an image: a container for practice questions."""

    __tablename__ = 'qa_exercise'

    id: Mapped[snowflake_id_key] = mapped_column(BigInteger, init=False, primary_key=True)
    # Image the exercise was generated from.
    image_id: Mapped[int] = mapped_column(BigInteger, ForeignKey('image.id'), nullable=False)
    # User who requested the exercise.
    created_by: Mapped[int] = mapped_column(BigInteger, ForeignKey('wx_user.id'), nullable=False)
    title: Mapped[Optional[str]] = mapped_column(String(100), default=None)
    description: Mapped[Optional[str]] = mapped_column(Text, default=None)
    # Lifecycle: created as 'draft'; flipped to 'published' once questions exist
    # (see QaExerciseProcessor.process).
    status: Mapped[str] = mapped_column(String(20), default='draft')
    question_count: Mapped[int] = mapped_column(Integer, default=0)
    # Free-form extension payload.
    ext: Mapped[Optional[dict]] = mapped_column(MySQLJSON, default=None)

    __table_args__ = (
        Index('idx_qa_exercise_image_status', 'image_id', 'status'),
        Index('idx_qa_exercise_creator', 'created_by'),
    )
class QaQuestion(Base):
    """A single generated question belonging to a QA exercise."""

    __tablename__ = 'qa_question'

    id: Mapped[snowflake_id_key] = mapped_column(BigInteger, init=False, primary_key=True)
    user_id: Mapped[int] = mapped_column(BigInteger, ForeignKey('wx_user.id'), nullable=False)
    exercise_id: Mapped[int] = mapped_column(BigInteger, ForeignKey('qa_exercise.id'), nullable=False)
    # Denormalized from the exercise so questions can be looked up by image directly.
    image_id: Mapped[int] = mapped_column(BigInteger, ForeignKey('image.id'), nullable=False)
    # English question text.
    question: Mapped[str] = mapped_column(Text, nullable=False)
    payload: Mapped[Optional[dict]] = mapped_column(MySQLJSON, default=None)
    # Generated answers/options/cloze data (populated by QaExerciseProcessor).
    ext: Mapped[Optional[dict]] = mapped_column(MySQLJSON, default=None)

    __table_args__ = (
        Index('idx_qa_question_exercise', 'exercise_id'),
        Index('idx_qa_question_image', 'image_id'),
    )
class QaPracticeSession(Base):
    """One user's practice run over an exercise (progress, score, lifecycle)."""

    __tablename__ = 'qa_practice_session'

    id: Mapped[snowflake_id_key] = mapped_column(BigInteger, init=False, primary_key=True)
    exercise_id: Mapped[int] = mapped_column(BigInteger, ForeignKey('qa_exercise.id'), nullable=False)
    starter_user_id: Mapped[int] = mapped_column(BigInteger, ForeignKey('wx_user.id'), nullable=False)
    share_id: Mapped[Optional[int]] = mapped_column(BigInteger, nullable=True)
    status: Mapped[str] = mapped_column(String(20), default='ongoing')
    # Fix: Mapped[...] must annotate the *Python* type (datetime/float). The
    # original annotated the SQLAlchemy column types DateTime/Float, which
    # SQLAlchemy 2.0 rejects inside a Mapped[] annotation. The column types
    # remain DateTime/Float via the explicit mapped_column arguments.
    started_at: Mapped[Optional[datetime]] = mapped_column(DateTime, default=None)
    completed_at: Mapped[Optional[datetime]] = mapped_column(DateTime, default=None)
    # Progress snapshot, e.g. {'current_index', 'answered', 'correct', 'attempts', 'total_questions'}.
    progress: Mapped[Optional[dict]] = mapped_column(MySQLJSON, default=None)
    score: Mapped[Optional[float]] = mapped_column(Float, default=None)
    ext: Mapped[Optional[dict]] = mapped_column(MySQLJSON, default=None)

    __table_args__ = (
        Index('idx_qa_session_user_exercise', 'starter_user_id', 'exercise_id', 'status'),
    )
class QaQuestionAttempt(Base):
    """A user's answer attempt on one question, across all practice modes."""

    __tablename__ = 'qa_question_attempt'

    id: Mapped[snowflake_id_key] = mapped_column(BigInteger, init=False, primary_key=True)
    question_id: Mapped[int] = mapped_column(BigInteger, ForeignKey('qa_question.id'), nullable=False)
    exercise_id: Mapped[int] = mapped_column(BigInteger, ForeignKey('qa_exercise.id'), nullable=False)
    user_id: Mapped[int] = mapped_column(BigInteger, ForeignKey('wx_user.id'), nullable=False)
    # Background evaluation task, when one is needed (e.g. audio answers).
    task_id: Mapped[Optional[int]] = mapped_column(BigInteger, ForeignKey('image_processing_task.id'), nullable=True)
    # Uploaded recording for audio-mode answers.
    recording_id: Mapped[Optional[int]] = mapped_column(BigInteger, ForeignKey('recording.id'), nullable=True)
    # Choice-mode answer: list of selected option contents.
    choice_options: Mapped[Optional[list]] = mapped_column(MySQLJSON, default=None)
    # Cloze-mode answer: the single chosen word (first of the submitted list).
    cloze_options: Mapped[Optional[str]] = mapped_column(String(100), default=None)
    # Free-text-mode answer.
    input_text: Mapped[Optional[str]] = mapped_column(Text, default=None)
    # Evaluation lifecycle; starts at 'pending' (queried elsewhere as 'completed').
    status: Mapped[str] = mapped_column(String(20), default='pending')
    evaluation: Mapped[Optional[dict]] = mapped_column(MySQLJSON, default=None)
    ext: Mapped[Optional[dict]] = mapped_column(MySQLJSON, default=None)

    __table_args__ = (
        Index('idx_qa_attempt_session_question', 'exercise_id', 'question_id', 'status'),
        Index('idx_qa_attempt_user', 'user_id'),
        Index('idx_qa_attempt_task', 'task_id'),
    )

132
backend/app/ai/schema/qa.py Normal file
View File

@@ -0,0 +1,132 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from typing import Optional, List, Dict, Any
from pydantic import BaseModel, Field
from backend.common.schema import SchemaBase
class CreateQaExerciseRequest(SchemaBase):
image_id: int
title: Optional[str] = None
description: Optional[str] = None
class CreateQaExerciseTaskResponse(SchemaBase):
task_id: str
status: str
class QaExerciseSchema(SchemaBase):
id: str
image_id: str
title: Optional[str] = None
description: Optional[str] = None
status: str
question_count: int
class QaQuestionSchema(SchemaBase):
id: str
exercise_id: str
image_id: str
question: str
ext: Optional[Dict[str, Any]] = None
class QaSessionSchema(SchemaBase):
started_at: Optional[str] = None
progress: Optional[Dict[str, Any]] = None
class CreateAttemptRequest(SchemaBase):
exercise_id: int
mode: str
selected_options: Optional[List[str]] = None
input_text: Optional[str] = None
cloze_options: Optional[List[str]] = None
file_id: Optional[int] = None
session_id: Optional[int] = None
class CreateAttemptTaskResponse(SchemaBase):
attempt_id: str
task_id: Optional[str] = None
status: str
is_correct: Optional[str] = None
evaluation: Optional['EvaluationSchema'] = None
class TaskStatusResponse(SchemaBase):
task_id: str
image_id: str
ref_type: str
ref_id: str
status: str
error_message: Optional[str] = None
class AttemptResultResponse(SchemaBase):
attempt_id: str
status: str
is_correct: Optional[str] = None
evaluation: Optional['EvaluationSchema'] = None
mode: str
selected_options: Optional[List[str]] = None
input_text: Optional[str] = None
recording_id: Optional[str] = None
stt_text: Optional[str] = None
updated_time: Optional[str] = None
class QaExerciseWithQuestionsSchema(SchemaBase):
exercise: QaExerciseSchema
questions: List[QaQuestionSchema]
session: Optional[QaSessionSchema] = None
class QuestionEvaluationResponse(SchemaBase):
evaluation: Optional['EvaluationSchema'] = None
class ChoiceNode(SchemaBase):
options: List[str] = []
evaluation: 'EvaluationSchema'
class ClozeNode(SchemaBase):
input: str
evaluation: 'EvaluationSchema'
class FreeTextNode(SchemaBase):
text: str
evaluation: 'EvaluationSchema'
class AudioNode(SchemaBase):
recording_id: Optional[str] = None
stt_text: Optional[str] = None
evaluation: 'EvaluationSchema'
class QuestionLatestResultResponse(SchemaBase):
    """Latest attempt result for a question.

    NOTE(review): presumably only the node matching `type` is populated —
    confirm against qa_service.get_question_evaluation.
    """

    session_id: Optional[str] = None
    # Mode of the latest attempt (choice / cloze / free_text / audio).
    type: Optional[str] = None
    choice: Optional[ChoiceNode] = None
    cloze: Optional[ClozeNode] = None
    free_text: Optional[FreeTextNode] = None
    audio: Optional[AudioNode] = None
class IncorrectSelectionItem(SchemaBase):
content: str
error_type: Optional[str] = None
error_reason: Optional[str] = None
class SelectedDetail(SchemaBase):
correct: List[str] = []
incorrect: List[IncorrectSelectionItem] = []
class EvaluationSchema(SchemaBase):
    """Evaluation verdict for one attempt; the same shape is reused by every mode node."""

    type: Optional[str] = None
    # Overall verdict string — semantics defined by the evaluator; verify against qa_service.
    result: Optional[str] = None
    detail: Optional[str] = None
    # Per-option breakdown for choice mode.
    selected: Optional[SelectedDetail] = None
    missing_correct: Optional[List[str]] = None
    feedback: Optional[str] = None
# Pydantic forward references resolution
CreateAttemptTaskResponse.model_rebuild()
AttemptResultResponse.model_rebuild()
QuestionEvaluationResponse.model_rebuild()
QuestionLatestResultResponse.model_rebuild()

View File

@@ -355,13 +355,13 @@ class ImageService:
await db.commit()
task_processing_success = True
# step 6 create scene task
try:
from backend.app.ai.service.sentence_service import SentenceService
if task:
logger.info(f"Create scene sentence task for image {task.image_id}")
await SentenceService.create_scene_task(task.image_id, task.user_id, 'scene_sentence')
except Exception as scene_err:
logger.error(f"Failed to create scene sentence task for task {task_id}: {str(scene_err)}")
# try:
# from backend.app.ai.service.sentence_service import SentenceService
# if task:
# logger.info(f"Create scene sentence task for image {task.image_id}")
# await SentenceService.create_scene_task(task.image_id, task.user_id, 'scene_sentence')
# except Exception as scene_err:
# logger.error(f"Failed to create scene sentence task for task {task_id}: {str(scene_err)}")
except Exception as e:
await db.rollback()

View File

@@ -0,0 +1,120 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import math
from abc import ABC, abstractmethod
from typing import Dict, Any, Tuple, Optional
from sqlalchemy.ext.asyncio import AsyncSession
from backend.app.ai.model.image_task import ImageTaskStatus, ImageProcessingTask
from backend.app.ai.crud.image_task_crud import image_task_dao
from backend.app.admin.service.points_service import points_service
from backend.app.ai.service.rate_limit_service import rate_limit_service
from backend.database.db import background_db_session
from backend.common.const import LLM_CHAT_COST
from backend.common.log import log as logger
from backend.app.ai.tasks import update_task_status_with_retry
class TaskProcessor(ABC):
    """Strategy interface: the task-specific business logic run by ImageTaskService."""

    @abstractmethod
    async def process(self, db: AsyncSession, task: ImageProcessingTask) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        """
        Execute the specific business logic for the task.

        Args:
            db: Database session
            task: The task object

        Returns:
            Tuple containing:
            - result: The result data to be stored in the task
            - token_usage: Token usage information for points deduction
        """
        pass
class ImageTaskService:
    """Runs an image task through its full lifecycle around a TaskProcessor."""

    async def process_task(self, task_id: int, user_id: int, processor: TaskProcessor):
        """
        Generic method to process an image task with standard lifecycle management:
        1. Status update (PROCESSING)
        2. Business logic execution (via processor)
        3. Points deduction
        4. Status update (COMPLETED/FAILED)
        5. Task slot release
        """
        try:
            async with background_db_session() as db:
                task = await image_task_dao.get(db, task_id)
                if not task:
                    logger.warning(f"Task {task_id} not found during processing")
                    return
                await image_task_dao.update_task_status(db, task_id, ImageTaskStatus.PROCESSING)
                # Execute specific business logic
                # Processor should return the final result dict and token usage info
                result, token_usage = await processor.process(db, task)
                # Calculate and deduct points
                total_tokens = 0
                if isinstance(token_usage, dict):
                    # Check if token_usage is nested (legacy structure) or direct
                    if "total_tokens" in token_usage:
                        total_tokens = int(token_usage.get("total_tokens") or 0)
                    else:
                        total_tokens = int((token_usage.get("token_usage") or {}).get("total_tokens") or 0)
                # Base cost is one LLM_CHAT_COST unit; with usage data, charge
                # one unit per started block of 1000 tokens.
                deduct_amount = LLM_CHAT_COST
                if total_tokens > 0:
                    units = math.ceil(max(total_tokens, 1) / 1000)
                    deduct_amount = units * LLM_CHAT_COST
                # Use ref_id as the related_id for points record
                points_deducted = await points_service.deduct_points_with_db(
                    user_id=task.user_id,
                    amount=deduct_amount,
                    db=db,
                    related_id=task.ref_id,
                    details={
                        "task_id": task_id,
                        "ref_type": task.ref_type,
                        "token_usage": total_tokens
                    },
                    action=task.ref_type
                )
                if not points_deducted:
                    raise Exception("Failed to deduct points")
                # If result doesn't have token_usage, we might want to add it,
                # but let's assume processor handles result structure.
                # Actually, some existing logic adds token_usage to result.
                if isinstance(result, dict) and 'token_usage' not in result:
                    result['token_usage'] = token_usage
                await update_task_status_with_retry(
                    db, task_id, ImageTaskStatus.COMPLETED,
                    result=result
                )
                await db.commit()
        except Exception as e:
            logger.error(f"Error processing task {task_id}: {str(e)}")
            # Best-effort: mark the task FAILED in a fresh session; swallow
            # secondary failures so the original error stays the logged one.
            try:
                async with background_db_session() as db:
                    await update_task_status_with_retry(
                        db, task_id, ImageTaskStatus.FAILED,
                        error_message=str(e)
                    )
                    await db.commit()
            except Exception:
                pass
        finally:
            # Always free the caller's concurrency slot, even on failure.
            try:
                await rate_limit_service.release_task_slot(user_id)
            except Exception:
                pass
image_task_service = ImageTaskService()

View File

@@ -0,0 +1,587 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import asyncio
import json
import math
from typing import Optional, List, Dict, Any, Tuple
from datetime import datetime
from sqlalchemy.ext.asyncio import AsyncSession
from backend.database.db import async_db_session, background_db_session
from backend.app.ai.crud.qa_crud import qa_exercise_dao, qa_question_dao, qa_attempt_dao, qa_session_dao
from backend.app.ai.crud.image_task_crud import image_task_dao
from backend.app.ai.crud.image_curd import image_dao
from backend.app.ai.model.image_task import ImageTaskStatus
from backend.app.ai.schema.image_task import CreateImageTaskParam
from backend.app.admin.service.points_service import points_service
from backend.app.ai.service.rate_limit_service import rate_limit_service
from backend.common.exception import errors
from backend.middleware.qwen import Qwen
from backend.middleware.tencent_hunyuan import Hunyuan
from backend.core.conf import settings
from backend.app.ai.service.recording_service import recording_service
from backend.common.const import EXERCISE_TYPE_CHOICE, EXERCISE_TYPE_CLOZE, EXERCISE_TYPE_FREE_TEXT, LLM_CHAT_COST
from backend.app.admin.schema.wx import DictLevel
from backend.app.ai.service.image_task_service import TaskProcessor, image_task_service
from backend.app.ai.model.image_task import ImageProcessingTask
class QaExerciseProcessor(TaskProcessor):
    """TaskProcessor that asks an LLM to generate QA questions for an image's exercise."""

    async def process(self, db: AsyncSession, task: ImageProcessingTask) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        """Generate questions from the image's recognition description and persist them.

        Returns (result, token_usage) for ImageTaskService's points deduction
        and final status update.
        """
        image = await image_dao.get(db, task.image_id)
        exercise = await qa_exercise_dao.get(db, task.ref_id)
        payload = {}
        # Pull the English description out of the stored recognition result;
        # it may be a string or a list of strings.
        rr = (image.details or {}).get('recognition_result') or {}
        description = ''
        try:
            d = rr.get('description')
            if isinstance(d, str):
                description = d
            elif isinstance(d, list) and d:
                description = d[0] if isinstance(d[0], str) else ''
        except Exception:
            description = ''
        payload = {'description': description}
        prompt = (
            '### 任务目标\n'
            '请基于给定的图片英语描述生成【3-4个细节类半开放问题】返回包含**问题、多版本回答、正确/错误选项、填词模式**的结构化JSON数据用于英语口语练习程序自动化调用。\n'
            '### 图片描述\n'
            + json.dumps(payload, ensure_ascii=False) + '\n'
            '### 生成要求\n'
            '1. 问题规则细节类半开放特殊疑问句覆盖至少2个维度主体特征/动作行为/场景环境), 每个问题的维度不能重复,题干和选项都是英文;\n'
            '2. JSON数据规则\n'
            '   - 根节点:`qa_list`数组3-4个问答对象\n'
            '   - 每个问答对象字段:\n'
            '     1. `question`:问题内容;\n'
            '     2. `dimension`:考察维度;\n'
            '     3. `key_pronunciation_words`核心发音单词2-3个\n'
            '     4. `answers`多版本回答spoken/written/friendly/lively\n'
            '     5. `correct_options`:正确选项数组(含`content`/`type`字段);\n'
            '     6. `incorrect_options`:错误选项数组(含`content`/`error_type`/`error_reason`字段);\n'
            '     7. `cloze`:填词模式专项字段:\n'
            '        - `sentence_with_blank`:含 ___ 的填空句;\n'
            '        - `correct_word`:填空处原词,一个正确选项;\n'
            '        - `distractor_words`近义词干扰项数组3-4个无语法类干扰\n'
            '3. 输出限制仅返回JSON字符串无其他解释文字确保可被`JSON.parse`直接解析。\n'
            '输入图片描述:' + json.dumps(payload, ensure_ascii=False) + '\n'
            '### 输出JSON格式\n'
            '{ "qa_list": [ { "question": "", "dimension": "", "key_pronunciation_words": [], "answers": { "spoken": "", "written": "", "friendly": "", "lively": "" }, "correct_options": [ { "content": "", "type": "core" } ], "incorrect_options": [ { "content": "", "error_type": "词汇混淆", "error_reason": "" } ], "cloze": { "sentence_with_blank": "", "correct_word": "", "distractor_words": [] } } ] }'
        )
        res = await self._call_llm_chat(prompt=prompt, image_id=image.id, user_id=task.user_id, chat_type='qa_exercise')
        if not res.get('success'):
            raise Exception(res.get('error') or "LLM call failed")
        token_usage = res.get('token_usage') or {}
        # Parse the LLM output: accept either {"qa_list": [...]} or a bare list;
        # any parse failure yields an empty question set (exercise stays 'draft').
        items = []
        try:
            parsed = json.loads(res.get('result')) if isinstance(res.get('result'), str) else res.get('result')
            if isinstance(parsed, dict):
                items = parsed.get('qa_list') or []
            elif isinstance(parsed, list):
                items = parsed
        except Exception:
            items = []
        created = 0
        for it in items:
            q = await qa_question_dao.create(db, {
                'exercise_id': exercise.id,
                'image_id': image.id,
                'question': it.get('question') or '',
                'payload': None,
                'user_id': task.user_id,
                'ext': {
                    'dimension': it.get('dimension'),
                    'key_pronunciation_words': it.get('key_pronunciation_words'),
                    'answers': it.get('answers'),
                    'cloze': it.get('cloze'),
                    'correct_options': it.get('correct_options'),
                    'incorrect_options': it.get('incorrect_options'),
                },
            })
            created += 1
        exercise.question_count = created
        exercise.status = 'published' if created > 0 else 'draft'
        await db.flush()
        # Bootstrap a practice session for the creator if one doesn't exist yet.
        if created > 0:
            existing_session = await qa_session_dao.get_latest_by_user_exercise(db, task.user_id, exercise.id)
            if not existing_session:
                prog = {'current_index': 0, 'answered': 0, 'correct': 0, 'attempts': [], 'total_questions': created}
                await qa_session_dao.create(db, {
                    'exercise_id': exercise.id,
                    'starter_user_id': task.user_id,
                    'share_id': None,
                    'status': 'ongoing',
                    'started_at': datetime.now(),
                    'completed_at': None,
                    'progress': prog,
                    'score': None,
                    'ext': None,
                })
                await db.flush()
        # Return result and token_usage.
        # Note: image_task_service handles points deduction and final status update.
        result = {'token_usage': token_usage, 'count': created}
        return result, token_usage

    async def _call_llm_chat(self, prompt: str, image_id: int, user_id: int, chat_type: str) -> Dict[str, Any]:
        """Call the configured LLM backend (Qwen or Hunyuan) and normalize the reply.

        Returns a dict with 'success', and on success 'result' plus 'token_usage';
        on failure an 'error' message. Never raises.
        """
        model_type = (settings.LLM_MODEL_TYPE or "").lower()
        messages = [{"role": "system", "content": "You are a helpful assistant."}, {'role': 'user', 'content': prompt}]
        if model_type == 'qwen':
            try:
                # NOTE: the Qwen branch sends only the user message, not `messages` — TODO confirm intended.
                qres = await Qwen.chat(messages=[{'role': 'user', 'content': prompt}], image_id=image_id, user_id=user_id, api_type=chat_type)
                if qres and qres.get('success'):
                    return {"success": True, "result": qres.get("result"), "token_usage": qres.get("token_usage") or {}}
            except Exception as e:
                return {"success": False, "error": str(e)}
            return {"success": False, "error": "LLM call failed"}
        else:
            try:
                res = await Hunyuan.chat(messages=messages, image_id=image_id, user_id=user_id, system_prompt=None, chat_type=chat_type)
                if res and res.get('success'):
                    return res
            except Exception as e:
                return {"success": False, "error": str(e)}
            return {"success": False, "error": "LLM call failed"}
class QaService:
async def create_exercise_task(self, image_id: int, user_id: int, title: Optional[str] = None, description: Optional[str] = None) -> Dict[str, Any]:
if not await points_service.check_sufficient_points(user_id, LLM_CHAT_COST):
raise errors.ForbiddenError(msg='积分不足,请获取积分后继续使用')
slot_acquired = await rate_limit_service.acquire_task_slot(user_id)
if not slot_acquired:
max_tasks = await rate_limit_service.get_user_task_limit(user_id)
raise errors.ForbiddenError(msg=f'用户同时最多只能运行 {max_tasks} 个任务,请等待现有任务完成后再试')
async with async_db_session.begin() as db:
image = await image_dao.get(db, image_id)
if not image:
raise errors.NotFoundError(msg='Image not found')
exercise = await qa_exercise_dao.create(db, {
'image_id': image_id,
'created_by': user_id,
'title': title,
'description': description,
'status': 'draft',
})
await db.flush()
task = await image_task_dao.create_task(db, CreateImageTaskParam(
image_id=image_id,
user_id=user_id,
dict_level=(getattr(getattr(image, 'dict_level', None), 'name', None) or 'LEVEL1'),
ref_type='qa_exercise',
ref_id=exercise.id,
status=ImageTaskStatus.PENDING,
))
await db.flush()
task_id = task.id
await db.commit()
processor = QaExerciseProcessor()
asyncio.create_task(image_task_service.process_task(task_id, user_id, processor))
return {'task_id': str(task_id), 'status': 'accepted'}
async def get_task_status(self, task_id: int) -> Dict[str, Any]:
async with async_db_session() as db:
task = await image_task_dao.get(db, task_id)
if not task:
raise errors.NotFoundError(msg='Task not found')
return {
'task_id': str(task.id),
'image_id': str(task.image_id),
'ref_type': task.ref_type,
'ref_id': str(task.ref_id),
'status': task.status,
'error_message': task.error_message,
}
    async def list_exercises_by_image(self, image_id: int, user_id: Optional[int] = None) -> Optional[Dict[str, Any]]:
        """Return the latest exercise for an image with its questions and, when
        user_id is given, that user's latest practice session.

        Returns None when the image or the exercise does not exist. All ids are
        stringified for JSON-safe transport of snowflake ids.
        """
        async with async_db_session() as db:
            image = await image_dao.get(db, image_id)
            if not image:
                return None
            i = await qa_exercise_dao.get_latest_by_image_id(db, image_id)
            if not i:
                return None
            qs = await qa_question_dao.get_by_exercise_id(db, i.id)
            session = None
            if user_id:
                s = await qa_session_dao.get_latest_by_user_exercise(db, user_id, i.id)
                if s:
                    session = {
                        'started_at': s.started_at.isoformat() if s.started_at else None,
                        'progress': s.progress,
                    }
            ret = {
                'exercise': {
                    'id': str(i.id),
                    'image_id': str(i.image_id),
                    'title': i.title,
                    'description': i.description,
                    'status': i.status,
                    'question_count': i.question_count,
                },
                'session': session,
                'questions': [
                    {
                        'id': str(q.id),
                        'exercise_id': str(q.exercise_id),
                        'image_id': str(q.image_id),
                        'question': q.question,
                        'ext': q.ext,
                    } for q in qs
                ]
            }
            return ret
@staticmethod
def _norm_text(v) -> str:
    """Normalize an option/answer for case- and whitespace-insensitive comparison."""
    try:
        return str(v).strip().lower()
    except Exception:
        return str(v)

@staticmethod
async def _sync_session_correctness(db, attempt, is_correct) -> None:
    """Propagate a grade into the owning session's progress blob.

    Finds the progress entry whose ``attempt_id`` matches ``attempt``,
    rewrites its ``is_correct`` flag, and adjusts the running ``correct``
    counter by the delta between the new and the previous grade.
    """
    sid = (attempt.ext or {}).get('session_id')
    if not sid:
        return
    s = await qa_session_dao.get(db, sid)
    if not s or s.exercise_id != attempt.exercise_id:
        return
    prog = s.progress or {}
    attempts = list(prog.get('attempts') or [])
    # BUGFIX: 'prev' was previously left unbound when no entry matched the
    # loop below, raising NameError; default it to None (counts as not-correct).
    prev = None
    for a in attempts:
        if a.get('attempt_id') == attempt.id:
            prev = a.get('is_correct')
            a['is_correct'] = is_correct
            break
    prev_correct = 1 if prev == 'correct' else 0
    new_correct = 1 if is_correct == 'correct' else 0
    prog['attempts'] = attempts
    prog['correct'] = int(prog.get('correct') or 0) + (new_correct - prev_correct)
    s.progress = prog
    await db.flush()

async def submit_attempt(self, question_id: int, exercise_id: int, user_id: int, mode: str, selected_options: Optional[List[str]] = None, input_text: Optional[str] = None, cloze_options: Optional[List[str]] = None, file_id: Optional[int] = None, session_id: Optional[int] = None) -> Dict[str, Any]:
    """Record and grade a user's answer to a QA question.

    Choice and cloze answers are graded synchronously against the answer key
    stored in the question's ``ext``; free-text answers are handed to an
    asynchronous LLM evaluation task and graded later.

    :param mode: one of EXERCISE_TYPE_CHOICE / EXERCISE_TYPE_CLOZE /
        EXERCISE_TYPE_FREE_TEXT; any other value records the attempt but
        returns ``None`` (unchanged legacy behavior).
    :param file_id: currently unused — presumably reserved for audio answers;
        confirm before removing.
    :raises errors.NotFoundError: when the question does not exist or does not
        belong to ``exercise_id``.
    """
    async with async_db_session.begin() as db:
        q = await qa_question_dao.get(db, question_id)
        if not q or q.exercise_id != exercise_id:
            raise errors.NotFoundError(msg='Question not found')
        recording_id = None
        first_cloze = cloze_options[0] if isinstance(cloze_options, list) and cloze_options else None
        attempt = await qa_attempt_dao.get_latest_by_user_question(db, user_id=user_id, question_id=question_id)
        if attempt:
            # Re-answer: only the field that belongs to the current mode is replaced.
            attempt.task_id = None
            attempt.choice_options = selected_options if mode == EXERCISE_TYPE_CHOICE else attempt.choice_options
            attempt.cloze_options = first_cloze if mode == EXERCISE_TYPE_CLOZE else attempt.cloze_options
            attempt.input_text = input_text if mode == EXERCISE_TYPE_FREE_TEXT else attempt.input_text
            attempt.status = 'pending'
            ext0 = attempt.ext or {}
            if session_id:
                ext0['session_id'] = session_id
            attempt.ext = ext0
            await db.flush()
        else:
            attempt = await qa_attempt_dao.create(db, {
                'question_id': question_id,
                'exercise_id': exercise_id,
                'user_id': user_id,
                'task_id': None,
                'recording_id': recording_id,
                'choice_options': selected_options if mode == EXERCISE_TYPE_CHOICE else None,
                'cloze_options': first_cloze if mode == EXERCISE_TYPE_CLOZE else None,
                'input_text': input_text if mode == EXERCISE_TYPE_FREE_TEXT else None,
                'status': 'pending',
                'evaluation': None,
                'ext': {'session_id': session_id} if session_id else None,
            })
        if session_id:
            s = await qa_session_dao.get(db, session_id)
            if s and s.exercise_id == exercise_id:
                prog = s.progress or {}
                attempts = list(prog.get('attempts') or [])
                entry = {
                    'attempt_id': attempt.id,
                    'question_id': question_id,
                    'mode': mode,
                    'created_at': datetime.now().isoformat(),
                    'is_correct': None,
                }
                replaced = False
                for idx, a in enumerate(attempts):
                    if a.get('question_id') == question_id:
                        entry['is_correct'] = a.get('is_correct')  # keep the old grade until re-graded
                        attempts[idx] = entry
                        replaced = True
                        break
                if not replaced:
                    attempts.append(entry)
                # NOTE(review): 'answered' grows even when an existing entry was
                # replaced (a re-answer) — confirm this is intended.
                prog['answered'] = int(prog.get('answered') or 0) + 1
                prog['attempts'] = attempts
                s.progress = prog
                await db.flush()
        if mode == EXERCISE_TYPE_FREE_TEXT:
            attempt.ext = {**(attempt.ext or {}), 'type': 'free_text', 'free_text': {'text': attempt.input_text or '', 'evaluation': None}}
            await db.flush()
            # The evaluation task is created in its own transaction so its row is
            # committed before the background coroutine starts.
            async with async_db_session.begin() as db2:
                task = await image_task_dao.create_task(db2, CreateImageTaskParam(
                    image_id=q.image_id,
                    user_id=user_id,
                    dict_level=DictLevel.LEVEL1.value,
                    ref_type='qa_question_attempt',
                    ref_id=attempt.id,
                    status=ImageTaskStatus.PENDING,
                ))
                await db2.flush()
                new_task_id = task.id  # capture before the session expires the instance
            asyncio.create_task(self._process_attempt_evaluation(new_task_id, user_id))
            sid_val = (attempt.ext or {}).get('session_id')
            return {
                'session_id': str(sid_val) if sid_val is not None else None,
                'type': 'free_text',
                'free_text': {
                    'text': attempt.input_text or '',
                    'evaluation': None,
                },
            }
        if mode == EXERCISE_TYPE_CHOICE:
            ext = q.ext or {}
            raw_correct = ext.get('correct_options') or []
            raw_incorrect = ext.get('incorrect_options') or []
            norm = self._norm_text

            def _content(o):
                # Answer-key entries may be plain strings or {'content': ...} dicts.
                return o.get('content') if isinstance(o, dict) else o

            correct_set = set(norm(_content(o)) for o in raw_correct)
            incorrect_map = {}
            for o in raw_incorrect:
                c = norm(_content(o))
                if isinstance(o, dict):
                    incorrect_map[c] = {
                        'content': o.get('content'),
                        'error_type': o.get('error_type'),
                        'error_reason': o.get('error_reason'),
                    }
                else:
                    incorrect_map[c] = {'content': o, 'error_type': None, 'error_reason': None}
            selected_list = list(attempt.choice_options or [])
            selected = set(norm(s) for s in selected_list)
            if not selected:
                is_correct = 'incorrect'
                evaluation = {'type': 'choice', 'result': '完全错误', 'detail': 'no selection', 'selected': {'correct': [], 'incorrect': []}, 'missing_correct': [_content(o) for o in raw_correct]}
            else:
                selected_correct = [_content(o) for o in raw_correct if norm(_content(o)) in selected]
                selected_incorrect = []
                for s in selected_list:
                    ns = norm(s)
                    if ns not in correct_set:
                        selected_incorrect.append(incorrect_map.get(ns) or {'content': s, 'error_type': 'unknown', 'error_reason': None})
                missing_correct = [_content(o) for o in raw_correct if norm(_content(o)) not in selected]
                if selected == correct_set and not selected_incorrect:
                    is_correct = 'correct'
                    evaluation = {'type': 'choice', 'result': '完全匹配', 'detail': is_correct, 'selected': {'correct': selected_correct, 'incorrect': []}, 'missing_correct': []}
                elif selected_correct:
                    is_correct = 'partial'
                    evaluation = {'type': 'choice', 'result': '部分匹配', 'detail': is_correct, 'selected': {'correct': selected_correct, 'incorrect': selected_incorrect}, 'missing_correct': missing_correct}
                else:
                    is_correct = 'incorrect'
                    evaluation = {'type': 'choice', 'result': '完全错误', 'detail': is_correct, 'selected': {'correct': [], 'incorrect': selected_incorrect}, 'missing_correct': [_content(o) for o in raw_correct]}
            # Persist details both on the attempt's ext and its merged evaluation.
            attempt.ext = {**(attempt.ext or {}), 'type': 'choice', 'choice': {'options': selected_list, 'evaluation': evaluation}}
            await db.flush()
            merged_eval = dict(attempt.evaluation or {})
            merged_eval['choice'] = {'options': selected_list, 'evaluation': evaluation}
            await qa_attempt_dao.update_status(db, attempt.id, 'completed', merged_eval)
            await self._sync_session_correctness(db, attempt, is_correct)
            await db.commit()
            return await self.get_question_evaluation(question_id, user_id)
        if mode == EXERCISE_TYPE_CLOZE:
            ext = q.ext or {}
            cloze = ext.get('cloze') or {}
            correct_word = cloze.get('correct_word')
            norm = self._norm_text
            # Multiple selections supported: correct when any selection matches
            # any accepted answer.
            selection_list = cloze_options or ([attempt.cloze_options] if attempt.cloze_options else ([attempt.input_text] if attempt.input_text else []))
            selection_list = [s for s in selection_list if isinstance(s, str) and s.strip()]
            user_text_first = (selection_list[0] if selection_list else '').strip()
            # The answer key may be a single string or a list of strings.
            correct_candidates = []
            if isinstance(correct_word, list):
                correct_candidates = [cw for cw in correct_word if isinstance(cw, str) and cw.strip()]
            elif isinstance(correct_word, str) and correct_word.strip():
                correct_candidates = [correct_word]
            correct_set = set(norm(cw) for cw in correct_candidates)
            selected_set = set(norm(s) for s in selection_list)
            is_correct = 'correct' if (selected_set and (selected_set & correct_set)) else 'incorrect'
            evaluation = {'type': 'cloze', 'result': '完全匹配' if is_correct == 'correct' else '完全错误', 'detail': is_correct}
            if is_correct == 'incorrect':
                evaluation['missing_correct'] = correct_candidates if correct_candidates else []
            attempt.ext = {**(attempt.ext or {}), 'type': 'cloze', 'cloze': {'input': attempt.cloze_options or user_text_first, 'evaluation': evaluation}}
            await db.flush()
            merged_eval = dict(attempt.evaluation or {})
            merged_eval['cloze'] = {'input': attempt.cloze_options or user_text_first, 'evaluation': evaluation}
            await qa_attempt_dao.update_status(db, attempt.id, 'completed', merged_eval)
            await self._sync_session_correctness(db, attempt, is_correct)
            await db.commit()
            return await self.get_question_evaluation(question_id, user_id)
        # Unknown mode: attempt stays 'pending', caller receives None (legacy behavior).
async def _process_attempt_evaluation(self, task_id: int, user_id: int):
    """Background worker: grade a free-text attempt with the LLM.

    Loads the task and its attempt, asks the LLM to compare the user's text
    with the answer key stored on the question, persists the evaluation on
    the attempt, updates the owning session's progress (if any), and marks
    the task COMPLETED or FAILED accordingly.
    """
    async with background_db_session() as db:
        task = await image_task_dao.get(db, task_id)
        if not task:
            return
        await image_task_dao.update_task_status(db, task_id, ImageTaskStatus.PROCESSING)
        attempt = await qa_attempt_dao.get(db, task.ref_id)
        if not attempt:
            await image_task_dao.update_task_status(db, task_id, ImageTaskStatus.FAILED, error_message='Attempt not found')
            await db.commit()
            return
        # Only free_text/audio attempts are graded asynchronously.
        q = await qa_question_dao.get(db, attempt.question_id)
        user_text = attempt.input_text or ''
        answers = (q.ext or {}).get('answers') or {}
        prompt = (
            '根据给定标准答案判断用户回答是否正确输出JSON{is_correct: correct|partial|incorrect, feedback: string}。'
            '标准答案:' + json.dumps(answers, ensure_ascii=False) +
            '用户回答:' + user_text
        )
        res = await self._call_llm_chat(prompt=prompt, image_id=q.image_id, user_id=user_id, chat_type='qa_attempt')
        if not res.get('success'):
            await image_task_dao.update_task_status(db, task_id, ImageTaskStatus.FAILED, error_message=res.get('error'))
            await db.commit()
            return
        try:
            parsed = json.loads(res.get('result')) if isinstance(res.get('result'), str) else res.get('result')
        except Exception:
            parsed = {}
        # BUGFIX: the LLM may return valid JSON that is not an object (e.g. null
        # or a bare string); guard so the .get() calls below cannot crash.
        if not isinstance(parsed, dict):
            parsed = {}
        evaluation = {'type': 'free_text', 'result': parsed.get('is_correct'), 'feedback': parsed.get('feedback')}
        # Persist details both on the attempt's ext and its merged evaluation.
        attempt.ext = {**(attempt.ext or {}), 'type': 'free_text', 'free_text': {'text': attempt.input_text or '', 'evaluation': evaluation}}
        await db.flush()
        merged_eval = dict(attempt.evaluation or {})
        merged_eval['free_text'] = {'text': attempt.input_text or '', 'evaluation': evaluation}
        await qa_attempt_dao.update_status(db, attempt.id, 'completed', merged_eval)
        await image_task_dao.update_task_status(db, task_id, ImageTaskStatus.COMPLETED, result={'mode': 'free_text', 'token_usage': res.get('token_usage') or {}})
        sid = (attempt.ext or {}).get('session_id')
        if sid:
            s = await qa_session_dao.get(db, sid)
            if s and s.exercise_id == attempt.exercise_id:
                prog = s.progress or {}
                attempts = list(prog.get('attempts') or [])
                # BUGFIX: 'prev' was previously unbound when no progress entry
                # matched the attempt id; default it to None.
                prev = None
                for a in attempts:
                    if a.get('attempt_id') == attempt.id:
                        prev = a.get('is_correct')
                        a['is_correct'] = parsed.get('is_correct')
                        break
                prev_correct = 1 if prev == 'correct' else 0
                new_correct = 1 if parsed.get('is_correct') == 'correct' else 0
                prog['attempts'] = attempts
                prog['correct'] = int(prog.get('correct') or 0) + (new_correct - prev_correct)
                s.progress = prog
                await db.flush()
        await db.commit()
async def _call_llm_chat(self, prompt: str, image_id: int, user_id: int, chat_type: str) -> Dict[str, Any]:
    """Dispatch a chat prompt to the configured LLM backend (Qwen or Hunyuan).

    Returns a dict that always carries ``success``; on success it also has
    ``result`` and ``token_usage``, on failure an ``error`` message.
    """
    backend = (settings.LLM_MODEL_TYPE or "").lower()
    failure = {"success": False, "error": "LLM call failed"}
    if backend == 'qwen':
        try:
            qres = await Qwen.chat(
                messages=[{'role': 'user', 'content': prompt}],
                image_id=image_id,
                user_id=user_id,
                api_type=chat_type,
            )
            if qres and qres.get('success'):
                return {"success": True, "result": qres.get("result"), "token_usage": qres.get("token_usage") or {}}
        except Exception as e:
            return {"success": False, "error": str(e)}
        return failure
    try:
        res = await Hunyuan.chat(
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {'role': 'user', 'content': prompt},
            ],
            image_id=image_id,
            user_id=user_id,
            system_prompt=None,
            chat_type=chat_type,
        )
        if res and res.get('success'):
            return res
    except Exception as e:
        return {"success": False, "error": str(e)}
    return failure
async def get_attempt_task_status(self, task_id: int) -> Dict[str, Any]:
    """Status of an attempt-evaluation task; thin alias of :meth:`get_task_status` kept for API symmetry."""
    return await self.get_task_status(task_id)
async def get_question_evaluation(self, question_id: int, user_id: int) -> Dict[str, Any]:
    """Return the latest graded result for a user's answer to a question.

    Prefers the newest completed attempt, falling back to the newest attempt
    in any state; returns ``{}`` when the user never answered the question.
    """
    async with async_db_session() as db:
        latest = await qa_attempt_dao.get_latest_completed_by_user_question(db, user_id=user_id, question_id=question_id)
        if not latest:
            latest = await qa_attempt_dao.get_latest_by_user_question(db, user_id=user_id, question_id=question_id)
        if not latest:
            return {}
        evaluation = latest.evaluation or {}
        ext = latest.ext or {}
        # BUGFIX: submit_attempt stores 'session_id' and 'type' on attempt.ext,
        # not on attempt.evaluation (which only gains choice/cloze/free_text
        # keys), so the previous evaluation lookup always yielded None. Read
        # from ext first; the evaluation lookup is kept as a legacy fallback.
        session_id = ext.get('session_id', evaluation.get('session_id'))
        ret = {
            'session_id': str(session_id) if session_id is not None else None,
            'type': ext.get('type') or evaluation.get('type'),
        }
        if 'choice' in evaluation:
            ch = evaluation.get('choice') or {}
            ret['choice'] = {
                'options': ch.get('options') or [],
                'evaluation': ch.get('evaluation') or None,
            }
        if 'cloze' in evaluation:
            cz = evaluation.get('cloze') or {}
            ret['cloze'] = {
                'input': cz.get('input') or '',
                'evaluation': cz.get('evaluation') or None,
            }
        if 'free_text' in evaluation:
            ft = evaluation.get('free_text') or {}
            ret['free_text'] = {
                'text': ft.get('text') or '',
                'evaluation': ft.get('evaluation') or None,
            }
        return ret
# Module-level singleton consumed by the QA API routes.
qa_service = QaService()

View File

@@ -96,6 +96,55 @@ class RecordingService:
return recording.file_id
return None
@staticmethod
async def get_question_audio_file_id(question_id: int, user_id: int, max_wait_time: int = 30, retry_interval: int = 2) -> Optional[int]:
    """On-demand standard-pronunciation audio for a QA question.

    Returns the file id of the existing standard recording when one is
    already stored; otherwise synthesizes the question text via TTS
    (Qwen or Tencent, chosen by ``settings.LLM_MODEL_TYPE``) — the TTS
    call persists the recording as a side effect — then re-queries.
    Returns ``None`` when the question does not exist or TTS fails.

    NOTE(review): ``max_wait_time`` and ``retry_interval`` are currently
    unused — presumably reserved for a polling loop; confirm before removing.
    """
    async with async_db_session() as db:
        # 1. Check if standard recording exists
        recording = await recording_dao.get_standard_by_ref(db, 'qa_question', question_id)
        if recording:
            return recording.file_id
    # 2. Get question content
    from backend.app.ai.crud.qa_crud import qa_question_dao
    async with async_db_session() as db:
        question = await qa_question_dao.get(db, question_id)
        if not question:
            return None
        content = question.question
        image_id = question.image_id
    # 3. Generate audio (provider chosen by LLM_MODEL_TYPE)
    try:
        model_type = (getattr(settings, "LLM_MODEL_TYPE", "") or "").lower()
        if model_type == "qwen":
            await Qwen.text_to_speak(
                content=content,
                image_id=image_id,
                user_id=user_id,
                ref_type='qa_question',
                ref_id=question_id
            )
        else:
            tts = TencentCloud()
            await tts.text_to_speak(
                content=content,
                image_id=image_id,
                user_id=user_id,
                ref_type='qa_question',
                ref_id=question_id
            )
    except Exception as e:
        logger.error(f"On-demand TTS generation failed for question_id={question_id}: {e}")
        return None
    # 4. Re-query: the TTS call above creates the standard recording row
    async with async_db_session() as db:
        recording = await recording_dao.get_standard_by_ref(db, 'qa_question', question_id)
        if recording:
            return recording.file_id
        return None
@staticmethod
async def get_standard_audio_recording_by_text_id(text_id: int) -> Optional[Recording]:
"""根据文本ID获取标准音频记录"""

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from typing import Optional, List, Dict, Any
from typing import Optional, List, Dict, Any, Tuple
from datetime import datetime
import json
import math
@@ -24,6 +24,50 @@ from backend.app.admin.service.points_service import points_service
from backend.app.ai.service.rate_limit_service import rate_limit_service
from backend.common.const import SENTENCE_TYPE_SCENE_SENTENCE, SENTENCE_TYPE_SCENE_DIALOGUE, SENTENCE_TYPE_SCENE_EXERCISE, LLM_CHAT_COST
from backend.app.ai.service.image_task_service import TaskProcessor, image_task_service
from backend.app.ai.model.image_task import ImageProcessingTask
from sqlalchemy.ext.asyncio import AsyncSession
class SceneSentenceProcessor(TaskProcessor):
    """Task processor that builds scene sentence / dialogue / exercise cards
    from the recognition result previously stored on the image."""

    async def process(self, db: AsyncSession, task: ImageProcessingTask) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        """Generate content for ``task`` and return ``(result, token_usage)``.

        Raises when the image has no recognition result, when the task's
        ``ref_type`` is unsupported, or when generation yields nothing.
        """
        img = await image_dao.get(db, task.image_id)
        details = (img.details if img else None) or {}
        if "recognition_result" not in details:
            raise Exception("Image Recognition result not found")
        rec = details["recognition_result"]
        tags = rec.get("scene_tag") or details.get("scene_tag") or []
        level2 = rec.get("level2") or {}
        en_lines = level2.get("desc_en", [])
        zh_lines = level2.get("desc_zh", [])
        vocab = level2.get("core_vocab", [])
        colls = level2.get("collocations", [])
        kind = task.ref_type
        if kind == SENTENCE_TYPE_SCENE_DIALOGUE:
            outcome = await SentenceService.generate_scene_dialogue(task.image_id, task.user_id, tags, en_lines, zh_lines, vocab, colls)
        elif kind == SENTENCE_TYPE_SCENE_EXERCISE:
            outcome = await SentenceService.generate_sentence_exercise_card(task.image_id, task.user_id, tags, en_lines, zh_lines, vocab, colls)
        elif kind == SENTENCE_TYPE_SCENE_SENTENCE:
            outcome = await SentenceService.generate_scene_sentence(task.image_id, task.user_id, {
                "description": rec.get("description") or "",
                "scene_tag": tags,
                "desc_en": en_lines,
                "desc_zh": zh_lines,
                "core_vocab": vocab,
                "collocations": colls,
                "user_level": "intermediate",
            })
        else:
            raise Exception(f"Unsupported card type: {kind}")
        if not outcome:
            raise Exception("Task generation returned empty result")
        return outcome, (outcome.get("token_usage") or {})
class SentenceService:
@staticmethod
@@ -45,9 +89,8 @@ class SentenceService:
"输出要求:\n"
"0. description是图片的详细描述围绕描述展开后续的分析。\n"
"1. 内容约束:基于基础句型扩展功能标签、场景说明,每句补充「发音提示(重音/连读)」等输出结构中要求的内容,需符合现实生活和真实世界的习惯。\n"
"2. 格式约束:严格按照下方JSON结构输出无额外解释确保字段完整、值为数组/字符串类型\n"
"3. 语言约束所有英文内容符合日常沟通表达无语法错误中文翻译精准场景说明简洁易懂≤50字\n"
"4. 严格按照JSON结构输出无额外解释确保字段完整、值为数组/字符串类型,输出的 JSON 结构是:\n"
"2. 语言约束:所有英文内容符合日常沟通表达无语法错误中文翻译精准场景说明简洁易懂≤50字\n"
"3. 输出限制仅返回JSON字符串无其他解释文字确保可被`JSON.parse`直接解析,确保字段完整、值为数组/字符串类型,输出的 JSON 结构是:\n"
)
struct = (
"""
@@ -204,7 +247,7 @@ class SentenceService:
return _ret
@staticmethod
async def generate_scene_dialogue(image_id: int, user_id: int, scene_tag: str, desc_en: List[str], desc_zh: List[str], core_vocab: List[str], collocations: List[str]) -> List[SentenceCard]:
async def generate_scene_dialogue(image_id: int, user_id: int, scene_tag: str, desc_en: List[str], desc_zh: List[str], core_vocab: List[str], collocations: List[str]) -> Dict[str, Any]:
payload = {
"scene_tag": scene_tag,
"desc_en": desc_en,
@@ -257,8 +300,8 @@ class SentenceService:
"details": json.dumps(item, ensure_ascii=False),
"called_at": start_at,
})
created.append(card)
return created
created.append(card)
return {"created": created, "token_usage": res.get("token_usage") or {}}
@staticmethod
async def _call_scene_llm(prompt: str, image_id: int, user_id: int, chat_type: str) -> Dict[str, Any]:
@@ -268,7 +311,8 @@ class SentenceService:
qres = await Qwen.chat(
messages=[{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": prompt}],
image_id=image_id,
user_id=user_id
user_id=user_id,
api_type=chat_type
)
if qres and qres.get("success"):
return {"success": True, "result": qres.get("result"), "image_chat_id": None, "token_usage": qres.get("token_usage") or {}}
@@ -291,7 +335,7 @@ class SentenceService:
return {"success": False, "error": "LLM call failed"}
@staticmethod
async def generate_sentence_exercise_card(image_id: int, user_id: int, scene_tag: str, desc_en: List[str], desc_zh: List[str], core_vocab: List[str], collocations: List[str]) -> List[SentenceCard]:
async def generate_sentence_exercise_card(image_id: int, user_id: int, scene_tag: str, desc_en: List[str], desc_zh: List[str], core_vocab: List[str], collocations: List[str]) -> Dict[str, Any]:
start_at = datetime.now()
items = []
max_len = min(len(desc_en or []), len(desc_zh or []))
@@ -321,7 +365,7 @@ class SentenceService:
"called_at": start_at,
})
created.append(card)
return created
return {"created": created, "token_usage": {}}
@staticmethod
async def create_scene_task(image_id: int, user_id: int, scene_type: str) -> dict:
@@ -365,79 +409,8 @@ class SentenceService:
@staticmethod
async def _process_scene_task(task_id: int, user_id: int) -> None:
from backend.common.log import log as logger
task_processing_success = False
points_deducted = False
try:
async with background_db_session() as db:
task = await image_task_dao.get(db, task_id)
image = await image_dao.get(db, task.image_id)
if not image or not image.details or "recognition_result" not in image.details:
raise Exception("Image Recognition result not found")
recognition = image.details["recognition_result"]
scene_tag = recognition.get("scene_tag") or image.details.get("scene_tag") or []
description = recognition.get("description") or ""
level2 = recognition.get("level2") or {}
desc_en = level2.get("desc_en", [])
desc_zh = level2.get("desc_zh", [])
core_vocab = level2.get("core_vocab", [])
collocations = level2.get("collocations", [])
if task.ref_type == SENTENCE_TYPE_SCENE_DIALOGUE:
await SentenceService.generate_scene_dialogue(task.image_id, task.user_id, scene_tag, desc_en, desc_zh, core_vocab, collocations)
elif task.ref_type == SENTENCE_TYPE_SCENE_EXERCISE:
await SentenceService.generate_sentence_exercise_card(task.image_id, task.user_id, scene_tag, desc_en, desc_zh, core_vocab, collocations)
elif task.ref_type == SENTENCE_TYPE_SCENE_SENTENCE:
payload = {
"description": description,
"scene_tag": scene_tag,
"desc_en": desc_en,
"desc_zh": desc_zh,
"core_vocab": core_vocab,
"collocations": collocations,
"user_level": "intermediate",
}
result = await SentenceService.generate_scene_sentence(task.image_id, task.user_id, payload)
else:
raise Exception(f"Unsupported card type: {task.ref_type}")
async with background_db_session() as db:
await db.begin()
image = await image_dao.get(db, task.image_id)
total_tokens = 0
if isinstance(result, dict):
total_tokens = int((result.get("token_usage") or {}).get("total_tokens") or 0)
deduct_amount = LLM_CHAT_COST
if total_tokens > 0:
units = math.ceil(max(total_tokens, 1) / 1000)
deduct_amount = units * LLM_CHAT_COST
points_deducted = await points_service.deduct_points_with_db(
user_id=task.user_id,
amount=deduct_amount,
db=db,
related_id=image.id if image else None,
details={"task_id": task_id, "sentence_type": task.ref_type, "token_usage": total_tokens},
action=task.ref_type
)
if not points_deducted:
raise Exception("Failed to deduct points")
from backend.app.ai.tasks import update_task_status_with_retry
await update_task_status_with_retry(db, task_id, ImageTaskStatus.COMPLETED, result=result)
await db.commit()
task_processing_success = True
except Exception as e:
logger.error(f"Error processing sentence card task {task_id}: {str(e)}")
try:
async with background_db_session() as db:
await db.begin()
from backend.app.ai.tasks import update_task_status_with_retry
await update_task_status_with_retry(db, task_id, ImageTaskStatus.FAILED, error_message=str(e))
await db.commit()
except Exception:
pass
finally:
try:
await rate_limit_service.release_task_slot(user_id)
except Exception:
pass
processor = SceneSentenceProcessor()
await image_task_service.process_task(task_id, user_id, processor)
@staticmethod
async def get_task_status(task_id: int) -> dict:

View File

@@ -21,8 +21,11 @@ POINTS_ACTION_DEBT_SETTLED = "debt_settled"
API_TYPE_RECOGNITION = 'recognition'
FREE_TRIAL_BALANCE = 30
FREE_TRIAL_BALANCE = 100
SENTENCE_TYPE_SCENE_DIALOGUE = "scene_dialogue"
SENTENCE_TYPE_SCENE_EXERCISE = "scene_exercise"
SENTENCE_TYPE_SCENE_SENTENCE = "scene_sentence"
EXERCISE_TYPE_CHOICE = "choice"
EXERCISE_TYPE_CLOZE = "cloze"
EXERCISE_TYPE_FREE_TEXT = "free_text"

View File

@@ -19,7 +19,7 @@ app = register_app()
@app.get("/")
async def read_root():
await wx_user_index_history()
# await wx_user_index_history()
# res = await SentenceService()._process_scene_task(2111026809104629760, 2108963527040565248)
return {"Hello": f"World, {datetime.now().isoformat()}"}

View File

@@ -32,7 +32,7 @@ class Qwen:
EMBEDDING_URL = "https://dashscope.aliyuncs.com/api/v1/services/embeddings/multimodal-embedding"
@staticmethod
async def text_to_speak(content: str, image_text_id: int | None = None, image_id: int | None = None, user_id: int | None = None) -> Dict[str, Any]:
async def text_to_speak(content: str, image_text_id: int | None = None, image_id: int | None = None, user_id: int | None = None, ref_type: str | None = None, ref_id: int | None = None) -> Dict[str, Any]:
api_key = settings.QWEN_API_KEY
model_name = "qwen3-tts-flash"
voice = "Jennifer"
@@ -118,7 +118,10 @@ class Qwen:
ct = "audio/wav"
except Exception:
pass
file_name = f"{image_text_id}_std.{ext}"
if ref_id:
file_name = f"{ref_id}_std.{ext}"
else:
file_name = f"{image_text_id}_std.{ext}"
upload_file = UploadFile(filename=file_name, file=io.BytesIO(audio_data), headers={}, size=len(audio_data))
file_response = await file_service.upload_file_with_content_type(file=upload_file, content_type=ct, metadata={"is_standard_audio": True})
recording_id = await recording_service.create_recording_record_with_details(
@@ -130,6 +133,8 @@ class Qwen:
user_id=user_id,
details=response_data,
is_standard=True,
ref_type=ref_type,
ref_id=ref_id
)
except Exception:
recording_id = None
@@ -179,7 +184,7 @@ class Qwen:
return {"success": False, "error": error_message}
@staticmethod
async def chat(messages: List[Dict[str, str]], image_id: int = 0, user_id: int = 0) -> Dict[str, Any]:
async def chat(messages: List[Dict[str, str]], image_id: int = 0, user_id: int = 0, api_type: str = "chat") -> Dict[str, Any]:
api_key = settings.QWEN_API_KEY
model_name = settings.QWEN_TEXT_MODEL
start_time = time.time()
@@ -207,7 +212,7 @@ class Qwen:
}
duration = time.time() - start_time
audit_log = CreateAuditLogParam(
api_type="chat",
api_type=api_type,
model_name=model_name,
response_data=response_data,
request_data={"messages": messages},
@@ -222,7 +227,7 @@ class Qwen:
api_version=settings.FASTAPI_API_V1_PATH,
dict_level=None,
)
Qwen._audit_log("chat", audit_log)
Qwen._audit_log(api_type, audit_log)
if status_code == 200:
content = ""
try:
@@ -252,7 +257,7 @@ class Qwen:
finally:
if error_message:
Qwen._log_audit(
api_type="chat",
api_type=api_type,
dict_level=None,
model_name=model_name,
request_data={"messages": messages},
@@ -370,6 +375,7 @@ level2 (Intermediate):
3. Focus: Prioritize ARTIFICIAL/CENTRAL objects and PRIMARY scene (ignore trivial background elements) — e.g., for a café image, focus on "coffee", "barista", "menu" (not "wall", "floor").
4. Practicality: All sentences must be directly usable in real-life communication (avoid meaningless grammatical exercises like "I am eat a apple" corrected to "I am eating an apple").
5. Accuracy: Translations must be accurate (not literal) and match the context of the image scene.
6. Output Limit: Only return the JSON string, without any explanatory text. Ensure that it can be directly parsed by `JSON.parse`.
"""
)

View File

@@ -258,7 +258,7 @@ class TencentCloud:
"""签名方法"""
return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()
async def text_to_speak(self, content: str, image_text_id: int = None, image_id: int = None, user_id: int = None) -> Dict[str, Any]:
async def text_to_speak(self, content: str, image_text_id: int = None, image_id: int = None, user_id: int = None, ref_type: str = None, ref_id: int = None) -> Dict[str, Any]:
"""
将文本转换为语音,并创建标准发音录音记录
@@ -266,6 +266,8 @@ class TencentCloud:
:param image_text_id: 关联的图片文本ID
:param image_id: 关联的图片ID
:param user_id: 用户ID
:param ref_type: 引用类型
:param ref_id: 引用ID
:return: 包含标准发音音频信息的结果
"""
start_time = time.time()
@@ -374,7 +376,7 @@ class TencentCloud:
# 保存音频文件并创建标准发音录音记录
clean_json = response_json.copy()
del clean_json["Response"]['Audio']
recording_id = await self._create_standard_recording(audio_data, content, image_text_id, image_id, user_id, clean_json)
recording_id = await self._create_standard_recording(audio_data, content, image_text_id, image_id, user_id, clean_json, ref_type, ref_id)
result = {
"success": True,
@@ -441,7 +443,7 @@ class TencentCloud:
except Exception as e:
logger.error(f"Failed to create TTS audit log: {str(e)}")
async def _create_standard_recording(self, audio_data: bytes, content: str, image_text_id: int = None, image_id: int = None, user_id: int = None, details: dict = None) -> int:
async def _create_standard_recording(self, audio_data: bytes, content: str, image_text_id: int = None, image_id: int = None, user_id: int = None, details: dict = None, ref_type: str = None, ref_id: int = None) -> int:
"""创建标准发音录音记录"""
try:
# 创建文件记录
@@ -482,7 +484,9 @@ class TencentCloud:
eval_mode=1, # 句子模式
user_id=user_id,
details=details,
is_standard=True # 设置为标准音频
is_standard=True, # 设置为标准音频
ref_type=ref_type,
ref_id=ref_id
)
return recording_id