add sentence

This commit is contained in:
Felix
2025-12-19 17:17:09 +08:00
parent 0c6485e41d
commit 60997cc395
43 changed files with 1571 additions and 102 deletions

1
.gitignore vendored
View File

@@ -3,7 +3,6 @@ __pycache__/
backend/static
backend/log/
backend/.env
backend/alembic/versions/
backend/app/admin/cert/
.venv/
.vscode/

View File

@@ -11,4 +11,9 @@ REDIS_PORT=6379
REDIS_PASSWORD=''
REDIS_DATABASE=0
# Token
TOKEN_SECRET_KEY='1VkVF75nsNABBjK_7-qz7GtzNy3AMvktc9TCPwKczCk'
TOKEN_SECRET_KEY='1VkVF75nsNABBjK_7-qz7GtzNy3AMvktc9TCPwKczCk'
# Hunyuan
HUNYUAN_MODLE='hunyuan-lite'
HUNYUAN_APP_ID=''
HUNYUAN_SECRET_ID=''
HUNYUAN_SECRET_KEY=''

View File

@@ -33,7 +33,7 @@ prepend_sys_path = .
# sourceless = false
# version number format
version_num_format = %04d
version_num_format = %%04d
# version path separator; As mentioned above, this is the character used to split
# version_locations. The default within new alembic.ini files is "os", which uses
@@ -50,7 +50,7 @@ version_path_separator = os
# are written from script.py.mako
# output_encoding = utf-8
sqlalchemy.url = mysql+asyncmy://root:root@127.0.0.1:3306/app # Changed from postgresql+asyncpg to mysql+asyncmy
sqlalchemy.url = mysql+pymysql://root:root@127.0.0.1:3306/app # Changed from postgresql+asyncpg to mysql+asyncmy
[post_write_hooks]
@@ -71,17 +71,17 @@ keys = root,sqlalchemy,alembic
[logger_root]
level = WARN
handlers = console
qalname = root
qualname = root
[logger_sqlalchemy]
level = WARN
handlers =
qalname = sqlalchemy.engine
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qalname = alembic
qualname = alembic
[handlers]
keys = console
@@ -97,4 +97,4 @@ keys = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
datefmt = %H:%M:%S

View File

@@ -0,0 +1,21 @@
"""add ref_type and ref_id to recording

Revision ID: 0001
Revises:
"""
from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = '0001'
down_revision = None
branch_labels = None
depends_on = None


def upgrade():
    """Add generic reference columns to ``recording`` plus a composite lookup index."""
    op.add_column('recording', sa.Column('ref_type', sa.String(length=50), nullable=True))
    op.add_column('recording', sa.Column('ref_id', sa.BigInteger(), nullable=True))
    op.create_index('idx_ref_type_id', 'recording', ['ref_type', 'ref_id'])


def downgrade():
    """Reverse of upgrade: drop the index first, then the columns it covers."""
    op.drop_index('idx_ref_type_id', table_name='recording')
    op.drop_column('recording', 'ref_id')
    op.drop_column('recording', 'ref_type')

View File

@@ -0,0 +1,29 @@
"""add ref_type and ref_id to audit_log
Revision ID: 0002
Revises: 0001
Create Date: 2025-12-14
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "0002"
down_revision = "0001"
branch_labels = None
depends_on = None
def upgrade():
with op.batch_alter_table('audit_log') as batch_op:
batch_op.add_column(sa.Column('ref_type', sa.String(length=50), nullable=True))
batch_op.add_column(sa.Column('ref_id', sa.BigInteger(), nullable=True))
op.create_index('idx_audit_log_ref_type_id', 'audit_log', ['ref_type', 'ref_id'])
def downgrade():
op.drop_index('idx_audit_log_ref_type_id', table_name='audit_log')
with op.batch_alter_table('audit_log') as batch_op:
batch_op.drop_column('ref_id')
batch_op.drop_column('ref_type')

View File

@@ -0,0 +1,37 @@
"""add ref_type and ref_id to image_processing_task
Revision ID: 0003
Revises: 0002
Create Date: 2025-12-15
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "0003"
down_revision = "0002"
branch_labels = None
depends_on = None
def upgrade():
with op.batch_alter_table('image_processing_task') as batch_op:
batch_op.add_column(sa.Column('ref_type', sa.String(length=50), nullable=True))
batch_op.add_column(sa.Column('ref_id', sa.BigInteger(), nullable=True))
batch_op.drop_column('file_id')
batch_op.drop_column('type')
op.create_index(
'idx_image_task_ref_type_id_status',
'image_processing_task',
['ref_type', 'ref_id', 'status']
)
def downgrade():
op.drop_index('idx_image_task_ref_type_id_status', table_name='image_processing_task')
with op.batch_alter_table('image_processing_task') as batch_op:
batch_op.drop_column('ref_id')
batch_op.drop_column('ref_type')
batch_op.add_column(sa.Column('file_id', sa.BigInteger(), nullable=True))
batch_op.add_column(sa.Column('type', sa.String(length=50), nullable=True))

View File

@@ -11,6 +11,7 @@ from backend.app.admin.schema.audit_log import CreateAuditLogParam, AuditLogStat
from backend.app.ai import Image, ImageProcessingTask
from backend.app.ai.model.image_task import ImageTaskStatus
from backend.common.const import API_TYPE_RECOGNITION
from backend.app.admin.schema.wx import DictLevel
class CRUDAuditLog(CRUDPlus[AuditLog]):
@@ -74,6 +75,7 @@ class CRUDAuditLog(CRUDPlus[AuditLog]):
.join(ImageProcessingTask, Image.id == ImageProcessingTask.image_id)
.where(
ranked_subquery.c.rn == 1, # 只选择每个image_id的第一条记录
ImageProcessingTask.dict_level == DictLevel.LEVEL1.value,
ImageProcessingTask.status == ImageTaskStatus.COMPLETED # 只选择任务状态为completed的记录
)
.order_by(ranked_subquery.c.called_at.desc(), ranked_subquery.c.id.desc())
@@ -93,6 +95,7 @@ class CRUDAuditLog(CRUDPlus[AuditLog]):
AuditLog.user_id == user_id,
AuditLog.api_type == API_TYPE_RECOGNITION,
AuditLog.called_at >= today_start,
ImageProcessingTask.dict_level == DictLevel.LEVEL1.value,
ImageProcessingTask.status == ImageTaskStatus.COMPLETED
)
)

View File

@@ -24,6 +24,8 @@ class AuditLog(Base):
status_code: Mapped[Optional[int]] = mapped_column(Integer, comment="HTTP状态码")
image_id: Mapped[Optional[int]] = mapped_column(BigInteger, ForeignKey('image.id'), comment="关联的图片ID")
user_id: Mapped[Optional[int]] = mapped_column(BigInteger, ForeignKey('wx_user.id'), comment="调用用户ID")
ref_type: Mapped[Optional[str]] = mapped_column(String(50), nullable=True, comment="通用引用类型")
ref_id: Mapped[Optional[int]] = mapped_column(BigInteger, nullable=True, comment="通用引用ID")
dict_level: Mapped[Optional[str]] = mapped_column(String(20), comment="dict level")
api_version: Mapped[Optional[str]] = mapped_column(String(20), comment="API版本")
error_message: Mapped[Optional[str]] = mapped_column(Text, default=None, comment="错误信息")
@@ -34,6 +36,7 @@ class AuditLog(Base):
Index('idx_audit_logs_image_id', 'image_id'),
# 为用户历史记录查询优化的索引
Index('idx_audit_log_user_api_called', 'user_id', 'api_type', 'called_at'),
Index('idx_audit_log_ref_type_id', 'ref_type', 'ref_id'),
)
@@ -51,4 +54,4 @@ class DailySummary(Base):
__table_args__ = (
# 为用户历史记录查询优化的索引
Index('idx_daily_summary_api_called', 'user_id', 'summary_time'),
)
)

View File

@@ -22,6 +22,8 @@ class AuditLogSchemaBase(SchemaBase):
user_id: int = Field(description="调用用户ID")
api_version: str = Field(description="API版本")
dict_level: Optional[str] = Field(None, description="词典等级")
ref_type: Optional[str] = Field(None, description="通用引用类型")
ref_id: Optional[int] = Field(None, description="通用引用ID")
class CreateAuditLogParam(AuditLogSchemaBase):
@@ -66,4 +68,4 @@ class DailySummaryPageSchema(SchemaBase):
items: List[DailySummarySchema] = Field(description="每日汇总记录列表")
total: int = Field(description="总记录数")
page: int = Field(description="当前页码")
size: int = Field(description="每页记录数")
size: int = Field(description="每页记录数")

View File

@@ -53,9 +53,9 @@ class UserAuth(BaseModel):
class DictLevel(str, Enum):
LEVEL1 = "LEVEL1" # "小学"
LEVEL2 = "LEVEL2" # "初高中"
LEVEL3 = "LEVEL3" # "四六级"
LEVEL1 = "level1" # "小学"
LEVEL2 = "level2" # "初高中"
LEVEL3 = "level3" # "四六级"
class UserSettings(BaseModel):

View File

@@ -1,4 +1,5 @@
import json
import re
from typing import Optional, Set, Dict, Any, Tuple, List
import asyncio
@@ -12,7 +13,6 @@ from backend.common.exception import errors
from backend.database.db import async_db_session, background_db_session
from backend.database.redis import redis_client
# Import required modules for the new method
from backend.app.ai.model import Image
from backend.app.ai.crud.image_curd import image_dao
from backend.app.ai.model.image_task import ImageProcessingTask
@@ -754,20 +754,13 @@ class DictService:
# 处理description字段 (根据新需求应该是desc_en)
descriptions = level_data.get("desc_en", [])
if not isinstance(descriptions, list):
# 兼容旧的字段名
descriptions = level_data.get("desc_en", [])
if isinstance(descriptions, list):
for desc in descriptions:
if isinstance(desc, str):
# 按空格分割获取单词
desc_words = desc.split()
for word in desc_words:
# 清理单词(移除标点符号等)
cleaned_word = ''.join(char for char in word if char.isalnum())
if cleaned_word:
words.add(cleaned_word.lower())
desc_words = set(re.findall(r'\b[\w-]+\b', desc.lower()))
words.update(desc_words)
return words

View File

@@ -5,7 +5,7 @@
"""
from datetime import datetime, timedelta
import logging
from sqlalchemy import select, and_, desc
from sqlalchemy import select, and_, desc, func
from backend.app.admin.model.audit_log import AuditLog, DailySummary
from backend.app.ai.model.image import Image
from backend.app.ai.model.image_task import ImageProcessingTask, ImageTaskStatus
@@ -15,6 +15,7 @@ from backend.app.admin.schema.audit_log import CreateDailySummaryParam
from backend.common.const import API_TYPE_RECOGNITION
from backend.database.db import async_db_session
from backend.utils.timezone import timezone
from backend.app.admin.schema.wx import DictLevel
async def wx_user_index_history() -> None:
@@ -55,31 +56,40 @@ async def wx_user_index_history() -> None:
# 为这批用户获取 audit_log 记录只包含有对应completed状态image_processing_task的记录
audit_logs_stmt = (
select(AuditLog)
select(
AuditLog.user_id.label('user_id'),
AuditLog.image_id.label('image_id'),
func.max(AuditLog.called_at).label('last_called_at')
)
.join(ImageProcessingTask, and_(
AuditLog.image_id == ImageProcessingTask.image_id,
ImageProcessingTask.dict_level == DictLevel.LEVEL1.value,
ImageProcessingTask.status == ImageTaskStatus.COMPLETED,
))
.where(
and_(
AuditLog.user_id.in_(batch_user_ids),
AuditLog.api_type == API_TYPE_RECOGNITION,
AuditLog.dict_level == DictLevel.LEVEL1.value,
AuditLog.image_id.is_not(None),
AuditLog.called_at >= yesterday_start,
AuditLog.called_at < yesterday_end
)
)
.order_by(AuditLog.user_id, desc(AuditLog.called_at))
.group_by(AuditLog.user_id, AuditLog.image_id)
.order_by(AuditLog.user_id, desc(func.max(AuditLog.called_at)))
)
audit_logs_result = await db.execute(audit_logs_stmt)
all_audit_logs = audit_logs_result.scalars().all()
all_audit_logs = audit_logs_result.fetchall()
# 按用户 ID 分组 audit_logs
user_audit_logs = {}
for log in all_audit_logs:
if log.user_id not in user_audit_logs:
user_audit_logs[log.user_id] = []
user_audit_logs[log.user_id].append(log)
for row in all_audit_logs:
uid = row.user_id
if uid not in user_audit_logs:
user_audit_logs[uid] = []
user_audit_logs[uid].append(row)
# 获取这批用户的信息
users_stmt = select(WxUser).where(WxUser.id.in_(batch_user_ids))
@@ -87,7 +97,7 @@ async def wx_user_index_history() -> None:
users = {user.id: user for user in users_result.scalars().all()}
# 获取所有相关的 image 记录
all_image_ids = [log.image_id for log in all_audit_logs if log.image_id]
all_image_ids = list(dict.fromkeys([row.image_id for row in all_audit_logs if row.image_id]))
image_map = {}
if all_image_ids:
# 只查询需要的 thumbnail_id 字段,避免加载整个 Image 对象浪费内存

View File

@@ -5,6 +5,7 @@ from fastapi import APIRouter, Request, Query
from starlette.background import BackgroundTasks
from backend.app.ai.schema.image_text import ImageTextSchema, ImageTextWithRecordingsSchema, CreateImageTextParam, UpdateImageTextParam, ImageTextInitResponseSchema, ImageTextInitParam
from backend.app.ai.schema.recording import LatestRecordingResult
from backend.app.ai.service.image_text_service import image_text_service
from backend.app.ai.service.recording_service import recording_service
from backend.common.exception import errors
@@ -55,6 +56,24 @@ async def get_standard_audio_file_id(
return response_base.success(data={'audio_id': str(file_id)})
@router.get("/{text_id}/record_result", summary="获取最新评测结果", dependencies=[DependsJwtAuth])
async def get_latest_record_result(
request: Request,
text_id: int,
) -> ResponseSchemaModel[LatestRecordingResult | None]:
rec = await recording_service.get_latest_result_by_text_id(text_id, request.user.id)
if not rec:
return response_base.success(data=None)
data = LatestRecordingResult(
id=rec.get("id"),
image_id=rec.get("image_id"),
image_text_id=rec.get("image_text_id"),
file_id=rec.get("file_id"),
details=rec.get("details"),
)
return response_base.success(data=data)
# @router.get("/{text_id}", summary="获取图片文本详情", dependencies=[DependsJwtAuth])
# async def get_image_text(
# text_id: int
@@ -159,4 +178,4 @@ async def get_standard_audio_file_id(
# - 图片的所有文本记录列表
# """
# texts = await image_text_service.get_texts_by_image_id(image_id)
# return response_base.success(data=texts)
# return response_base.success(data=texts)

View File

@@ -5,10 +5,12 @@ from fastapi import APIRouter
from backend.app.ai.api.image import router as image_router
from backend.app.ai.api.recording import router as recording_router
from backend.app.ai.api.image_text import router as image_text_router
from backend.app.ai.api.scene import router as scene_router
from backend.core.conf import settings
v1 = APIRouter(prefix=settings.FASTAPI_API_V1_PATH)
v1.include_router(image_router, prefix='/image', tags=['AI图片服务'])
v1.include_router(recording_router, prefix='/recording', tags=['AI录音服务'])
v1.include_router(image_text_router, prefix='/image_text', tags=['AI图片文本服务'])
v1.include_router(image_text_router, prefix='/image_text', tags=['AI图片文本服务'])
v1.include_router(scene_router, prefix='/scene', tags=['AI场景服务'])

View File

@@ -0,0 +1,72 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from fastapi import APIRouter, Depends, Request, BackgroundTasks
from backend.common.response.response_schema import ResponseSchemaModel
from backend.common.security.jwt import DependsJwtAuth
from backend.app.ai.schema.sentence import SentenceTaskRequest, SentenceTaskResponse, SentenceTaskStatusResponse, SentenceAssessmentRequest, SentenceAssessmentResponse, SceneSentenceContent
from backend.app.ai.schema.recording import RecordingAssessmentResult
from backend.app.ai.service.sentence_service import sentence_service
from backend.app.ai.service.recording_service import recording_service
router = APIRouter()
@router.post("/create", summary="创建场景任务", dependencies=[DependsJwtAuth])
async def create_scene_task(
request: Request,
params: SentenceTaskRequest
) -> ResponseSchemaModel[SentenceTaskResponse]:
result = await sentence_service.create_scene_task(
image_id=params.image_id,
user_id=request.user.id,
scene_type=params.scene_type
)
data = SentenceTaskResponse(task_id=result.get("task_id"), status=result.get("status"))
return ResponseSchemaModel[SentenceTaskResponse](code=200, msg="Success", data=data)
@router.post("/scene_sentence/assessment", summary="场景句型评测", dependencies=[DependsJwtAuth])
async def assess_scene_sentence_api(
request: Request,
background_tasks: BackgroundTasks,
params: SentenceAssessmentRequest
) -> ResponseSchemaModel[SentenceAssessmentResponse]:
assessment = await recording_service.assess_scene_sentence(
file_id=params.file_id,
ref_text=params.ref_text,
ref_type=params.ref_type,
ref_id=params.ref_id,
user_id=request.user.id,
background_tasks=background_tasks
)
data = SentenceAssessmentResponse(
file_id=params.file_id,
assessment_result=RecordingAssessmentResult(assessment=assessment),
ref_type=params.ref_type,
ref_id=str(params.ref_id)
)
return ResponseSchemaModel[SentenceAssessmentResponse](code=200, msg="Success", data=data)
@router.get("/task/{task_id}", summary="查询句型任务状态", dependencies=[DependsJwtAuth])
async def get_sentence_task_status(
request: Request,
task_id: int,
) -> ResponseSchemaModel[SentenceTaskStatusResponse]:
result = await sentence_service.get_task_status(task_id)
data = SentenceTaskStatusResponse(
task_id=result.get("task_id"),
ref_type=result.get("ref_type"),
ref_id=result.get("ref_id"),
status=result.get("status"),
error_message=result.get("error_message"),
)
return ResponseSchemaModel[SentenceTaskStatusResponse](code=200, msg="Success", data=data)
@router.get("/image/{image_id}/sentence", summary="获取图片最新的场景句型", dependencies=[DependsJwtAuth])
async def get_latest_scene_sentence(
request: Request,
image_id: int,
) -> ResponseSchemaModel[SceneSentenceContent | None]:
content = await sentence_service.get_latest_scene_sentence(image_id, request.user.id)
return ResponseSchemaModel[SceneSentenceContent | None](code=200, msg="Success", data=content)

View File

@@ -0,0 +1,27 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from typing import Optional, List
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy_crud_plus import CRUDPlus
from backend.app.ai.model.image_chat import ImageChat
class ImageChatCRUD(CRUDPlus[ImageChat]):
    """CRUD helpers for ImageChat conversation rows."""

    async def get(self, db: AsyncSession, id: int) -> Optional[ImageChat]:
        """Fetch a single chat by primary key, or None."""
        return await self.select_model(db, id)

    async def create(self, db: AsyncSession, obj_in: dict) -> ImageChat:
        """Insert a new chat row and flush so its generated id is populated."""
        chat = ImageChat(**obj_in)
        db.add(chat)
        await db.flush()
        return chat

    async def get_by_image_id(self, db: AsyncSession, image_id: int) -> List[ImageChat]:
        """All chats attached to the given image."""
        rows = await db.execute(select(self.model).where(self.model.image_id == image_id))
        return list(rows.scalars().all())


image_chat_dao: ImageChatCRUD = ImageChatCRUD(ImageChat)

View File

@@ -0,0 +1,33 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from typing import Optional, List
from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy_crud_plus import CRUDPlus
from backend.app.ai.model.image_chat import ImageChatMsg
class ImageChatMsgCRUD(CRUDPlus[ImageChatMsg]):
    """CRUD helpers for individual turns inside an ImageChat session."""

    async def get(self, db: AsyncSession, id: int) -> Optional[ImageChatMsg]:
        """Fetch one turn by primary key, or None."""
        return await self.select_model(db, id)

    async def create(self, db: AsyncSession, obj_in: dict) -> ImageChatMsg:
        """Insert a new turn and flush so its generated id is populated."""
        msg = ImageChatMsg(**obj_in)
        db.add(msg)
        await db.flush()
        return msg

    async def get_by_chat_id(self, db: AsyncSession, chat_id: int) -> List[ImageChatMsg]:
        """All turns of a chat ordered by turn_index."""
        query = (
            select(self.model)
            .where(self.model.image_chat_id == chat_id)
            .order_by(self.model.turn_index)
        )
        return list((await db.execute(query)).scalars().all())

    async def get_last_turn_index(self, db: AsyncSession, chat_id: int) -> int:
        """Highest turn_index used so far in a chat (0 when the chat has no turns)."""
        query = select(func.max(self.model.turn_index)).where(self.model.image_chat_id == chat_id)
        latest = (await db.execute(query)).scalar()
        return latest or 0


image_chat_msg_dao: ImageChatMsgCRUD = ImageChatMsgCRUD(ImageChatMsg)

View File

@@ -53,5 +53,29 @@ class RecordingCRUD(CRUDPlus[Recording]):
result = await db.execute(stmt)
return result.scalar_one_or_none()
async def get_by_ref(self, db: AsyncSession, ref_type: str, ref_id: int) -> List[Recording]:
    """Non-standard recordings for a generic (ref_type, ref_id) reference, oldest first."""
    conditions = and_(
        self.model.ref_type == ref_type,
        self.model.ref_id == ref_id,
        self.model.is_standard == False,  # noqa: E712 — SQLAlchemy column comparison
    )
    query = select(self.model).where(conditions).order_by(self.model.created_time.asc())
    rows = await db.execute(query)
    return list(rows.scalars().all())
recording_dao: RecordingCRUD = RecordingCRUD(Recording)
async def get_latest_by_ref(self, db: AsyncSession, ref_type: str, ref_id: int) -> Optional[Recording]:
    """Newest non-standard recording for a generic (ref_type, ref_id) reference, or None."""
    query = (
        select(self.model)
        .where(
            and_(
                self.model.ref_type == ref_type,
                self.model.ref_id == ref_id,
                self.model.is_standard == False,  # noqa: E712 — SQLAlchemy column comparison
            )
        )
        .order_by(self.model.created_time.desc())
        .limit(1)
    )
    return (await db.execute(query)).scalar_one_or_none()
recording_dao: RecordingCRUD = RecordingCRUD(Recording)

View File

@@ -0,0 +1,63 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from typing import Optional, List
from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy_crud_plus import CRUDPlus
from backend.app.ai.model.scene_sentence import SceneSentence, SceneSentenceItem
class SceneSentenceCRUD(CRUDPlus[SceneSentence]):
    """CRUD helpers for scene-sentence batches."""

    async def get(self, db: AsyncSession, id: int) -> Optional[SceneSentence]:
        """Fetch one batch by primary key, or None."""
        return await self.select_model(db, id)

    async def create(self, db: AsyncSession, obj_in: dict) -> SceneSentence:
        """Insert a new batch and flush so its generated id is populated."""
        batch = SceneSentence(**obj_in)
        db.add(batch)
        await db.flush()
        return batch

    async def get_last_seq(self, db: AsyncSession, image_chat_id: int) -> int:
        """Highest batch seq used so far within a chat session (0 when none)."""
        query = select(func.max(self.model.seq)).where(self.model.image_chat_id == image_chat_id)
        latest = (await db.execute(query)).scalar()
        return latest or 0

    async def get_latest_by_image_and_user(self, db: AsyncSession, image_id: int, user_id: int) -> Optional[SceneSentence]:
        """Most recent batch for an (image, user) pair, newest called_at/id first."""
        query = (
            select(self.model)
            .where(
                self.model.image_id == image_id,
                self.model.user_id == user_id,
            )
            .order_by(self.model.called_at.desc(), self.model.id.desc())
            .limit(1)
        )
        return (await db.execute(query)).scalar_one_or_none()
class SceneSentenceItemCRUD(CRUDPlus[SceneSentenceItem]):
    """CRUD helpers for individual sentence items inside a batch."""

    async def create(self, db: AsyncSession, obj_in: dict) -> SceneSentenceItem:
        """Insert one item and flush so its generated id is populated."""
        item = SceneSentenceItem(**obj_in)
        db.add(item)
        await db.flush()
        return item

    async def create_many(self, db: AsyncSession, items: List[dict]) -> List[SceneSentenceItem]:
        """Insert a list of items in order, flushing once at the end."""
        created = [SceneSentenceItem(**payload) for payload in items]
        for obj in created:
            db.add(obj)
        await db.flush()
        return created

    async def get_by_session(self, db: AsyncSession, sentence_id: int) -> List[SceneSentenceItem]:
        """All items of a batch ordered by their seq."""
        query = select(self.model).where(self.model.sentence_id == sentence_id).order_by(self.model.seq)
        return list((await db.execute(query)).scalars().all())


scene_sentence_dao: SceneSentenceCRUD = SceneSentenceCRUD(SceneSentence)
scene_sentence_item_dao: SceneSentenceItemCRUD = SceneSentenceItemCRUD(SceneSentenceItem)

View File

@@ -0,0 +1,38 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from typing import Optional, List
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy_crud_plus import CRUDPlus
from backend.app.ai.model.sentence_card import SentenceCard
class SentenceCardCRUD(CRUDPlus[SentenceCard]):
    """CRUD helpers for generated sentence cards."""

    async def get(self, db: AsyncSession, id: int) -> Optional[SentenceCard]:
        """Fetch one card by primary key, or None."""
        return await self.select_model(db, id)

    async def create(self, db: AsyncSession, obj_in: dict) -> SentenceCard:
        """Insert a new card and flush so its generated id is populated."""
        card = SentenceCard(**obj_in)
        db.add(card)
        await db.flush()
        return card

    async def get_latest_by_image_and_type(self, db: AsyncSession, image_id: int, card_type: str) -> Optional[SentenceCard]:
        """Newest card of `card_type` for an image, or None."""
        query = (
            select(self.model)
            .where(
                self.model.image_id == image_id,
                self.model.card_type == card_type,
            )
            .order_by(self.model.called_at.desc(), self.model.id.desc())
            .limit(1)
        )
        return (await db.execute(query)).scalar_one_or_none()

    async def get_by_image_and_type(self, db: AsyncSession, image_id: int, card_type: str) -> List[SentenceCard]:
        """All cards of `card_type` for an image, newest first."""
        query = (
            select(self.model)
            .where(
                self.model.image_id == image_id,
                self.model.card_type == card_type,
            )
            .order_by(self.model.called_at.desc(), self.model.id.desc())
        )
        return list((await db.execute(query)).scalars().all())


sentence_card_dao: SentenceCardCRUD = SentenceCardCRUD(SentenceCard)

View File

@@ -2,4 +2,6 @@ from backend.common.model import MappedBase # noqa: I
from backend.app.ai.model.image import Image
from backend.app.ai.model.image_text import ImageText
from backend.app.ai.model.image_task import ImageProcessingTask
# from backend.app.ai.model.article import Article, ArticleParagraph, ArticleSentence
from backend.app.ai.model.recording import Recording
from backend.app.ai.model.sentence_card import SentenceCard
from backend.app.ai.model.scene_sentence import SceneSentence, SceneSentenceItem

View File

@@ -0,0 +1,50 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from typing import Optional
from datetime import datetime
from sqlalchemy import BigInteger, Text, String, Integer, Float, DateTime, ForeignKey, Index
from sqlalchemy.dialects.mysql import JSON as MySQLJSON
from sqlalchemy.orm import mapped_column, Mapped
from backend.common.model import snowflake_id_key, Base
class ImageChat(Base):
    """One AI chat session, optionally tied to an image and a user.

    Tracks the model used, cumulative token usage, and the last call time.
    """

    __tablename__ = 'image_chat'

    id: Mapped[snowflake_id_key] = mapped_column(BigInteger, init=False, primary_key=True)
    # Both links are nullable, so a chat may exist without an image or user.
    image_id: Mapped[Optional[int]] = mapped_column(BigInteger, ForeignKey('image.id'), nullable=True, comment="关联的图片ID")
    user_id: Mapped[Optional[int]] = mapped_column(BigInteger, ForeignKey('wx_user.id'), nullable=True, comment="关联的用户ID")
    chat_type: Mapped[str] = mapped_column(String(30), nullable=False, comment="聊天类型(scene_sentence/scene_dialogue/sentence_exercise/general)")
    model_name: Mapped[str] = mapped_column(String(50), nullable=False, comment="模型名称")
    ext: Mapped[Optional[dict]] = mapped_column(MySQLJSON, default=None, comment="扩展数据")
    total_tokens: Mapped[Optional[int]] = mapped_column(Integer, default=0, comment="会话累计token")
    last_called_at: Mapped[Optional[datetime]] = mapped_column(DateTime, default=None, comment="最近调用时间")

    __table_args__ = (
        Index('idx_image_chat_image_id', 'image_id'),
        # Composite indexes matching the "chats per user/type ordered by last call" queries.
        Index('idx_image_chat_user_called', 'user_id', 'last_called_at'),
        Index('idx_image_chat_type_called', 'chat_type', 'last_called_at'),
    )
class ImageChatMsg(Base):
    """One request/response turn inside an ImageChat session."""

    __tablename__ = 'image_chat_msg'

    id: Mapped[snowflake_id_key] = mapped_column(BigInteger, init=False, primary_key=True)
    image_chat_id: Mapped[int] = mapped_column(BigInteger, ForeignKey('image_chat.id'), nullable=False, comment="会话ID")
    turn_index: Mapped[int] = mapped_column(Integer, nullable=False, comment="轮次序号")
    model_name: Mapped[str] = mapped_column(String(50), nullable=False, comment="模型名称")
    messages: Mapped[Optional[dict]] = mapped_column(MySQLJSON, default=None, comment="增量请求消息")
    response_data: Mapped[Optional[dict]] = mapped_column(MySQLJSON, default=None, comment="响应数据")
    token_usage: Mapped[Optional[dict]] = mapped_column(MySQLJSON, default=None, comment="本轮消耗的token数量")
    duration: Mapped[Optional[float]] = mapped_column(Float, default=None, comment="调用耗时(秒)")
    status_code: Mapped[Optional[int]] = mapped_column(Integer, default=None, comment="HTTP状态码")
    error_message: Mapped[Optional[str]] = mapped_column(Text, default=None, comment="错误信息")
    # BUG FIX: `default=datetime.now()` was evaluated once at import time, stamping
    # every row with the process start time. `default_factory` re-evaluates per
    # instance. (assumes Base is a MappedAsDataclass base, as `init=False` above
    # suggests — confirm)
    called_at: Mapped[datetime] = mapped_column(DateTime, default_factory=datetime.now, comment="调用时间")
    ext: Mapped[Optional[dict]] = mapped_column(MySQLJSON, default=None, comment="扩展数据")

    __table_args__ = (
        # Composite index for "turns of a chat ordered by turn_index" lookups.
        Index('idx_image_chat_msg_chat_turn', 'image_chat_id', 'turn_index'),
    )

View File

@@ -22,10 +22,10 @@ class ImageProcessingTask(Base):
id: Mapped[snowflake_id_key] = mapped_column(BigInteger, init=False, primary_key=True)
image_id: Mapped[int] = mapped_column(BigInteger, nullable=False, comment="关联的图片ID")
file_id: Mapped[int] = mapped_column(BigInteger, nullable=False, comment="关联的文件ID")
user_id: Mapped[int] = mapped_column(BigInteger, nullable=False, comment="用户ID")
dict_level: Mapped[str] = mapped_column(String(20), nullable=False, comment="词典等级")
type: Mapped[str] = mapped_column(String(50), nullable=False, comment="处理类型")
ref_type: Mapped[str] = mapped_column(String(50), nullable=False, comment="引用类型")
ref_id: Mapped[int] = mapped_column(BigInteger, nullable=False, comment="引用ID")
status: Mapped[ImageTaskStatus] = mapped_column(String(20), default=ImageTaskStatus.PENDING, comment="任务状态")
result: Mapped[Optional[dict]] = mapped_column(MySQLJSON, default=None, comment="处理结果")
error_message: Mapped[Optional[str]] = mapped_column(Text, default=None, comment="错误信息")
@@ -35,8 +35,8 @@ class ImageProcessingTask(Base):
__table_args__ = (
# 为 image_id 添加索引以优化查询
Index('idx_image_task_image_id', 'image_id'),
# 为 status 添加索引以优化查询
Index('idx_image_task_status', 'status'),
# 为 user_id 添加索引以优化查询
Index('idx_image_task_user_id', 'user_id'),
)
# 为 ref_type/ref_id/status 添加复合索引用于检索
Index('idx_image_task_ref_type_id_status', 'ref_type', 'ref_id', 'status'),
)

View File

@@ -22,6 +22,8 @@ class Recording(Base):
image_text_id: Mapped[Optional[int]] = mapped_column(BigInteger, ForeignKey('image_text.id'), nullable=True, comment="关联的图片文本ID")
text: Mapped[Optional[str]] = mapped_column(String(255), nullable=True, comment='朗读文本')
eval_mode: Mapped[Optional[int]] = mapped_column(Integer, nullable=True, comment='评测模式')
ref_type: Mapped[Optional[str]] = mapped_column(String(50), default=None, comment="通用引用类型")
ref_id: Mapped[Optional[int]] = mapped_column(BigInteger, default=None, comment="通用引用ID")
info: Mapped[Optional[RecordingMetadata]] = mapped_column(PydanticType(pydantic_type=RecordingMetadata), default=None, comment="附加元数据") # 其他可能的字段(根据实际需求添加)
details: Mapped[Optional[dict]] = mapped_column(MySQLJSON, default=None, comment="评估信息") # 其他信息
is_standard: Mapped[bool] = mapped_column(Boolean, default=False, comment="是否为标准朗读音频")
@@ -30,4 +32,5 @@ class Recording(Base):
__table_args__ = (
Index('idx_image_user_id', 'image_id', 'user_id'),
Index('idx_image_text_id', 'image_text_id'),
)
Index('idx_ref_type_id', 'ref_type', 'ref_id'),
)

View File

@@ -0,0 +1,50 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from typing import Optional, List
from datetime import datetime
from sqlalchemy import BigInteger, Text, String, Integer, Float, DateTime, ForeignKey, Index
from sqlalchemy.dialects.mysql import JSON as MySQLJSON
from sqlalchemy.orm import mapped_column, Mapped
from backend.common.model import snowflake_id_key, Base
class SceneSentence(Base):
    """A batch of scene sentences generated for one image/user, optionally within a chat session."""

    __tablename__ = 'scene_sentence'

    id: Mapped[snowflake_id_key] = mapped_column(BigInteger, init=False, primary_key=True)
    image_id: Mapped[int] = mapped_column(BigInteger, ForeignKey('image.id'), nullable=False, comment="关联的图片ID")
    user_id: Mapped[int] = mapped_column(BigInteger, ForeignKey('wx_user.id'), nullable=False, comment="关联的用户ID")
    image_chat_id: Mapped[Optional[int]] = mapped_column(BigInteger, ForeignKey('image_chat.id'), nullable=True, comment="关联的会话ID")
    seq: Mapped[int] = mapped_column(Integer, nullable=False, comment="同一会话的批次序号")
    scene_tag: Mapped[Optional[List[str]]] = mapped_column(MySQLJSON, default=None, comment="场景标签")
    total_count: Mapped[int] = mapped_column(Integer, default=0, comment="句型总数")
    viewed_count: Mapped[int] = mapped_column(Integer, default=0, comment="已浏览数")
    practiced_count: Mapped[int] = mapped_column(Integer, default=0, comment="已跟读数")
    # BUG FIX: `default=datetime.now()` was evaluated once at import time, stamping
    # every row with the process start time. `default_factory` re-evaluates per
    # instance. (assumes Base is a MappedAsDataclass base, as `init=False` above
    # suggests — confirm)
    called_at: Mapped[datetime] = mapped_column(DateTime, default_factory=datetime.now, comment="创建时间")
    ext: Mapped[Optional[dict]] = mapped_column(MySQLJSON, default=None, comment="扩展数据")

    __table_args__ = (
        Index('idx_scene_sentence_session_image_user', 'image_id', 'user_id'),
        Index('idx_scene_sentence_session_chat_seq', 'image_chat_id', 'seq'),
    )
class SceneSentenceItem(Base):
    """One sentence card within a SceneSentence batch (English text, translation, tags)."""

    __tablename__ = 'scene_sentence_item'

    id: Mapped[snowflake_id_key] = mapped_column(BigInteger, init=False, primary_key=True)
    image_text_id: Mapped[Optional[int]] = mapped_column(BigInteger, ForeignKey('image_text.id'), nullable=True, comment="关联的图片文本ID")
    sentence_id: Mapped[int] = mapped_column(BigInteger, ForeignKey('scene_sentence.id'), nullable=False, comment="所属批次ID")
    sentence_en: Mapped[str] = mapped_column(String(255), nullable=False, comment="英文句型")
    sentence_zh: Mapped[str] = mapped_column(String(255), nullable=False, comment="中文翻译")
    seq: Mapped[int] = mapped_column(Integer, default=0, comment="卡片序号")
    function_tags: Mapped[Optional[List[str]]] = mapped_column(MySQLJSON, default=None, comment="功能标签")
    core_vocab: Mapped[Optional[List[str]]] = mapped_column(MySQLJSON, default=None, comment="核心词汇")
    collocations: Mapped[Optional[List[str]]] = mapped_column(MySQLJSON, default=None, comment="核心搭配")
    ext: Mapped[Optional[dict]] = mapped_column(MySQLJSON, default=None, comment="扩展数据")

    __table_args__ = (
        # Composite index for "items of a batch ordered by seq" lookups.
        Index('idx_scene_sentence_item_session_order', 'sentence_id', 'seq'),
    )

View File

@@ -0,0 +1,28 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from typing import Optional
from datetime import datetime
from sqlalchemy import BigInteger, Text, String, Float, Integer, DateTime, ForeignKey, Index
from sqlalchemy.dialects.mysql import JSON as MySQLJSON
from sqlalchemy.orm import mapped_column, Mapped
from backend.common.model import snowflake_id_key, Base
class SentenceCard(Base):
    """ORM model: a generated card (scene sentence / dialogue / exercise) for an image."""

    __tablename__ = 'sentence_card'

    id: Mapped[snowflake_id_key] = mapped_column(BigInteger, init=False, primary_key=True)
    image_id: Mapped[Optional[int]] = mapped_column(BigInteger, ForeignKey('image.id'), nullable=True, comment="关联的图片ID")
    user_id: Mapped[Optional[int]] = mapped_column(BigInteger, ForeignKey('wx_user.id'), nullable=True, comment="关联的用户ID")
    image_chat_id: Mapped[Optional[int]] = mapped_column(BigInteger, ForeignKey('image_chat.id'), nullable=True, comment="关联的会话ID")
    card_type: Mapped[str] = mapped_column(String(30), nullable=False, comment="卡片类型(scene_sentence/scene_dialogue/sentence_exercise)")
    scene_tag: Mapped[Optional[str]] = mapped_column(String(50), default=None, comment="场景标签")
    # Card payload serialized as a JSON string (see the SentenceService generators).
    details: Mapped[Optional[str]] = mapped_column(Text, default=None, comment="句型详情JSON字符串")
    # BUG FIX: pass the callable `datetime.now`, not `datetime.now()` — the call
    # form is evaluated once at import time, stamping every row with that moment.
    called_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.now, comment="调用时间")

    __table_args__ = (
        Index('idx_sentence_card_image_id', 'image_id'),
        Index('idx_sentence_card_type_called', 'card_type', 'called_at'),
    )

View File

@@ -0,0 +1,18 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from typing import Optional
from pydantic import Field
from backend.common.schema import SchemaBase
class ImageChatSchemaBase(SchemaBase):
    """Shared fields for image-chat conversation schemas."""

    chat_type: str = Field(description="聊天类型(scene_sentence/scene_dialogue/sentence_exercise/general)")
    model_name: str = Field(description="模型名称")
    image_id: Optional[int] = Field(None, description="关联的图片ID")
    user_id: Optional[int] = Field(None, description="调用用户ID")
    ext: Optional[dict] = Field(None, description="扩展数据")
class CreateImageChatParam(ImageChatSchemaBase):
    """Creation payload for an image chat; inherits all base fields unchanged."""

    pass

View File

@@ -9,10 +9,10 @@ from backend.common.schema import SchemaBase
class ImageTaskSchemaBase(SchemaBase):
image_id: int
file_id: int
user_id: int
dict_level: str
type: str
ref_type: str
ref_id: int
status: ImageTaskStatus = ImageTaskStatus.PENDING
result: Optional[dict] = None
error_message: Optional[str] = None
@@ -40,4 +40,4 @@ class ImageTaskStatusResponse(BaseModel):
image_id: str
status: ImageTaskStatus
# result: Optional[dict] = None
error_message: Optional[str] = None
error_message: Optional[str] = None

View File

@@ -0,0 +1,70 @@
from typing import Optional
from backend.common.schema import SchemaBase
from backend.app.ai.schema.recording import RecordingAssessmentResult
class SentenceTaskRequest(SchemaBase):
    """Request body for launching a sentence-generation task for an image."""

    image_id: int
    # One of the SENTENCE_TYPE_* values (scene_sentence/scene_dialogue/sentence_exercise);
    # validated in SentenceService.create_scene_task.
    scene_type: str
class SentenceTaskResponse(SchemaBase):
    """Acknowledgement returned when a generation task is accepted."""

    # Stringified snowflake id of the background task.
    task_id: str
    status: str
class SentenceTaskStatusResponse(SchemaBase):
    """Polling response describing a generation task's current state."""

    task_id: str
    ref_type: str
    ref_id: str
    status: str
    # Populated only when the task has failed.
    error_message: Optional[str] = None
class SceneSentenceItem(SchemaBase):
    """One scene-sentence card as delivered to the frontend (camelCase wire format).

    Only the English/Chinese sentence pair is required; every enrichment field
    produced by the LLM is optional.
    """

    seq: Optional[int] = None
    sentenceEn: str
    sentenceZh: str
    functionTags: Optional[list] = None
    sceneExplanation: Optional[str] = None
    pronunciationTip: Optional[str] = None
    coreVocab: Optional[list] = None
    # Per-sentence glosses, index-aligned with coreVocab.
    coreVocabDesc: Optional[list] = None
    collocations: Optional[list] = None
    grammarPoint: Optional[str] = None
    commonMistakes: Optional[list] = None
    pragmaticAlternative: Optional[list] = None
    sceneTransferTip: Optional[str] = None
    difficultyTag: Optional[str] = None
    extendedExample: Optional[list] = None
    # Stringified id of the matched image_text row, when one was found.
    imageTextId: Optional[str] = None
    responsePairs: Optional[list] = None
    fluencyHacks: Optional[str] = None
    culturalNote: Optional[str] = None
    practiceSteps: Optional[list] = None
    avoidScenarios: Optional[str] = None
    selfCheckList: Optional[list] = None
    toneIntensity: Optional[str] = None
    similarSentenceDistinction: Optional[str] = None
    speechRateTip: Optional[str] = None
    personalizedTips: Optional[str] = None
class SceneSentenceContent(SchemaBase):
    """A full scene-sentence batch: identifiers plus the ordered card list."""

    # Identifiers are stringified to keep snowflake ids JSON-safe.
    id: str
    image_id: str
    user_id: str
    scene_tag: Optional[list[str]] = None
    total: int
    # NOTE: field name shadows builtin `list`, but it is part of the wire format.
    list: list[SceneSentenceItem]
class SentenceAssessmentRequest(SchemaBase):
    """Request to score an uploaded recording (file_id) against a reference sentence."""

    file_id: int
    ref_text: str
    # Type/id of the entity the recording refers to (e.g. a scene-sentence item).
    ref_type: str
    ref_id: int
class SentenceAssessmentResponse(SchemaBase):
    """Assessment result echoing the source file and optional reference keys."""

    # Required-but-nullable (no default) — callers must supply it explicitly.
    file_id: Optional[int]
    assessment_result: RecordingAssessmentResult
    ref_type: Optional[str] = None
    ref_id: Optional[str] = None

View File

@@ -0,0 +1,54 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from typing import Optional, List, Dict, Any
from sqlalchemy.ext.asyncio import AsyncSession
from backend.app.ai.crud.image_chat_crud import image_chat_dao
from backend.app.ai.crud.image_chat_msg_crud import image_chat_msg_dao
from backend.app.ai.model.image_chat import ImageChat, ImageChatMsg
from backend.app.ai.schema.image_chat import CreateImageChatParam
from backend.database.db import async_db_session
class ImageChatService:
    """Service layer for image-chat conversations and their message turns."""

    @staticmethod
    async def create(obj: CreateImageChatParam) -> Optional[ImageChat]:
        """Persist a new chat built from a validated schema object."""
        async with async_db_session.begin() as db:
            payload = obj.model_dump(exclude_none=True)
            return await image_chat_dao.create(db, payload)

    @staticmethod
    async def get_by_image_id(image_id: int) -> List[ImageChat]:
        """Return every chat attached to the given image."""
        async with async_db_session() as db:
            return await image_chat_dao.get_by_image_id(db, image_id)

    @staticmethod
    async def get(chat_id: int) -> Optional[ImageChat]:
        """Fetch a single chat by primary key."""
        async with async_db_session() as db:
            return await image_chat_dao.get(db, chat_id)

    @staticmethod
    async def create_conversation(payload: Dict[str, Any]) -> Optional[ImageChat]:
        """Persist a new chat from a raw dict payload."""
        async with async_db_session.begin() as db:
            return await image_chat_dao.create(db, payload)

    @staticmethod
    async def append_turn(chat_id: int, turn: Dict[str, Any]) -> Optional[ImageChatMsg]:
        """Append a message turn to a chat; returns None when the chat is missing.

        Within one transaction this also accumulates the turn's token usage onto
        the parent chat and refreshes its last-called timestamp.
        """
        async with async_db_session.begin() as db:
            parent = await image_chat_dao.get(db, chat_id)
            if parent is None:
                return None
            next_index = 1 + await image_chat_msg_dao.get_last_turn_index(db, chat_id)
            turn["image_chat_id"] = chat_id
            turn["turn_index"] = next_index
            created = await image_chat_msg_dao.create(db, turn)
            # Token usage may arrive under several key spellings depending on the provider.
            usage = turn.get("token_usage") or {}
            consumed = usage.get("total_tokens") or usage.get("TotalTokens") or usage.get("total") or 0
            if consumed:
                parent.total_tokens = (parent.total_tokens or 0) + int(consumed)
            parent.last_called_at = turn.get("called_at") or parent.last_called_at
            return created


image_chat_service = ImageChatService()

View File

@@ -276,11 +276,11 @@ class ImageService:
# 创建异步处理任务
task_params = CreateImageTaskParam(
image_id=image_id,
file_id=file_id,
user_id=current_user.id,
status=ImageTaskStatus.PENDING,
dict_level=dict_level,
type=type
ref_type="image",
ref_id=image_id,
)
task = await image_task_dao.create_task(db, task_params)
@@ -290,7 +290,7 @@ class ImageService:
await db.commit()
# 添加后台任务来处理图片识别
asyncio.create_task(ImageService._process_image_with_limiting(task_id, current_user.id))
asyncio.create_task(ImageService._process_image_with_limiting(task_id, current_user.id, type))
return {
"task_id": str(task_id),
@@ -299,7 +299,7 @@ class ImageService:
}
@staticmethod
async def _process_image_with_limiting(task_id: int, user_id: int) -> None:
async def _process_image_with_limiting(task_id: int, user_id: int, proc_type: str) -> None:
"""带限流控制的后台处理图片识别任务"""
task_processing_success = False
points_deducted = False
@@ -314,7 +314,7 @@ class ImageService:
except Exception:
pass
if need_recognition:
await ImageService._process_image_recognition(task_id)
await ImageService._process_image_recognition(task_id, proc_type)
# Step 2: Process all remaining steps in a single database transaction for consistency
async with background_db_session() as db:
@@ -437,7 +437,7 @@ class ImageService:
raise
@staticmethod
async def _process_image_recognition(task_id: int) -> None:
async def _process_image_recognition(task_id: int, proc_type: str) -> None:
"""后台处理图片识别任务 - compatible version for task processor"""
# This is maintained for backward compatibility with the task processor
# It creates its own database connection like the original implementation
@@ -467,7 +467,8 @@ class ImageService:
await db.commit()
# 下载文件(在数据库事务外执行)
file_content, file_name, content_type = await file_service.download_image_file(task.file_id)
image_record = await image_dao.get(db, task.image_id)
file_content, file_name, content_type = await file_service.download_image_file(image_record.file_id if image_record else None)
image_format = image_service.detect_image_format(file_content)
image_format_str = image_format.value
base64_image = base64.b64encode(file_content).decode('utf-8')
@@ -480,7 +481,7 @@ class ImageService:
file_name=file_name,
format=image_format_str,
data=base64_image,
type=task.type,
type=proc_type,
dict_level=task.dict_level
)

View File

@@ -91,6 +91,21 @@ class RecordingService:
async with async_db_session() as db:
return await recording_dao.get_standard_by_text_id(db, text_id)
@staticmethod
async def get_latest_result_by_text_id(text_id: int, user_id: int) -> Optional[dict]:
    """Return the latest non-standard recording result for an image text.

    Gives back basic identifiers (stringified for JSON safety) plus the stored
    assessment ``details``, or None when no recording exists.

    NOTE(review): ``user_id`` is accepted but not used in the lookup — confirm
    whether results should be scoped per user.
    """
    async with async_db_session() as db:
        rec = await recording_dao.get_latest_by_text_id(db, text_id)
        if not rec:
            return None
        return {
            "id": str(rec.id) if rec.id is not None else None,
            "image_id": str(rec.image_id) if rec.image_id is not None else None,
            "image_text_id": str(rec.image_text_id) if rec.image_text_id is not None else None,
            "file_id": str(rec.file_id) if rec.file_id is not None else None,
            "details": rec.details,
        }
@staticmethod
async def update_recording_details(id: int, details: dict) -> bool:
"""更新录音的评估详情"""
@@ -253,7 +268,7 @@ class RecordingService:
return metadata
@staticmethod
async def create_recording_record(file_id: int, ref_text: Optional[str] = None, image_id: Optional[int] = None, image_text_id: Optional[int] = None, eval_mode: Optional[int] = None, user_id: Optional[int] = None) -> int:
async def create_recording_record(file_id: int, ref_text: Optional[str] = None, image_id: Optional[int] = None, image_text_id: Optional[int] = None, eval_mode: Optional[int] = None, user_id: Optional[int] = None, ref_type: Optional[str] = None, ref_id: Optional[int] = None) -> int:
"""创建录音记录并提取基本元数据"""
try:
# 检查文件是否存在
@@ -273,7 +288,9 @@ class RecordingService:
eval_mode=eval_mode,
info=metadata,
text=ref_text,
user_id=user_id
user_id=user_id,
ref_type=ref_type,
ref_id=ref_id
)
db.add(recording)
await db.commit()
@@ -284,7 +301,7 @@ class RecordingService:
raise RuntimeError(f"Failed to create recording record for file_id {file_id}: {str(e)}")
@staticmethod
async def create_recording_record_with_details(file_id: int, ref_text: Optional[str] = None, image_id: Optional[int] = None, image_text_id: Optional[int] = None, eval_mode: Optional[int] = None, user_id: Optional[int] = None, details: Optional[dict] = None, is_standard: bool = False) -> int:
async def create_recording_record_with_details(file_id: int, ref_text: Optional[str] = None, image_id: Optional[int] = None, image_text_id: Optional[int] = None, eval_mode: Optional[int] = None, user_id: Optional[int] = None, details: Optional[dict] = None, is_standard: bool = False, ref_type: Optional[str] = None, ref_id: Optional[int] = None) -> int:
"""创建录音记录并设置详细信息"""
try:
# 检查文件是否存在
@@ -306,7 +323,9 @@ class RecordingService:
text=ref_text,
details=details,
is_standard=is_standard, # 设置标准音频标记
user_id=user_id
user_id=user_id,
ref_type=ref_type,
ref_id=ref_id
)
db.add(recording)
await db.commit()
@@ -413,6 +432,53 @@ class RecordingService:
# 重新抛出异常
raise RuntimeError(f"Failed to assess recording: {str(e)}")
async def assess_scene_sentence(self, file_id: int, ref_text: str, ref_type: str, ref_id: int, user_id: int = 0, background_tasks=None) -> dict:
    """Assess a scene-sentence recording against its reference text.

    Creates the recording row on first sight of the file, checks the user's
    points, runs the Tencent speech assessment, stores the result into the
    recording's details, deducts points, and logs an audit entry on both the
    success and failure paths.

    :raises RuntimeError: on record-creation failure, insufficient points, or
        assessment/update failure (the latter audit-logged with status 500).
    """
    start_time = time.time()
    status_code = 200
    error_message = None
    # No specific image is associated with a scene-sentence assessment.
    image_id = None
    recording = await self.get_recording_by_file_id(file_id)
    if not recording:
        try:
            # NOTE(review): the returned id is unused — the row is re-fetched so
            # the rest of the flow works with the ORM object.
            recording_id = await self.create_recording_record(file_id, ref_text, image_id, None, 1, user_id, ref_type, ref_id)
            recording = await self.get_recording_by_file_id(file_id)
            if not recording:
                raise RuntimeError(f"Failed to create recording record for file_id {file_id}")
        except Exception as e:
            raise RuntimeError(f"Failed to create recording record for file_id {file_id}: {str(e)}")
    # NOTE(review): the points check runs after the record is created, so an
    # out-of-points user still leaves an unassessed recording row — confirm intent.
    if not await points_service.check_sufficient_points(user_id, SPEECH_ASSESSMENT_COST):
        raise RuntimeError('积分不足,请充值以继续使用')
    try:
        result = await self.tencent_cloud.assessment_speech(file_id, ref_text, str(recording.id), image_id, user_id)
        details = {"assessment": result}
        success = await self.update_recording_details(recording.id, details)
        if not success:
            raise RuntimeError(f"Failed to update recording details for file_id {file_id}")
        # Deduct points in their own transaction once assessment succeeded.
        async with async_db_session.begin() as db:
            await points_service.deduct_points_with_db(
                user_id=user_id,
                amount=SPEECH_ASSESSMENT_COST,
                db=db,
                related_id=recording.id,
                details={"recording_id": recording.id},
                action=POINTS_ACTION_SPEECH_ASSESSMENT
            )
        duration = time.time() - start_time
        if background_tasks:
            self._log_audit(background_tasks, file_id, ref_text, result, duration, status_code, user_id, image_id, 0)
        return result
    except Exception as e:
        # Best-effort: persist the error into the recording's details before re-raising.
        error_result = {"assessment": {"error": str(e)}}
        try:
            await self.update_recording_details(recording.id, error_result)
        except Exception:
            pass
        status_code = 500
        error_message = str(e)
        duration = time.time() - start_time
        if background_tasks:
            self._log_audit(background_tasks, file_id, ref_text, error_result, duration, status_code, user_id, image_id, 0, error_message)
        raise RuntimeError(f"Failed to assess recording: {str(e)}")
@staticmethod
def _log_audit(background_tasks, file_id: int, ref_text: Optional[str], result: dict,
duration: float, status_code: int, user_id: int, image_id: Optional[int], image_text_id: int, error_message: Optional[str] = None):

View File

@@ -0,0 +1,74 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from typing import Optional, List, Dict, Any
from datetime import datetime
from backend.database.db import async_db_session
from backend.app.ai.crud.scene_sentence_crud import scene_sentence_dao, scene_sentence_item_dao
from backend.app.ai.crud.image_text_crud import image_text_dao
from backend.app.admin.schema.wx import DictLevel
class SceneSentenceService:
    """Persists a generated scene-sentence batch together with its per-sentence items."""

    @staticmethod
    async def create_session_with_items(
        image_id: int,
        user_id: int,
        image_chat_id: Optional[int],
        scene_tag: Optional[List[str]] = None,
        items: Optional[List[Dict[str, Any]]] = None,
        called_at: Optional[datetime] = None,
    ) -> Dict[str, Any]:
        """Create one scene_sentence row plus its scene_sentence_item rows.

        :param image_id: owning image id.
        :param user_id: owning user id.
        :param image_chat_id: chat that produced the batch; drives the per-chat seq.
        :param scene_tag: optional scene labels stored on the batch.
        :param items: generator output dicts (snake_case or camelCase keys accepted).
        :param called_at: generation timestamp; defaults to now.
        :return: ``{"sentence_id": <batch id>, "count": <items created>}``.
        """
        # BUG FIX: the previous defaults were `items=[]` (shared mutable default)
        # and `called_at=datetime.now()` (evaluated once at import time). Use
        # None sentinels and resolve them per call — backward compatible.
        items = items or []
        if called_at is None:
            called_at = datetime.now()
        async with async_db_session.begin() as db:
            last_seq = 0
            if image_chat_id:
                last_seq = await scene_sentence_dao.get_last_seq(db, image_chat_id)
            sentence = await scene_sentence_dao.create(db, {
                "image_id": image_id,
                "user_id": user_id,
                "image_chat_id": image_chat_id,
                "scene_tag": scene_tag,
                "seq": int(last_seq) + 1,
                "total_count": len(items),
                "viewed_count": 0,
                "practiced_count": 0,
                "called_at": called_at,
                "ext": {},
            })
            payloads = []
            # Map normalized level-2 text content -> image_text id for linkage.
            level2_texts = await image_text_dao.get_by_image_id_and_level(db, image_id, DictLevel.LEVEL2.value)
            # BUG FIX: the lookup key below is lowercased, so the map keys must be
            # lowercased too — otherwise mixed-case texts silently never match.
            text_map = {(t.content or "").strip().lower(): t.id for t in level2_texts}
            for idx, it in enumerate(items):
                content_key = (it.get("sentence_en") or it.get("sentenceEn", "")).strip().lower()
                image_text_id = text_map.get(content_key)
                # Everything beyond the core columns lives in the JSON `ext` blob.
                ext_payload = {
                    "scene_explanation": it.get("scene_explanation") or it.get("sceneExplanation"),
                    "pronunciation_tip": it.get("pronunciation_tip") or it.get("pronunciationTip"),
                    "pronunciation_url": it.get("pronunciation_url"),
                    "core_vocab_desc": it.get("core_vocab_desc") or [],
                    "grammar_point": it.get("grammar_point"),
                    "common_mistakes": it.get("common_mistakes"),
                    "pragmatic_alternative": it.get("pragmatic_alternative") or [],
                    "scene_transfer_tip": it.get("scene_transfer_tip"),
                    "difficulty_tag": it.get("difficulty_tag"),
                    "extended_example": it.get("extended_example"),
                    "response_pairs": it.get("response_pairs") or it.get("responsePairs") or [],
                    "fluency_hacks": it.get("fluency_hacks") or it.get("fluencyHacks"),
                    "cultural_note": it.get("cultural_note") or it.get("culturalNote"),
                    "practice_steps": it.get("practice_steps") or it.get("practiceSteps") or [],
                    "avoid_scenarios": it.get("avoid_scenarios") or it.get("avoidScenarios"),
                    "self_check_list": it.get("self_check_list") or it.get("selfCheckList") or [],
                    "tone_intensity": it.get("tone_intensity") or it.get("toneIntensity"),
                    "similar_sentence_distinction": it.get("similar_sentence_distinction") or it.get("similarSentenceDistinction"),
                    "speech_rate_tip": it.get("speech_rate_tip") or it.get("speechRateTip"),
                    "personalized_tips": it.get("personalized_tips") or it.get("personalizedTips"),
                }
                payloads.append({
                    "sentence_id": sentence.id,
                    "seq": idx,
                    "image_text_id": image_text_id,
                    "sentence_en": it.get("sentence_en") or it.get("sentenceEn", ""),
                    "sentence_zh": it.get("sentence_zh") or it.get("sentenceZh", ""),
                    "function_tags": it.get("function_tags") or it.get("functionTags") or [],
                    "core_vocab": it.get("core_vocab") or it.get("coreVocab") or [],
                    "collocations": it.get("collocations") or [],
                    "ext": ext_payload,
                })
            created_items = await scene_sentence_item_dao.create_many(db, payloads)
            return {"sentence_id": sentence.id, "count": len(created_items)}


scene_sentence_service = SceneSentenceService()

View File

@@ -0,0 +1,464 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from typing import Optional, List, Dict, Any
from datetime import datetime
import json
from backend.app.ai.crud.sentence_card_crud import sentence_card_dao
from backend.app.ai.crud.scene_sentence_crud import scene_sentence_dao, scene_sentence_item_dao
from backend.app.ai.model.sentence_card import SentenceCard
from backend.app.ai.schema.image_chat import CreateImageChatParam
from backend.app.ai.service.image_chat_service import image_chat_service
from backend.app.ai.crud.image_curd import image_dao
from backend.database.db import async_db_session, background_db_session
from backend.core.conf import settings
from backend.middleware.qwen import Qwen
from backend.middleware.tencent_hunyuan import Hunyuan
from backend.app.admin.schema.wx import DictLevel
from backend.app.ai.service.scene_sentence_service import scene_sentence_service
from backend.app.ai.model.image_task import ImageTaskStatus, ImageProcessingTask
from backend.app.ai.crud.image_task_crud import image_task_dao
from backend.app.ai.schema.image_task import CreateImageTaskParam
from backend.app.admin.service.points_service import points_service
from backend.app.ai.service.rate_limit_service import rate_limit_service
from backend.common.const import SENTENCE_CARD_COST, SENTENCE_TYPE_SCENE_SENTENCE, SENTENCE_TYPE_SCENE_DIALOGUE, SENTENCE_TYPE_SCENE_EXERCISE
class SentenceService:
@staticmethod
def _compose_prompt(payload: dict, mode: str) -> str:
    """Build the LLM prompt for the requested generation mode.

    :param payload: prompt inputs serialized into the prompt as JSON
        (description, desc_en/desc_zh, core_vocab, collocations, user_level...).
    :param mode: one of the SENTENCE_TYPE_* constants; unknown modes fall
        through to the generic base prompt without a structure section.
    :return: full prompt string (instructions + expected JSON output schema).
    """
    # Generic instruction header; replaced wholesale in scene-sentence mode.
    base = (
        "你是英语教育场景的专业助手,需基于给定的图片场景信息和基础内容,扩展生成适配英语进阶学习者的「句型卡片、模拟场景对话、句型套用练习」结构化内容,所有内容需贴合场景、功能导向,无语义重复,且符合日常沟通逻辑。\n"
        "输入信息如下JSON\n"
        f"{json.dumps(payload, ensure_ascii=False)}\n"
        "输出要求:\n"
        "1. 内容约束:基于基础句型扩展功能标签、场景说明,每句补充「发音提示(重音/连读)」\n"
        "2. 格式约束严格按照下方JSON结构输出无额外解释确保字段完整、值为数组/字符串类型。\n"
        "3. 语言约束所有英文内容符合日常沟通表达无语法错误中文翻译精准场景说明简洁易懂≤50字\n"
    )
    if mode == SENTENCE_TYPE_SCENE_SENTENCE:
        # Scene-sentence mode uses its own, stricter header.
        base = (
            "你是英语教育场景的专业助手,需基于给定的图片场景信息和基础内容,扩展生成适配英语进阶学习者的[场景句型]结构化内容,所有内容需贴合场景、功能导向,无语义重复,且符合日常沟通逻辑。\n"
            "输入信息如下JSON\n"
            f"{json.dumps(payload, ensure_ascii=False)}\n"
            "输出要求:\n"
            "0. description是图片的详细描述围绕描述展开后续的分析。\n"
            "1. 内容约束:基于基础句型扩展功能标签、场景说明,每句补充「发音提示(重音/连读)」等输出结构中要求的内容,需符合现实生活和真实世界的习惯。\n"
            "2. 格式约束严格按照下方JSON结构输出无额外解释确保字段完整、值为数组/字符串类型。\n"
            "3. 语言约束所有英文内容符合日常沟通表达无语法错误中文翻译精准场景说明简洁易懂≤50字\n"
            "4. 严格按照JSON结构输出无额外解释确保字段完整、值为数组/字符串类型,输出的 json 结构是:\n"
        )
        # Expected JSON schema, described to the model as annotated pseudo-JSON.
        struct = (
            """
"sentence": { // 对象:场景句型模块(适配前端展示)
"total": 5, // 数字:句型数量(5-8)
"list": [ // 数组场景句型列表数量与total一致
{ "seq": 1, // 数字序号1-8
"sentence_en": "", // 字符串:英文句型, 使用输入信息中的 desc_en 与之顺序对应的句子
"sentence_zh": "", // 字符串:中文翻译,使用输入信息中的 desc_zh 与之顺序对应的句子
"function_tags": ["询问", "索要物品"], // 数组:功能标签(主+子)
"scene_explanation": "咖啡厅场景向店员礼貌索要菜单比“Give me the menu”更得体", // 字符串场景使用说明≤50字
"pronunciation_tip": "重音在menu /ˈmenjuː/have a look at 连读为 /hævəlʊkæt/", // 字符串:发音提示(重音/连读)
"core_vocab": ["menu", "look"], // 数组:核心词汇
"core_vocab_desc": ["n. 菜单", "v. 查看"], // 数组核心词汇在此句型中的含义与core_vocab顺序对应
"collocations": ["have a look at + 物品(查看某物)"], // 数组:核心搭配
"grammar_point": "情态动词Can表请求非正式主谓倒装结构Can + 主语 + 动词原形", // 核心语法解析
"common_mistakes": ["1. 漏介词atCan I have a look the menu", "2. look误读为/lʊk/(正确/luːk/", "3. 忘记在look后加atCan I have a look at the menu", ...], // 数组:句型中语法或单词用法可能出错的地方,包括但不限于常见发音错误,场景语气不当,单词单复数错误,主谓倒装错误、省略介词、省略主语等语法错误;
"pragmatic_alternative": ["Could I have a look at the menu?(更礼貌,正式场景)", "May I see the menu?(更正式,高阶)", ...], // 语用替代表达
"scene_transfer_tip": "迁移至餐厅场景Can I have a look at the wine list?把menu替换为wine list", // 场景迁移提示
"difficulty_tag": "intermediate", // 难度标签beginner/intermediate/advanced
"extended_example": ["Can I have a look at your phone?(向朋友借看手机,非正式场景)", ""], // 数组: 精简拓展例句
"response_pairs": [], // 数组对话回应搭配3-4个核心回应含肯定/否定/中性,带场景适配说明,设计意图:形成对话闭环,支持角色扮演/实际互动)
"fluency_hacks": "", // 字符串口语流畅度技巧≤30字聚焦填充词/弱读/语气调节,设计意图:贴近母语者表达节奏,避免生硬卡顿)
"cultural_note": "", // 字符串文化适配提示≤40字说明中外表达习惯差异设计意图避免文化误解提升沟通得体性
"practice_steps": [], // 数组分阶练习步骤3步每步1句话可操作设计意图提供明确学习路径衔接输入与输出提升口语落地能力
"avoid_scenarios": "", // 字符串避免使用场景≤35字明确禁忌场景+替代方案,设计意图:减少用错场合的尴尬,明确使用边界)
"self_check_list": [], // 数组自我检测清单3-4个可量化检查点含语法/发音/流畅度维度,设计意图:提供即时自查工具,无需他人批改验证效果)
"tone_intensity": "", // 字符串语气强度标注≤35字用“弱/中/强”+适用对象描述,设计意图:直观匹配语气与互动对象,避免语气不当)
"similar_sentence_distinction": "", // 字符串相似句型辨析≤40字聚焦使用场景+核心差异,不搞复杂语法,设计意图:理清易混点,避免张冠李戴)
"speech_rate_tip": "", // 字符串语速建议≤25字明确日常场景语速+关键部分节奏,设计意图:让表达更自然,提升沟通效率)
"personalized_tips": "" // 字符串个性化学习提示≤30字分初学者/进阶者给出重点建议,设计意图:适配不同水平需求,提升学习针对性)
} ] }
"""
        )
        return base + struct
    if mode == SENTENCE_TYPE_SCENE_DIALOGUE:
        # Dialogue mode appends a role-play dialogue schema to the generic header.
        struct = (
            """
"dialog": { // 对象:模拟场景对话模块(适配前端对话交互)
"roleOptions": ["customer", "barista"], // 数组可选角色固定值customer/barista
"defaultRole": "customer", // 字符串默认角色customer/barista二选一
"dialogRound": 2, // 数字对话轮数2-3轮
"list": [ // 数组对话轮次列表数量与dialogRound一致
{
"roundId": "dialog-001", // 字符串轮次唯一ID
"speaker": "barista", // 字符串本轮说话者customer/barista
"speakerEn": "Can I help you?", // 字符串:说话者英文内容
"speakerZh": "请问需要点什么?", // 字符串:说话者中文翻译
"responseOptions": [ // 数组用户可选回应固定3条
{
"optionId": "resp-001", // 字符串选项唯一ID
"optionEn": "I'd like to order a latte with less sugar.", // 字符串:选项英文内容
"optionZh": "我想点一杯少糖的拿铁。", // 字符串:选项中文翻译
"feedback": "✅ 完美该句型是咖啡厅点餐核心表达with精准补充饮品定制要求" // 字符串:选择后的交互反馈
}
]
}
]
}
"""
        )
        return base + "生成场景对话结构:" + struct
    if mode == SENTENCE_TYPE_SCENE_EXERCISE:
        # Exercise mode appends a fill-in-the-blank practice schema.
        struct = (
            """
"sentencePractice": { // 对象:句型套用练习模块(适配前端填空练习)
"total": 5, // 数字练习数量5-8道
"list": [ // 数组练习列表数量与total一致
{
"practiceId": "practice-001", // 字符串练习唯一ID
"baseSentenceEn": "I'd like to order ______", // 字符串:基础句型框架(挖空)
"baseSentenceZh": "我想点______", // 字符串:框架中文翻译
"keywordPool": [ // 数组可选关键词池3-4个
{
"wordEn": "latte", // 字符串:英文关键词
"wordZh": "拿铁", // 字符串:中文翻译
"type": "drink" // 字符串词汇类型drink/custom/food等
}
],
"wrongTips": [ // 数组常见错误提示2-3条
"错误order + bread面包→ 咖啡厅场景中order后优先接饮品面包需用“have”搭配"
],
"extendScene": { // 对象:拓展场景(迁移练习)
"sceneTag": "milk_tea_shop", // 字符串:拓展场景标签
"extendSentenceEn": "I'd like to order ______", // 字符串:拓展句型框架
"extendKeywordPool": ["milk tea", "taro balls", "sugar-free"] // 数组:拓展关键词池
}
}
]
"""
        )
        return base + "生成句型练习结构:" + struct
    # Unknown mode: return the generic header only.
    return base
@staticmethod
async def generate_scene_sentence(image_id: int, user_id: int, payload: dict) -> Optional[dict]:
    """Generate scene-sentence cards via the LLM and persist them as a batch.

    :param image_id: image the sentences belong to.
    :param user_id: requesting user.
    :param payload: recognition-derived prompt inputs (description, desc_en/zh,
        core_vocab, collocations, scene_tag, optional user_level).
    :return: ``{"sentence_id", "count"}`` from the persistence layer, or None
        when the model call fails.
    """
    if "user_level" not in payload:
        payload["user_level"] = "intermediate"
    prompt = SentenceService._compose_prompt(payload, SENTENCE_TYPE_SCENE_SENTENCE)
    start_at = datetime.now()
    res = await Hunyuan.chat(
        messages=[{"role": "user", "content": prompt}],
        image_id=image_id,
        user_id=user_id,
        system_prompt=None,
        chat_type=SENTENCE_TYPE_SCENE_SENTENCE
    )
    if not res.get("success"):
        return None
    image_chat_id = res.get("image_chat_id")
    # The model may return either a JSON string or an already-parsed object;
    # fall back to an empty dict on malformed/absent output. (Also guards the
    # old crash when "result" existed but was None.)
    parsed = {}
    try:
        raw = res.get("result")
        parsed = json.loads(raw) if isinstance(raw, str) else (raw or {})
    except Exception:
        parsed = {}
    # BUG FIX: removed stray debug `print(parsed)` from the hot path.
    items = []
    sc = parsed.get("sentence") or {}
    for idx, d in enumerate(sc.get("list", []), start=1):
        # Normalize snake_case/camelCase model output to the storage shape.
        # NOTE(review): core_vocab/collocations come from the input payload, not
        # the per-sentence model output — confirm this is intended.
        items.append({
            "seq": d.get("seq") or idx,
            "sentence_en": d.get("sentence_en") or d.get("sentenceEn", ""),
            "sentence_zh": d.get("sentence_zh") or d.get("sentenceZh", ""),
            "function_tags": d.get("function_tags") or d.get("functionTags", []),
            "scene_explanation": d.get("scene_explanation") or d.get("sceneExplanation", ""),
            "pronunciation_tip": d.get("pronunciation_tip") or d.get("pronunciationTip", ""),
            "pronunciation_url": d.get("pronunciation_url"),
            "core_vocab": payload.get("core_vocab") or payload.get("coreVocab") or [],
            "core_vocab_desc": d.get("core_vocab_desc") or [],
            "collocations": payload.get("collocations", []),
            "grammar_point": d.get("grammar_point"),
            "common_mistakes": d.get("common_mistakes"),
            "pragmatic_alternative": d.get("pragmatic_alternative") or [],
            "scene_transfer_tip": d.get("scene_transfer_tip"),
            "difficulty_tag": d.get("difficulty_tag") or "intermediate",
            "extended_example": d.get("extended_example"),
            "response_pairs": d.get("response_pairs") or d.get("responsePairs") or [],
            "fluency_hacks": d.get("fluency_hacks") or d.get("fluencyHacks"),
            "cultural_note": d.get("cultural_note") or d.get("culturalNote"),
            "practice_steps": d.get("practice_steps") or d.get("practiceSteps") or [],
            "avoid_scenarios": d.get("avoid_scenarios") or d.get("avoidScenarios"),
            "self_check_list": d.get("self_check_list") or d.get("selfCheckList") or [],
            "tone_intensity": d.get("tone_intensity") or d.get("toneIntensity"),
            "similar_sentence_distinction": d.get("similar_sentence_distinction") or d.get("similarSentenceDistinction"),
            "speech_rate_tip": d.get("speech_rate_tip") or d.get("speechRateTip"),
            "personalized_tips": d.get("personalized_tips") or d.get("personalizedTips"),
        })
    return await scene_sentence_service.create_session_with_items(
        image_id=image_id,
        user_id=user_id,
        image_chat_id=image_chat_id,
        scene_tag=payload.get("scene_tag") or [],
        items=items,
        called_at=start_at
    )
@staticmethod
async def generate_scene_dialogue(image_id: int, user_id: int, scene_tag: str, desc_en: List[str], desc_zh: List[str], core_vocab: List[str], collocations: List[str]) -> List[SentenceCard]:
    """Generate dialogue cards for an image scene, persisting one row per card.

    :return: the created SentenceCard rows (empty when nothing parseable came
        back); None when the model call itself fails.
    """
    payload = {
        "scene_tag": scene_tag,
        "desc_en": desc_en,
        "desc_zh": desc_zh,
        "core_vocab": core_vocab,
        "collocations": collocations,
        "user_level": "intermediate"
    }
    prompt = SentenceService._compose_prompt(payload, SENTENCE_TYPE_SCENE_DIALOGUE)
    start_at = datetime.now()
    res = await Hunyuan.chat(
        messages=[{"role": "user", "content": prompt}],
        image_id=image_id,
        user_id=user_id,
        system_prompt=None,
        chat_type=SENTENCE_TYPE_SCENE_DIALOGUE
    )
    if not res.get("success"):
        return None
    image_chat_id = res.get("image_chat_id")
    parsed = {}
    try:
        raw = res.get("result")
        parsed = json.loads(raw) if isinstance(raw, str) else (raw or {})
    except Exception:
        parsed = {}
    items = []
    # NOTE(review): the dialogue prompt asks the model for a "dialog" object,
    # yet parsing reads "sentenceCard" — confirm which key the model actually
    # emits before changing this.
    sc = parsed.get("sentenceCard") or {}
    for d in sc.get("list", []):
        # NOTE(review): every card in one batch shares this timestamp-based id.
        sid = f"card-{int(start_at.timestamp())}"
        items.append({
            "sentenceId": sid,
            "sentenceEn": d.get("sentenceEn"),
            "sentenceZh": d.get("sentenceZh"),
            "functionTags": d.get("functionTags") or [],
            "sceneExplanation": d.get("sceneExplanation") or "",
            "pronunciationTip": d.get("pronunciationTip"),
            "coreVocab": d.get("coreVocab") or core_vocab,
            "collocations": d.get("collocations") or collocations
        })
    created: List[SentenceCard] = []
    async with async_db_session.begin() as db:
        # BUG FIX: dropped an unused `image_dao.get(db, image_id)` round-trip;
        # nothing below consumed the result.
        for item in items:
            card = await sentence_card_dao.create(db, {
                "image_id": image_id,
                "user_id": user_id,
                "image_chat_id": image_chat_id,
                # BUG FIX: dialogue cards were mislabelled SENTENCE_TYPE_SCENE_SENTENCE,
                # breaking card_type-based queries (idx_sentence_card_type_called).
                "card_type": SENTENCE_TYPE_SCENE_DIALOGUE,
                "scene_tag": scene_tag,
                "details": json.dumps(item, ensure_ascii=False),
                "called_at": start_at,
            })
            created.append(card)
    return created
@staticmethod
async def generate_sentence_exercise_card(image_id: int, user_id: int, scene_tag: str, desc_en: List[str], desc_zh: List[str], core_vocab: List[str], collocations: List[str]) -> List[SentenceCard]:
    """Build exercise cards directly from the recognized sentence pairs (no LLM call).

    Pairs desc_en/desc_zh index-by-index (extra entries in the longer list are
    dropped) and stores one SentenceCard row per pair.
    """
    start_at = datetime.now()
    items = []
    max_len = min(len(desc_en or []), len(desc_zh or []))
    for idx in range(max_len):
        # NOTE(review): every card in one batch shares this timestamp-based id.
        sid = f"card-{int(start_at.timestamp())}"
        items.append({
            "sentenceId": sid,
            "sentenceEn": desc_en[idx],
            "sentenceZh": desc_zh[idx],
            "functionTags": [],
            "sceneExplanation": "",
            "pronunciationTip": None,
            "coreVocab": core_vocab,
            "collocations": collocations
        })
    created: List[SentenceCard] = []
    async with async_db_session.begin() as db:
        # BUG FIX: dropped an unused `image_dao.get(db, image_id)` round-trip;
        # nothing below consumed the result.
        for item in items:
            card = await sentence_card_dao.create(db, {
                "image_id": image_id,
                "user_id": user_id,
                "image_chat_id": None,
                "card_type": SENTENCE_TYPE_SCENE_EXERCISE,
                "scene_tag": scene_tag,
                "details": json.dumps(item, ensure_ascii=False),
                "called_at": start_at,
            })
            created.append(card)
    return created
@staticmethod
async def create_scene_task(image_id: int, user_id: int, scene_type: str) -> dict:
    """Validate, rate-limit and enqueue a background sentence-generation task.

    :return: ``{"task_id": <str id>, "status": "accepted"}``.
    :raises errors.BadRequestError: unsupported scene_type.
    :raises errors.ForbiddenError: insufficient points or concurrency limit hit.
    :raises errors.NotFoundError: image does not exist.
    """
    from backend.common.exception import errors
    # BUG FIX: validate the cheap preconditions BEFORE acquiring the task slot;
    # previously an unsupported scene_type (or missing image) raised after the
    # slot was taken and the user's concurrency budget leaked.
    if scene_type not in (SENTENCE_TYPE_SCENE_DIALOGUE, SENTENCE_TYPE_SCENE_EXERCISE, SENTENCE_TYPE_SCENE_SENTENCE):
        raise errors.BadRequestError(msg=f'不支持的任务类型 {scene_type}')
    if not await points_service.check_sufficient_points(user_id, SENTENCE_CARD_COST):
        raise errors.ForbiddenError(msg='积分不足,请充值以继续使用')
    slot_acquired = await rate_limit_service.acquire_task_slot(user_id)
    if not slot_acquired:
        max_tasks = await rate_limit_service.get_user_task_limit(user_id)
        raise errors.ForbiddenError(msg=f'用户同时最多只能运行 {max_tasks} 个任务,请等待现有任务完成后再试')
    # From here on the slot is held: release it on any failure before the
    # background task (which releases it in its `finally`) has been spawned.
    try:
        async with async_db_session.begin() as db:
            image = await image_dao.get(db, image_id)
            if not image:
                raise errors.NotFoundError(msg='图片不存在')
            dict_level = DictLevel.LEVEL2.value
            task_params = CreateImageTaskParam(
                image_id=image_id,
                user_id=user_id,
                status=ImageTaskStatus.PENDING,
                dict_level=dict_level,
                ref_type=scene_type,
                ref_id=image_id,
            )
            task = await image_task_dao.create_task(db, task_params)
            await db.flush()
            task_id = task.id
            await db.commit()
    except Exception:
        await rate_limit_service.release_task_slot(user_id)
        raise
    import asyncio
    asyncio.create_task(SentenceService._process_scene_task(task_id, user_id))
    return {"task_id": str(task_id), "status": "accepted"}
@staticmethod
async def _process_scene_task(task_id: int, user_id: int) -> None:
    """Background worker: run the requested generation, deduct points, finish the task.

    Flow: (1) load the task + image recognition result and dispatch to the
    matching generator; (2) in a second transaction, deduct points and mark the
    task COMPLETED; on any error, best-effort mark it FAILED. The rate-limit
    slot acquired in create_scene_task is always released in `finally`.
    """
    from backend.common.log import log as logger
    # NOTE(review): task_processing_success is set but never read — candidate for removal.
    task_processing_success = False
    points_deducted = False
    try:
        async with background_db_session() as db:
            task = await image_task_dao.get(db, task_id)
            image = await image_dao.get(db, task.image_id)
            if not image or not image.details or "recognition_result" not in image.details:
                raise Exception("Image Recognition result not found")
            recognition = image.details["recognition_result"]
            scene_tag = recognition.get("scene_tag") or image.details.get("scene_tag") or []
            description = recognition.get("description") or ""
            level2 = recognition.get("level2") or {}
            desc_en = level2.get("desc_en", [])
            desc_zh = level2.get("desc_zh", [])
            core_vocab = level2.get("core_vocab", [])
            collocations = level2.get("collocations", [])
            # Dispatch by the task's ref_type (set from scene_type at creation).
            if task.ref_type == SENTENCE_TYPE_SCENE_DIALOGUE:
                cards = await SentenceService.generate_scene_dialogue(task.image_id, task.user_id, scene_tag, desc_en, desc_zh, core_vocab, collocations)
            elif task.ref_type == SENTENCE_TYPE_SCENE_EXERCISE:
                cards = await SentenceService.generate_sentence_exercise_card(task.image_id, task.user_id, scene_tag, desc_en, desc_zh, core_vocab, collocations)
            elif task.ref_type == SENTENCE_TYPE_SCENE_SENTENCE:
                payload = {
                    "description": description,
                    "scene_tag": scene_tag,
                    "desc_en": desc_en,
                    "desc_zh": desc_zh,
                    "core_vocab": core_vocab,
                    "collocations": collocations,
                    "user_level": "intermediate",
                }
                cards = await SentenceService.generate_scene_sentence(task.image_id, task.user_id, payload)
            else:
                raise Exception(f"Unsupported card type: {task.ref_type}")
        # NOTE(review): `task` attributes are read below after its session closed —
        # relies on the ORM not expiring attributes; confirm session settings.
        # NOTE(review): in scene-sentence mode `cards` is a dict ({"sentence_id",
        # "count"}), so len(cards) reports the key count, not the card count; it
        # is None when the model call fails, making len() raise (handled as FAILED).
        async with background_db_session() as db:
            await db.begin()
            image = await image_dao.get(db, task.image_id)
            points_deducted = await points_service.deduct_points_with_db(
                user_id=task.user_id,
                amount=SENTENCE_CARD_COST,
                db=db,
                related_id=image.id if image else None,
                details={"task_id": task_id, "sentence_type": task.ref_type},
                action=task.ref_type
            )
            if not points_deducted:
                raise Exception("Failed to deduct points")
            from backend.app.ai.tasks import update_task_status_with_retry
            await update_task_status_with_retry(db, task_id, ImageTaskStatus.COMPLETED, result={"count": len(cards)})
            await db.commit()
            task_processing_success = True
    except Exception as e:
        logger.error(f"Error processing sentence card task {task_id}: {str(e)}")
        # Best-effort: record the failure on the task row; swallow secondary errors.
        try:
            async with background_db_session() as db:
                await db.begin()
                from backend.app.ai.tasks import update_task_status_with_retry
                await update_task_status_with_retry(db, task_id, ImageTaskStatus.FAILED, error_message=str(e))
                await db.commit()
        except Exception:
            pass
    finally:
        # Always free the user's concurrency slot taken in create_scene_task.
        try:
            await rate_limit_service.release_task_slot(user_id)
        except Exception:
            pass
@staticmethod
async def get_task_status(task_id: int) -> dict:
    """Return a serializable status snapshot for an image task.

    :param task_id: primary key of the task row.
    :return: dict with task_id/ref_type/ref_id as strings, the raw status,
        and error_message (populated only when the task has FAILED).
    :raises NotFoundError: when no task exists for ``task_id``.
    """
    async with async_db_session() as db:
        # Imported locally to mirror the file's existing lazy-import pattern.
        from backend.common.exception import errors

        task = await image_task_dao.get(db, task_id)
        if not task:
            raise errors.NotFoundError(msg="Task not found")
        has_failed = task.status == ImageTaskStatus.FAILED
        return {
            "task_id": str(task.id),
            "ref_type": task.ref_type,
            "ref_id": str(task.ref_id),
            "status": task.status,
            "error_message": task.error_message if has_failed else None,
        }
@staticmethod
async def get_latest_scene_sentence(image_id: int, user_id: int) -> dict | None:
    """Fetch the newest scene-sentence session for (image_id, user_id).

    :return: ``None`` when the user has no session for this image; otherwise
        a dict with the session header plus its items serialized into the
        camelCase payload shape consumed by the frontend.
    """
    async with async_db_session() as db:
        sentence = await scene_sentence_dao.get_latest_by_image_and_user(db, image_id, user_id)
        if not sentence:
            return None
        rows = await scene_sentence_item_dao.get_by_session(db, sentence.id)

        def _serialize(row) -> dict:
            # ``ext`` is a free-form JSON column; treat a missing value as {}.
            extra = row.ext or {}
            return {
                "seq": row.seq,
                "sentenceEn": row.sentence_en,
                "sentenceZh": row.sentence_zh,
                "functionTags": row.function_tags or [],
                "sceneExplanation": extra.get("scene_explanation") or "",
                "pronunciationTip": extra.get("pronunciation_tip"),
                "coreVocab": row.core_vocab or [],
                "coreVocabDesc": extra.get("core_vocab_desc") or [],
                "collocations": row.collocations or [],
                "grammarPoint": extra.get("grammar_point"),
                "commonMistakes": extra.get("common_mistakes"),
                "pragmaticAlternative": extra.get("pragmatic_alternative") or [],
                "sceneTransferTip": extra.get("scene_transfer_tip"),
                "difficultyTag": extra.get("difficulty_tag"),
                "extendedExample": extra.get("extended_example"),
                "responsePairs": extra.get("response_pairs") or [],
                "fluencyHacks": extra.get("fluency_hacks"),
                "culturalNote": extra.get("cultural_note"),
                "practiceSteps": extra.get("practice_steps") or [],
                "avoidScenarios": extra.get("avoid_scenarios"),
                "selfCheckList": extra.get("self_check_list") or [],
                "toneIntensity": extra.get("tone_intensity"),
                "similarSentenceDistinction": extra.get("similar_sentence_distinction"),
                "speechRateTip": extra.get("speech_rate_tip"),
                "personalizedTips": extra.get("personalized_tips"),
                "imageTextId": str(row.image_text_id) if row.image_text_id is not None else None,
            }

        payload_items = [_serialize(row) for row in rows]
        return {
            "id": str(sentence.id),
            "image_id": str(sentence.image_id),
            "user_id": str(sentence.user_id),
            "scene_tag": sentence.scene_tag,
            "total": len(payload_items),
            "list": payload_items,
        }
# Module-level singleton shared by API routers and background tasks.
sentence_service = SentenceService()

View File

@@ -5,6 +5,7 @@ IMAGE_RECOGNITION_COST = 10 # 0.0015/1000 * 10
# Speech assessment service cost in points
SPEECH_ASSESSMENT_COST = 1
SENTENCE_CARD_COST = 2
# Points action types
POINTS_ACTION_SYSTEM_GIFT = "system_gift"
@@ -20,3 +21,7 @@ POINTS_ACTION_REFUND_DEDUCT = "refund_deduct"
API_TYPE_RECOGNITION = 'recognition'
FREE_TRIAL_BALANCE = 30
SENTENCE_TYPE_SCENE_DIALOGUE = "scene_dialogue"
SENTENCE_TYPE_SCENE_EXERCISE = "scene_exercise"
SENTENCE_TYPE_SCENE_SENTENCE = "scene_sentence"

View File

@@ -56,6 +56,10 @@ class Settings(BaseSettings):
API_TIMEOUT: int = 600
ASYNC_POLL_INTERVAL: int = 1
ASYNC_MODE: str = "enable"
HUNYUAN_MODLE: str
HUNYUAN_APP_ID: str
HUNYUAN_SECRET_ID: str
HUNYUAN_SECRET_KEY: str
TENCENT_CLOUD_APP_ID: str
TENCENT_CLOUD_SECRET_ID: str
TENCENT_CLOUD_SECRET_KEY: str

View File

@@ -180,4 +180,4 @@ def register_page(app: FastAPI):
:param app:
:return:
"""
add_pagination(app)
add_pagination(app)

View File

@@ -12,13 +12,15 @@ from random import sample
from string import ascii_letters, digits
from wechatpayv3 import WeChatPay, WeChatPayType
from backend.app.admin.tasks import wx_user_index_history
from backend.app.ai.service.sentence_service import SentenceService
app = register_app()
@app.get("/")
async def read_root():
# await wx_user_index_history()
await wx_user_index_history()
# res = await SentenceService()._process_scene_task(2111026809104629760, 2108963527040565248)
return {"Hello": f"World, {datetime.now().isoformat()}"}

View File

@@ -80,47 +80,55 @@ Core objective: Analyze the image based on its PRIMARY SCENE (e.g., office, rest
level1 (Beginner):
- Learning goal: Recognize core vocabulary + use basic functional sentences (describe objects/scenes, simple requests)
- Vocab: High-frequency daily words (no uncommon words)
- Grammar: Simple present/past tense, basic SV/SVO structure
- Word count per sentence: ~10 words
- Sentence type: 6 unique functional types (describe object, simple request, state fact, point out location, express preference, ask simple question)
- Grammar: Present continuous, modal verbs (can/could/would), simple clauses
- Word count per sentence: ≤15 words
- Sentence type: 6 unique functional types (detailed description, polite request, ask for information, suggest action, state need, confirm fact, express feeling)
- The sentence structure of the described object: quantity + name + feature + purpose.
level2 (Intermediate):
- Learning goal: Master scene-specific collocations + practical communication sentences (daily/office interaction)
- Vocab: Scene-specific common words + fixed collocations (e.g., "print a document", "place an order")
- Grammar: Present continuous, modal verbs (can/could/would), simple clauses
- Word count per sentence: 15-20 words
- Sentence type: 7 unique functional types (detailed description, polite request, ask for information, suggest action, state need, confirm fact, express feeling)
level3 (Advanced):
- Learning goal: Use professional/scene-specific uncommon words + context-aware pragmatic expressions (with tone/formality differences)
- Vocab: Scene-specific professional words (e.g., "barista" for café, "agenda" for meeting) + advanced collocations
- Grammar: Complex clauses, passive voice, subjunctive mood (as appropriate to the scene)
- Word count per sentence: ≤25 words
- Sentence type: 8 unique functional types (detailed scene analysis, formal/informal contrast, conditional statement, explain purpose, ask follow-up questions, express suggestion, summarize information, clarify meaning)
- Sentence type: 8-12 unique functional types (detailed scene analysis, formal/informal contrast, conditional statement, explain purpose, ask follow-up questions, express suggestion, summarize information, clarify meaning)
// Output Requirements
1. JSON Structure (add core vocab/collocation for easy parsing):
{
"scene_tag": "xxx", // e.g., "office", "café", "supermarket" (primary scene of the image)
"scene_tag": ["xxx", "xxx"], // e.g., "office", "café", "supermarket" (Multiple tags that are consistent with the main scene of the picture)
"description": "", // Clear and accurate description of the content of the picture, including but not limited to objects, relationships, colors, etc.
"level1": {
"desc_en": ["sentence1", "sentence2", ...], // 6 unique sentences (no repeated semantics/functions)
"desc_zh": ["translation1", "translation2", ...], // one-to-one with desc_en
"core_vocab": ["word1", "word2", ...], // 3-5 core high-frequency words from LEVEL1 sentences
"collocations": ["word1 + collocation1", ...] // 2-3 fixed collocations (e.g., "sit + on a chair")
"desc_en": ["sentence1", "sentence2", ...], // 6 to 8 distinct sentences with different modalities (without repeating the same meaning or function. Don't use Chinese. Consistent with native English speakers' daily communication habits)
"desc_zh": ["translation1", "translation2", ...], // one-to-one with desc_en, chinese translation must be natural and not stiff, consistent with native English speakers' daily communication habits.
},
"level2": {
"desc_en": ["sentence1", ...], // 7 unique sentences
"desc_zh": ["translation1", ...],
"core_vocab": ["word1", ...], // 4-6 scene-specific words
"collocations": ["collocation1", ...] // 3-4 scene-specific collocations
},
"level3": {
"desc_en": ["sentence1", ...], // 8 unique sentences
"desc_zh": ["translation1", ...],
"core_vocab": ["word1", ...], // 5-7 professional/uncommon words
"collocations": ["collocation1", ...], // 4-5 advanced collocations
"pragmatic_notes": ["note1", ...] // 1-2 notes on tone/formality (e.g., "Could you..." is politer than "Can you...")
}
"level2": {
"desc_en": [
"Requirement: 8-12 daily spoken English sentences matching the image scenario (prioritize short sentences, ≤20 words)",
"Type: Declarative sentences / polite interrogative sentences that can be used directly (avoid formal language and complex clauses)",
"Scenario Adaptation: Strictly align with the real-life scenario shown in the image (e.g., restaurant ordering, asking for directions on the subway, chatting with friends, etc.)",
"Core Principle: Natural and not stiff, consistent with native English speakers' daily communication habits (e.g., prefer \"How's it going?\" over \"How are you recently?\")"
],
"desc_zh": [
"Requirement: Colloquial Chinese translations of the corresponding English sentences",
"Principle: Avoid literal translations and formal expressions; conform to daily Chinese speaking habits (e.g., translate \"Could you pass the salt?\" as \"能递下盐吗?\" instead of \"你能把盐递给我吗?\")",
"Adaptability: Translations should fit the logical expression of Chinese scenarios (e.g., more polite for workplace communication, more casual for friend chats)"
],
"core_vocab": [
"Requirement: 5-8 core spoken words for the scenario",
"Standard: High-frequency daily use (avoid rare words and academic terms); can directly replace key words in sentences for reuse",
"Example: For the \"supermarket shopping\" scenario, prioritize words like \"discount, check out, cart\" that can be directly applied to sentences"
],
"collocations": [
"Requirement: 5-8 high-frequency spoken collocations for the scenario",
"Standard: Short and practical fixed collocations; can be used by directly replacing core words (avoid complex phrases)",
"Example: For the \"food delivery ordering\" scenario, collocations include \"order food, pick up the phone (for delivery calls), track the order\""
],
"pragmatic_notes": [
"Requirement: 2-4 scenario-specific pragmatic notes (avoid general descriptions)",
"Content: Clear usage scenarios + tone adaptation + practical skills (e.g., \"Suitable for chatting with friends; casual tone; starting with the filler word 'actually' makes it more natural\")",
"Practical Value: Include \"replacement skills\" (e.g., \"Sentence pattern 'I'm in the mood for + [food]' can be used by directly replacing the food noun\")"
]
}
}
2. Uniqueness: No repetition in SEMANTICS/FUNCTIONS (not just literal repetition) — e.g., avoid two sentences both meaning "This is a laptop" (even with different wording).
3. Focus: Prioritize ARTIFICIAL/CENTRAL objects and PRIMARY scene (ignore trivial background elements) — e.g., for a café image, focus on "coffee", "barista", "menu" (not "wall", "floor").
@@ -543,9 +551,14 @@ level3 (Advanced):
def _log_audit(api_type: str, model_name: str, request_data: dict,
response_data: dict, duration: float, status_code: int,
error_message: str | None = None, dict_level: str | None = None,
image_id: int = 0, user_id: int = 0, called_at: datetime | None = None):
image_id: int = 0, user_id: int = 0, called_at: datetime | None = None,
ref_type: str | None = None, ref_id: int | None = None):
"""记录API调用审计日志"""
token_usage = response_data.get("usage", {})
raw_usage = response_data.get("usage", {})
pt = raw_usage.get("input_tokens") or raw_usage.get("prompt_tokens") or 0
ct = raw_usage.get("output_tokens") or raw_usage.get("completion_tokens") or 0
tt = raw_usage.get("total_tokens") or (pt + ct)
token_usage = {"prompt_tokens": int(pt or 0), "completion_tokens": int(ct or 0), "total_tokens": int(tt or 0)}
cost = Qwen._calculate_cost(api_type, token_usage)
@@ -564,6 +577,8 @@ level3 (Advanced):
called_at=called_at or datetime.now(),
api_version=settings.FASTAPI_API_V1_PATH,
dict_level=dict_level,
ref_type=ref_type,
ref_id=ref_id,
)
try:
# Use background tasks for audit logging to avoid blocking
@@ -595,4 +610,4 @@ level3 (Advanced):
# Run in background without blocking
asyncio.create_task(_create_audit_log())
except Exception as e:
logger.error(f"Failed to save audit log: {str(e)}")
logger.error(f"Failed to save audit log: {str(e)}")

View File

@@ -0,0 +1,177 @@
import asyncio
import json
import time
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from tencentcloud.common.common_client import CommonClient
from tencentcloud.common import credential
from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException
from tencentcloud.common.profile.client_profile import ClientProfile
from tencentcloud.common.profile.http_profile import HttpProfile
from backend.core.conf import settings
from backend.common.log import log as logger
from backend.app.admin.schema.audit_log import CreateAuditLogParam
from backend.app.admin.service.audit_log_service import audit_log_service
from backend.app.ai.schema.image_chat import CreateImageChatParam
from backend.app.ai.service.image_chat_service import image_chat_service
class NonStreamResponse:
    """Adapter handed to the Tencent SDK for non-streaming replies.

    The SDK calls ``_deserialize`` with the parsed payload; the object keeps
    it as a JSON string in ``response`` so callers can ``json.loads`` it.
    """

    def __init__(self):
        # Raw JSON text of the API reply; empty until _deserialize runs.
        self.response = ""

    def _deserialize(self, obj):
        # Re-serialize the dict the SDK hands us back into JSON text.
        self.response = json.dumps(obj)
class Hunyuan:
_executor = ThreadPoolExecutor(max_workers=10)
@staticmethod
async def chat(messages: list[dict], image_id: int, user_id: int, system_prompt: str | None = None, chat_type: str | None = "general") -> dict:
start_time = time.time()
start_at = datetime.now()
status_code = 500
error_message = ""
response_data: dict = {}
try:
cred = credential.Credential(settings.HUNYUAN_SECRET_ID, settings.HUNYUAN_SECRET_KEY)
httpProfile = HttpProfile()
httpProfile.endpoint = "hunyuan.tencentcloudapi.com"
try:
httpProfile.reqTimeout = int(getattr(settings, "API_TIMEOUT", 600))
except Exception:
httpProfile.reqTimeout = 600
clientProfile = ClientProfile()
clientProfile.httpProfile = httpProfile
_msg = ([{"Role": "system", "Content": system_prompt}] if system_prompt else []) + [
{"Role": m.get("role"), "Content": m.get("content")} for m in (messages or [])
]
req_body = {
"Model": settings.HUNYUAN_MODLE,
"Messages": _msg,
"Stream": False,
}
loop = asyncio.get_event_loop()
resp = await loop.run_in_executor(
Hunyuan._executor,
lambda: CommonClient("hunyuan", "2023-09-01", cred, "", profile=clientProfile)._call_and_deserialize(
"ChatCompletions", req_body, NonStreamResponse
)
)
if isinstance(resp, NonStreamResponse):
status_code = 200
response_data = json.loads(resp.response)
else:
status_code = 200
chunks = []
for event in resp:
chunks.append(event)
response_data = {"stream": True, "events": chunks}
duration = time.time() - start_time
token_usage = response_data.get("Usage") or response_data.get("usage") or {}
model_name = settings.HUNYUAN_MODLE
image_chat_id = None
try:
chat = await image_chat_service.create_conversation({
"chat_type": chat_type or "general",
"model_name": model_name,
"image_id": image_id,
"user_id": user_id,
"ext": {"module": chat_type or "general"},
})
if chat:
image_chat_id = chat.id
msg = await image_chat_service.append_turn(chat.id, {
"model_name": model_name,
"messages": {"messages": req_body.get("Messages")},
"response_data": response_data,
"token_usage": (
{"prompt_tokens": (token_usage.get("PromptTokens") or token_usage.get("prompt_tokens") or token_usage.get("input_tokens") or 0),
"completion_tokens": (token_usage.get("CompletionTokens") or token_usage.get("completion_tokens") or token_usage.get("output_tokens") or 0),
"total_tokens": (token_usage.get("TotalTokens") or token_usage.get("total_tokens") or token_usage.get("total") or
((token_usage.get("PromptTokens") or token_usage.get("prompt_tokens") or 0) +
(token_usage.get("CompletionTokens") or token_usage.get("completion_tokens") or 0)))}
if isinstance(token_usage, dict) else {}
),
"duration": duration,
"status_code": status_code,
"error_message": error_message or None,
"called_at": start_at,
"ext": {"module": chat_type or "general"},
})
norm_usage = {}
if isinstance(token_usage, dict):
pt = token_usage.get("PromptTokens") or token_usage.get("prompt_tokens") or token_usage.get("input_tokens") or 0
ct = token_usage.get("CompletionTokens") or token_usage.get("completion_tokens") or token_usage.get("output_tokens") or 0
tt = token_usage.get("TotalTokens") or token_usage.get("total_tokens") or token_usage.get("total") or (pt + ct)
norm_usage = {"prompt_tokens": int(pt or 0), "completion_tokens": int(ct or 0), "total_tokens": int(tt or 0)}
audit_log = CreateAuditLogParam(
api_type="chat",
model_name=model_name,
response_data=response_data,
request_data=req_body,
token_usage=norm_usage,
duration=duration,
status_code=status_code,
error_message=error_message,
called_at=start_at,
image_id=image_id,
user_id=user_id,
cost=0,
api_version=settings.FASTAPI_API_V1_PATH,
dict_level=None,
ref_type="image_chat_msg",
ref_id=(msg.id if chat and msg else None),
)
await audit_log_service.create(obj=audit_log)
except Exception as e:
logger.error(f"save image chat failed: {str(e)}")
result_text = ""
try:
if "Choices" in response_data:
choices = response_data["Choices"]
if isinstance(choices, list) and choices:
msg = choices[0].get("Message", {})
result_text = msg.get("Content", "")
elif "output" in response_data:
choices = response_data["output"].get("choices", [])
if isinstance(choices, list) and choices:
msg = choices[0].get("message", {})
content = msg.get("content", "")
if isinstance(content, list):
result_text = " ".join([seg.get("text", "") for seg in content if isinstance(seg, dict)])
elif isinstance(content, str):
result_text = content
except Exception:
pass
return {
"success": True,
"result": result_text,
"token_usage": token_usage,
"image_chat_id": image_chat_id,
}
except TencentCloudSDKException as err:
error_message = str(err)
logger.exception(f"hunyuan chat exception: {error_message}")
return {
"success": False,
"error": error_message,
}
except Exception as e:
error_message = str(e)
logger.exception(f"hunyuan chat exception: {error_message}")
return {
"success": False,
"error": error_message,
}

View File

@@ -120,4 +120,8 @@
# # process(0)
# # 录音识别
# process_rec(0)
# # process_multithread(20)
# # process_multithread(20)
import re

desc_words = "a well-food."
# Tokenize: lowercase first, then grab word characters (hyphens kept inside tokens).
token_pattern = re.compile(r'\b[\w-]+\b')
words = set(token_pattern.findall(desc_words.lower()))
print(words)

View File

@@ -1,9 +1,9 @@
[program:app_server]
directory=/app
command=/usr/local/bin/gunicorn -c /app/deploy/gunicorn.conf.py main:app
user=root
autostart=true
autorestart=true
startretries=5
redirect_stderr=true
stdout_logfile=/var/log/app_server/app_server.log
[program:app_server]
directory=/app
command=/bin/sh -lc 'alembic -c backend/alembic.ini upgrade head && /usr/local/bin/gunicorn -c /app/deploy/gunicorn.conf.py main:app'
user=root
autostart=true
autorestart=true
startretries=5
redirect_stderr=true
stdout_logfile=/var/log/app_server/app_server.log

View File

@@ -257,6 +257,7 @@ starlette==0.46.1
# asgi-correlation-id
# fastapi
termcolor==2.5.0
# via pytest-sugar
tencentcloud-sdk-python-common==3.1.11
tomli==2.2.1 ; python_full_version < '3.11'
# via pytest
@@ -322,4 +323,5 @@ zope-event==5.0
zope-interface==7.2
# via gevent
mysql-connector-python==8.0.33 # Added MySQL connector
# via fastapi-best-architecture
# via fastapi-best-architecture
pymysql==1.1.2