diff --git a/backend/app/admin/schema/audit_log.py b/backend/app/admin/schema/audit_log.py
index 60bff19..5d12e29 100755
--- a/backend/app/admin/schema/audit_log.py
+++ b/backend/app/admin/schema/audit_log.py
@@ -18,7 +18,7 @@ class AuditLogSchemaBase(SchemaBase):
     status_code: int = Field(description="HTTP状态码")
     error_message: Optional[str] = Field("", description="错误信息")
     called_at: Optional[datetime] = Field(None, description="调用时间")
-    image_id: int = Field(description="关联的图片ID")
+    image_id: Optional[int] = Field(None, description="关联的图片ID")
     user_id: int = Field(description="调用用户ID")
     api_version: str = Field(description="API版本")
     dict_level: Optional[str] = Field(None, description="词典等级")
diff --git a/backend/app/ai/api/qa.py b/backend/app/ai/api/qa.py
index b027975..943b9f5 100644
--- a/backend/app/ai/api/qa.py
+++ b/backend/app/ai/api/qa.py
@@ -77,7 +77,7 @@ async def reply_conversation(request: Request, session_id: int, obj: Conversatio
 
 
 @router.post('/conversations/{session_id}/recognize_audio', summary='识别音频内容', dependencies=[DependsJwtAuth])
-async def recognize_audio(request: Request, obj: ConversationRecognitionRequest) -> ResponseSchemaModel[ConversationRecognitionResponse]:
+async def recognize_audio(request: Request, session_id: int, obj: ConversationRecognitionRequest) -> ResponseSchemaModel[ConversationRecognitionResponse]:
     res = await qa_service.recognize_audio(file_id=int(obj.file_id), user_id=request.user.id, session_id=session_id)
     return response_base.success(data=ConversationRecognitionResponse(**res))
 
diff --git a/backend/app/ai/service/qa_service.py b/backend/app/ai/service/qa_service.py
index 12f5790..21731a9 100644
--- a/backend/app/ai/service/qa_service.py
+++ b/backend/app/ai/service/qa_service.py
@@ -897,6 +897,7 @@ class QaService:
 
         # 1. 验证会话
         exercise_id = None
+        image_id = None
         async with async_db_session.begin() as db:
             session = await qa_session_dao.get(db, session_id)
             if not session:
@@ -904,6 +905,10 @@ class QaService:
             if session.starter_user_id != user_id:
                 raise errors.ForbiddenError(msg="Forbidden")
             exercise_id = session.exercise_id
+
+            exercise = await qa_exercise_dao.get(db, exercise_id)
+            if exercise:
+                image_id = exercise.image_id
 
         # 2. 获取文件信息
         file_obj = await file_service.get_file(file_id)
@@ -935,7 +940,7 @@ class QaService:
             audio_url = temp_file_path
 
         # 调用Qwen ASR
-        res = await Qwen.recognize_speech(audio_url, user_id=user_id)
+        res = await Qwen.recognize_speech(audio_url, user_id=user_id, image_id=image_id)
         if not res.get("success"):
             raise errors.ServerError(msg=res.get("error") or "ASR failed")
 
diff --git a/backend/middleware/qwen.py b/backend/middleware/qwen.py
index 3688c8e..0f14e4d 100755
--- a/backend/middleware/qwen.py
+++ b/backend/middleware/qwen.py
@@ -317,11 +317,12 @@ class Qwen:
     )
 
     @staticmethod
-    async def recognize_speech(file_path: str, user_id: int = 0) -> Dict[str, Any]:
+    async def recognize_speech(file_path: str, user_id: int = 0, image_id: int | None = None) -> Dict[str, Any]:
         """
         调用通义千问API识别语音内容
         :param file_path: 音频文件路径 (本地路径)
         :param user_id: 用户ID
+        :param image_id: 关联的图片ID
         :return: 识别结果
         """
         api_key = _get_primary_qwen_api_key()
@@ -377,7 +378,7 @@ class Qwen:
                 status_code=status_code,
                 error_message=None,
                 called_at=start_at,
-                image_id=0,
+                image_id=image_id,
                 user_id=user_id,
                 cost=0,
                 api_version=settings.FASTAPI_API_V1_PATH,
@@ -428,7 +429,7 @@ class Qwen:
                 duration=time.time() - start_time,
                 status_code=status_code,
                 error_message=error_message,
-                image_id=0,
+                image_id=image_id,
                 user_id=user_id,
                 called_at=start_at
             )