From 73889524f23b302f78c9111c8531c38ff83114f0 Mon Sep 17 00:00:00 2001
From: Felix <Felix>
Date: Thu, 4 Dec 2025 10:28:17 +0800
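Subject: [PATCH] Generate standard audio on demand instead of at text creation

Stop calling TencentCloud.text_to_speak for every newly created image text.
The standard-audio endpoint now passes the requesting user's ID to
RecordingService, which synthesizes the audio on first request when no
standard recording exists and no longer polls for it.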

---
 backend/app/ai/api/image_text.py             |  3 +-
 backend/app/ai/service/image_text_service.py | 26 +-----------
 backend/app/ai/service/recording_service.py  | 44 +++++++++++++-------
 3 files changed, 32 insertions(+), 41 deletions(-)
diff --git a/backend/app/ai/api/image_text.py b/backend/app/ai/api/image_text.py
index f923e28..55e0fcd 100644
--- a/backend/app/ai/api/image_text.py
+++ b/backend/app/ai/api/image_text.py
@@ -37,6 +37,7 @@ async def init_image_texts(
 
 @router.get("/standard/{text_id}", summary="获取标准音频文件ID", dependencies=[DependsJwtAuth])
 async def get_standard_audio_file_id(
+        request: Request,
         text_id: int,
 ) -> ResponseSchemaModel[dict]:
     """
@@ -48,7 +49,7 @@ async def get_standard_audio_file_id(
     返回:
     - 标准音频的文件ID
     """
-    file_id = await recording_service.get_standard_audio_file_id_by_text_id(text_id)
+    file_id = await recording_service.get_standard_audio_file_id_by_text_id(text_id, request.user.id)
     if not file_id:
         raise errors.NotFoundError(msg="标准音频不存在或创建超时")
     return response_base.success(data={'audio_id': str(file_id)})
diff --git a/backend/app/ai/service/image_text_service.py b/backend/app/ai/service/image_text_service.py
index 3a7bfe3..579c531 100644
--- a/backend/app/ai/service/image_text_service.py
+++ b/backend/app/ai/service/image_text_service.py
@@ -247,18 +247,7 @@ class ImageTextService:
                 if text.id is None:  # 只刷新新创建的记录
                     await db.refresh(text)
 
-            # 为新创建的文本记录生成标准发音音频（使用后台任务）
-            if newly_created_texts:
-                from backend.middleware.tencent_cloud import TencentCloud
-                tencent_cloud = TencentCloud()
-                for text in newly_created_texts:
-                    # 添加后台任务来生成标准发音音频
-                    await tencent_cloud.text_to_speak(
-                        image_id=text.image_id,
-                        content=text.content,
-                        image_text_id=text.id,
-                        user_id=user_id
-                    )
+            # Standard audio is not generated here; it is created on demand when the standard-audio endpoint is called.
 
             return True
 
@@ -371,18 +360,7 @@ class ImageTextService:
                 if text.id is None:  # 只刷新新创建的记录
                     await db.refresh(text)
 
-            # 为新创建的文本记录生成标准发音音频（使用后台任务）
-            if newly_created_texts:
-                from backend.middleware.tencent_cloud import TencentCloud
-                tencent_cloud = TencentCloud()
-                for text in newly_created_texts:
-                    # 添加后台任务来生成标准发音音频
-                    await tencent_cloud.text_to_speak(
-                        image_id=text.image_id,
-                        content=text.content,
-                        image_text_id=text.id,
-                        user_id=user_id
-                    )
+            # Standard audio is not generated here; it is created on demand when the standard-audio endpoint is called.
 
             return True
         except Exception as e:
diff --git a/backend/app/ai/service/recording_service.py b/backend/app/ai/service/recording_service.py
index 987a311..b442efa 100644
--- a/backend/app/ai/service/recording_service.py
+++ b/backend/app/ai/service/recording_service.py
@@ -56,21 +56,33 @@ class RecordingService:
             return list(result.scalars().all())
 
     @staticmethod
-    async def get_standard_audio_file_id_by_text_id(text_id: int, max_wait_time: int = 30, retry_interval: int = 2) -> Optional[int]:
-        """根据文本ID获取标准音频的文件ID，支持等待机制"""
-        import asyncio
-        import time
-        
-        start_time = time.time()
-        while time.time() - start_time < max_wait_time:
-            async with async_db_session() as db:
-                recording = await recording_dao.get_standard_by_text_id(db, text_id)
-                if recording:
-                    return recording.file_id
-            # 等待指定的时间间隔再重试
-            await asyncio.sleep(retry_interval)
-        
-        # 超时后仍然没有找到，返回None
+    async def get_standard_audio_file_id_by_text_id(text_id: int, user_id: int, max_wait_time: int = 30, retry_interval: int = 2) -> Optional[int]:
+        """Return the standard audio file ID for a text, generating the audio on demand.
+
+        ``max_wait_time`` and ``retry_interval`` are kept from the former polling version but unused.
+        Returns ``None`` if the text is missing, TTS raises, or no recording is found afterwards.
+        """
+        async with async_db_session() as db:
+            recording = await recording_dao.get_standard_by_text_id(db, text_id)
+            if recording:
+                return recording.file_id
+        # Not stored yet: generate on demand, then look the recording up once more.
+        image_text = await image_text_service.get_text_by_id(text_id)
+        if not image_text:
+            return None
+        try:
+            from backend.middleware.tencent_cloud import TencentCloud
+            await TencentCloud().text_to_speak(
+                content=image_text.content, image_text_id=text_id,
+                image_id=image_text.image_id, user_id=user_id,
+            )
+        except Exception as e:
+            logger.exception(f"On-demand TTS generation failed for text_id={text_id}: {e}")
+            return None
+        async with async_db_session() as db:
+            recording = await recording_dao.get_standard_by_text_id(db, text_id)
+            if recording:
+                return recording.file_id
         return None
 
     @staticmethod
@@ -483,4 +495,4 @@ class RecordingService:
             return None
 
 
-recording_service = RecordingService()
\ No newline at end of file
+recording_service = RecordingService()