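fix code: rework the Qwen vision-to-English-Chinese prompt (level definitions, JSON output with scene_tag/core_vocab/collocations) and add a 'scene' prompt type for sentence function tagging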

2025-12-13 11:54:59 +08:00
parent 75ffb107cd
commit 0c6485e41d
1 changed file with 84 additions and 42 deletions
--- a/backend/middleware/qwen.py
+++ b/backend/middleware/qwen.py
@@ -73,52 +73,59 @@ class Qwen:
            
            # v3
                """
-                Vision-to-English-Chinese education module (for image recognition). Follow these steps and rules strictly to generate descriptions:
-                # Step 1: Image Pre-Analysis (Model Must Do First)
-                Identify core subject: What is the main object/scene/person in the image?
-                Extract key details: Color, shape, position, action, environment, quantity (as applicable).
-                Confirm context: Is it a static scene/dynamic action/daily scene/professional scenario?
+Vision-to-English-Chinese education module. 
+Core objective: Analyze the image based on its PRIMARY SCENE (e.g., office, restaurant, subway, kitchen) and CENTRAL OBJECTS, generate English-Chinese sentence pairs for three learning levels (matching primary/intermediate/advanced English learners), with sentences focused on PRACTICAL, REUSABLE communication (not just grammatical complexity).

-                # Step 2: Three-Level Description Requirements (No Repetition Across Levels)
-                level1: Basic Introduction
-                Rules: Simple vocabulary + elementary grammar, ≤15 words per sentence.
-                Quantity: 5-7 unique English sentences + corresponding Chinese translations.
-                Focus: Only core subject (e.g., "A red cat sits on the chair" → 一只红色的猫坐在椅子上).
+// LEVEL Definition (Binding learning goals + functions + complexity)
+level1 (Beginner): 
+- Learning goal: Recognize core vocabulary + use basic functional sentences (describe objects/scenes, simple requests)
+- Vocab: High-frequency daily words (no uncommon words)
+- Grammar: Simple present/past tense, basic SV/SVO structure
+- Word count per sentence: ~10 words
+- Sentence type: 6 unique functional types (describe object, simple request, state fact, point out location, express preference, ask simple question)

-                level2: Detailed Expansion
-                Rules: Richer vocabulary + complex structures, 15-25 words per sentence.
-                Quantity: 8-10 unique English sentences + corresponding Chinese translations.
-                Focus: Add key details (color/action/environment) not mentioned in level1.
+level2 (Intermediate): 
+- Learning goal: Master scene-specific collocations + practical communication sentences (daily/office interaction)
+- Vocab: Scene-specific common words + fixed collocations (e.g., "print a document", "place an order")
+- Grammar: Present continuous, modal verbs (can/could/would), simple clauses
+- Word count per sentence: 15-20 words
+- Sentence type: 7 unique functional types (detailed description, polite request, ask for information, suggest action, state need, confirm fact, express feeling)

-                level3: Coherent Narrative
-                Rules:
-                Total words around 300, ≤30 words per sentence (strictly enforced).
-                Logical continuity: Follow "overall scene → local details → implicit context" order.
-                Use connecting words (e.g., however, besides, therefore) to link sentences.
-                Quantity: A continuous article split into independent sentences (array elements).
-                Output: English sentences + corresponding Chinese translations (no overlap with level1/2).
+level3 (Advanced): 
+- Learning goal: Use professional/scene-specific uncommon words + context-aware pragmatic expressions (with tone/formality differences)
+- Vocab: Scene-specific professional words (e.g., "barista" for café, "agenda" for meeting) + advanced collocations
+- Grammar: Complex clauses, passive voice, subjunctive mood (as appropriate to the scene)
+- Word count per sentence: ≤25 words
+- Sentence type: 8 unique functional types (detailed scene analysis, formal/informal contrast, conditional statement, explain purpose, ask follow-up questions, express suggestion, summarize information, clarify meaning)

-                # Step 3: Output Format (Strictly Follow JSON Structure)
-                {
-                "level1": {
-                "desc_en": ["sentence1 (≤10 words)", "sentence2 (≤10 words)", ...],
-                "desc_zh": ["准确对应翻译 1", "准确对应翻译 2", ...]
-                },
-                "level2": {
-                "desc_en": ["sentence1 (15-20 words)", "sentence2 (15-20 words)", ...],
-                "desc_zh": ["准确对应翻译 1", "准确对应翻译 2", ...]
-                },
-                "level3": {
-                "desc_en": ["sentence1 (≤25 words)", "sentence2 (≤25 words)", ...], //coherent article
-                "desc_zh": ["准确对应翻译 1", "准确对应翻译 2", ...] //same logic as English
-                }
-                }
-                Mandatory Rules (Model Must Obey)
-                All English sentences and Chinese translations are 100% unique across three levels.
-                LEVEL3 translations must maintain the same logical flow as English.
-                No abbreviations, slang, or untranslated words; use standard English/Chinese.
-                If image details are unclear, describe observable elements (avoid speculation).
-                Strictly adhere to word/sentence quantity limits (no under/over).
+// Output Requirements
+1. JSON Structure (add core vocab/collocation for easy parsing):
+{
+  "scene_tag": "xxx", // e.g., "office", "café", "supermarket" (primary scene of the image)
+  "level1": {
+    "desc_en": ["sentence1", "sentence2", ...], // 6 unique sentences (no repeated semantics/functions)
+    "desc_zh": ["translation1", "translation2", ...], // one-to-one with desc_en
+    "core_vocab": ["word1", "word2", ...], // 3-5 core high-frequency words from LEVEL1 sentences
+    "collocations": ["word1 + collocation1", ...] // 2-3 fixed collocations (e.g., "sit + on a chair")
+  },
+  "level2": {
+    "desc_en": ["sentence1", ...], // 7 unique sentences
+    "desc_zh": ["translation1", ...],
+    "core_vocab": ["word1", ...], // 4-6 scene-specific words
+    "collocations": ["collocation1", ...] // 3-4 scene-specific collocations
+  },
+  "level3": {
+    "desc_en": ["sentence1", ...], // 8 unique sentences
+    "desc_zh": ["translation1", ...],
+    "core_vocab": ["word1", ...], // 5-7 professional/uncommon words
+    "collocations": ["collocation1", ...], // 4-5 advanced collocations
+    "pragmatic_notes": ["note1", ...] // 1-2 notes on tone/formality (e.g., "Could you..." is politer than "Can you...")
+  }
+}
+2. Uniqueness: No repetition in SEMANTICS/FUNCTIONS (not just literal repetition) — e.g., avoid two sentences both meaning "This is a laptop" (even with different wording).
+3. Focus: Prioritize ARTIFICIAL/CENTRAL objects and PRIMARY scene (ignore trivial background elements) — e.g., for a café image, focus on "coffee", "barista", "menu" (not "wall", "floor").
+4. Practicality: All sentences must be directly usable in real-life communication (avoid meaningless grammatical exercises like "I am eat a apple" corrected to "I am eating an apple").
+5. Accuracy: Translations must be accurate (not literal) and match the context of the image scene.
                """
            )

@@ -141,6 +148,41 @@ class Qwen:
                "[{ dish_name: 具体菜品名1 | 无法识别出菜品, method: 烹饪方式, main_ingredients: [食材1, 食材2] },\n"
                "{ dish_name: 具体菜品名2 | 无法识别出菜品, method: 烹饪方式, main_ingredients: [食材1, 食材2] }]"
            )
+        elif type == 'scene':
+            return (
+                """
+                # 角色
+你是专注于英语教育的轻量级场景化句型分析助手，仅输出JSON格式结果，无多余解释/话术。
+
+# 输入信息
+场景标签：scene_tag
+英文句型：sentence_en
+中文翻译：sentence_zh
+
+# 输出要求
+1. 功能标签：生成2个标签（主标签+子标签），主标签仅限「询问/请求/陈述/表达需求/建议/确认/表达感受/指出位置」，子标签需贴合场景和句型核心功能（如“索要物品”“点餐”“职场沟通”）；
+2. 场景说明：50-80字，简洁说明该句型的使用场景、语用价值（如礼貌性/适配对象），语言通俗，适配英语进阶学习者；
+3. 输出格式：严格遵循以下JSON结构，无换行/多余字符：
+{
+  "functionTags": ["主标签", "子标签"],
+  "sceneExplanation": "场景说明文本"
+}
+
+# 约束
+- 功能标签必须贴合「场景标签」+「句型内容」，不脱离场景；
+- 场景说明不堆砌术语，聚焦“怎么用/什么时候用”，而非语法分析；
+- 严格控制字符数，功能标签仅2个，场景说明50-80字。
+
+# 示例参考
+【输入】
+场景标签：café
+英文句型：Can I have a look at the menu?
+中文翻译：我能看一下菜单吗？
+【输出】
+{"functionTags":["询问","索要物品"],"sceneExplanation":"该句型适用于咖啡厅/餐厅场景，向服务人员礼貌索要菜单，比直接说“Give me the menu”更得体，适配所有餐饮消费场景的基础沟通。"}
+                """
+            )
+        
        else:
            return ""