add conversation

This commit is contained in:
Felix
2026-01-20 20:41:23 +08:00
parent 3728ed54d1
commit 7ade571e13
8 changed files with 935 additions and 12 deletions

View File

@@ -13,6 +13,12 @@ from backend.app.ai.schema.qa import (
QuestionLatestResultResponse,
ImageConversationInitRequest,
ImageConversationInitResponse,
ConversationStartRequest,
ConversationStartResponse,
ConversationSessionSchema,
ConversationReplyRequest,
ConversationReplyResponse,
ConversationLatestResponse,
)
from backend.common.response.response_schema import response_base, ResponseSchemaModel
from backend.common.security.jwt import DependsJwtAuth
@@ -40,6 +46,46 @@ async def get_conversation_setting(request: Request, obj: ImageConversationInitR
return response_base.success(data=data)
@router.post('/conversations/start', summary='启动图片自由对话', dependencies=[DependsJwtAuth])
async def start_conversation(request: Request, obj: ConversationStartRequest) -> ResponseSchemaModel[ConversationStartResponse]:
    """Kick off a free-form conversation about an image for the current user.

    Delegates to the service layer, which creates the exercise/session rows and
    dispatches an async LLM task; the response carries the task handle.
    """
    result = await qa_service.start_conversation(
        image_id=obj.image_id,
        user_id=request.user.id,
        scene=obj.scene,
        event=obj.event,
        style=obj.style,
        user_role=obj.user_role,
        assistant_role=obj.assistant_role,
        level=obj.level,
        info=obj.info,
    )
    return response_base.success(data=ConversationStartResponse(**result))
@router.post('/conversations/{session_id}/reply', summary='回复图片自由对话', dependencies=[DependsJwtAuth])
async def reply_conversation(request: Request, session_id: int, obj: ConversationReplyRequest) -> ResponseSchemaModel[ConversationReplyResponse]:
    """Submit the user's reply in an ongoing free conversation session.

    The service records the reply as an attempt and dispatches an async task to
    produce the assistant's next turn.
    """
    result = await qa_service.reply_conversation(
        session_id=session_id,
        user_id=request.user.id,
        input_text=obj.content,
    )
    payload = ConversationReplyResponse(**result)
    return response_base.success(data=payload)
@router.get('/conversations/{session_id}/latest', summary='获取图片自由对话最新消息', dependencies=[DependsJwtAuth])
async def get_conversation_latest(request: Request, session_id: int) -> ResponseSchemaModel[ConversationLatestResponse]:
    """Return only the most recent assistant message for a conversation session."""
    result = await qa_service.get_latest_messages(session_id=session_id, user_id=request.user.id)
    payload = ConversationLatestResponse(**result)
    return response_base.success(data=payload)
@router.get('/conversations/{session_id}', summary='获取图片自由对话会话信息', dependencies=[DependsJwtAuth])
async def get_conversation_session(request: Request, session_id: int) -> ResponseSchemaModel[ConversationSessionSchema]:
    """Return the full conversation session, including the message history."""
    result = await qa_service.get_conversation_session(session_id=session_id, user_id=request.user.id)
    return response_base.success(data=ConversationSessionSchema(**result))
@router.get('/exercises/tasks/{task_id}/status', summary='查询练习任务状态', dependencies=[DependsJwtAuth])
async def get_exercise_task_status(task_id: int) -> ResponseSchemaModel[TaskStatusResponse]:
res = await qa_service.get_task_status(task_id)

View File

@@ -41,10 +41,15 @@ class QaQuestionCRUD(CRUDPlus[QaQuestion]):
return inst
async def get_by_exercise_id(self, db: AsyncSession, exercise_id: int) -> List[QaQuestion]:
    """Return all questions belonging to *exercise_id*, ordered by ascending id.

    Fix: the block contained two consecutive assignments to ``stmt`` (a diff
    artifact); the first, un-ordered query was built and immediately discarded.
    Only the ordered query is kept.
    """
    stmt = select(self.model).where(self.model.exercise_id == exercise_id).order_by(self.model.id)
    result = await db.execute(stmt)
    return list(result.scalars().all())
async def get_latest_by_exercise_id(self, db: AsyncSession, exercise_id: int) -> Optional[QaQuestion]:
    """Fetch the most recently created question for *exercise_id*, or None."""
    query = (
        select(self.model)
        .where(self.model.exercise_id == exercise_id)
        .order_by(self.model.id.desc())
        .limit(1)
    )
    rows = await db.execute(query)
    return rows.scalars().first()
class QaQuestionAttemptCRUD(CRUDPlus[QaQuestionAttempt]):
async def get(self, db: AsyncSession, id: int) -> Optional[QaQuestionAttempt]:

View File

@@ -47,7 +47,7 @@ class QaPracticeSession(Base):
id: Mapped[snowflake_id_key] = mapped_column(BigInteger, init=False, primary_key=True)
exercise_id: Mapped[int] = mapped_column(BigInteger, ForeignKey('qa_exercise.id'), nullable=False)
starter_user_id: Mapped[int] = mapped_column(BigInteger, ForeignKey('wx_user.id'), nullable=False)
share_id: Mapped[Optional[int]] = mapped_column(BigInteger, nullable=True)
share_id: Mapped[Optional[int]] = mapped_column(BigInteger, nullable=True, default=None)
status: Mapped[str] = mapped_column(String(20), default='ongoing')
started_at: Mapped[Optional[DateTime]] = mapped_column(DateTime, default=None)
completed_at: Mapped[Optional[DateTime]] = mapped_column(DateTime, default=None)
@@ -67,8 +67,8 @@ class QaQuestionAttempt(Base):
question_id: Mapped[int] = mapped_column(BigInteger, ForeignKey('qa_question.id'), nullable=False)
exercise_id: Mapped[int] = mapped_column(BigInteger, ForeignKey('qa_exercise.id'), nullable=False)
user_id: Mapped[int] = mapped_column(BigInteger, ForeignKey('wx_user.id'), nullable=False)
task_id: Mapped[Optional[int]] = mapped_column(BigInteger, ForeignKey('image_processing_task.id'), nullable=True)
recording_id: Mapped[Optional[int]] = mapped_column(BigInteger, ForeignKey('recording.id'), nullable=True)
task_id: Mapped[Optional[int]] = mapped_column(BigInteger, ForeignKey('image_processing_task.id'), nullable=True, default=None)
recording_id: Mapped[Optional[int]] = mapped_column(BigInteger, ForeignKey('recording.id'), nullable=True, default=None)
choice_options: Mapped[Optional[list]] = mapped_column(MySQLJSON, default=None)
cloze_options: Mapped[Optional[str]] = mapped_column(String(100), default=None)
input_text: Mapped[Optional[str]] = mapped_column(Text, default=None)

View File

@@ -141,10 +141,13 @@ class ImageConversationInitRequest(SchemaBase):
class ImageConversationEventSchema(SchemaBase):
    """One suggested conversation event derived from image analysis.

    Fix: the block declared ``event_en``/``event_zh``/``conversation_direction_*``
    twice — first as required ``str`` and again as ``Optional[str] = None`` (a
    diff artifact). Only the later, Optional declarations take effect in a
    Python class body, so the dead required duplicates are removed.
    """
    event_en: Optional[str] = None
    event_zh: Optional[str] = None
    conversation_direction_en: Optional[str] = None
    conversation_direction_zh: Optional[str] = None
    style_en: Optional[str] = None
    style_zh: Optional[str] = None
    # NOTE: pydantic deep-copies container defaults per instance, so [] is safe here.
    suggested_roles: Optional[List[Dict[str, str]]] = []
class ImageConversationObjectSchema(SchemaBase):
@@ -169,6 +172,73 @@ class ImageConversationInitResponse(SchemaBase):
latest_session: Optional[Dict[str, Any]] = None
class ConversationStartRequest(SchemaBase):
    """Request body for starting a free image conversation.

    ``scene`` and ``event`` are required lists; the remaining fields are
    optional configuration passed through to the prompt builder.
    """
    image_id: int
    scene: List[str]
    event: List[str]
    style: Optional[str] = None
    user_role: Optional[str] = None
    assistant_role: Optional[str] = None
    level: Optional[str] = None
    info: Optional[str] = None
class ConversationAlternativeItemSchema(SchemaBase):
    """A single suggested user reply, in English with a Chinese translation."""
    alt_en: Optional[str] = None
    alt_zh: Optional[str] = None
class ConversationAlternativeResponsesSchema(SchemaBase):
    """The three categorized reply suggestions the LLM produces per turn."""
    positive: Optional[ConversationAlternativeItemSchema] = None
    neutral: Optional[ConversationAlternativeItemSchema] = None
    negative: Optional[ConversationAlternativeItemSchema] = None
class FreeConversationContentSchema(SchemaBase):
    """Content of one conversation message.

    Assistant turns fill the response/prompt/alternative fields; user turns
    carry ``text`` (and possibly ``correction``). All fields are optional so
    the same schema serves both roles.
    """
    response_en: Optional[str] = None
    response_zh: Optional[str] = None
    prompt_en: Optional[str] = None
    prompt_zh: Optional[str] = None
    alternative_responses: Optional[ConversationAlternativeResponsesSchema] = None
    correction: Optional[str] = None
    text: Optional[str] = None
class ConversationMessageSchema(SchemaBase):
    """One message in a conversation; ``role`` is 'assistant' or 'user'."""
    role: str
    content: FreeConversationContentSchema
class ConversationStartResponse(SchemaBase):
    """Response for conversation start: an async task handle plus ids.

    Ids are serialized as strings (snowflake ids exceed JS safe integers).
    """
    task_id: str
    status: str
    exercise_id: Optional[str] = None
class ConversationReplyRequest(SchemaBase):
    """User reply payload; ``audio_id`` is accepted but unused by the endpoint."""
    content: str
    audio_id: Optional[str] = None
class ConversationReplyResponse(SchemaBase):
    """Response for a reply: async task handle plus the session id as string."""
    task_id: str
    status: str
    session_id: Optional[str] = None
class ConversationLatestResponse(SchemaBase):
    """Latest message(s) for a session; normally at most one assistant turn."""
    session_id: str
    messages: List[ConversationMessageSchema]
class ConversationSessionSchema(SchemaBase):
    """Full session view: status, last-update timestamp and message history."""
    exercise_id: str
    session_id: str
    status: str
    # ISO-8601 string, or None when the exercise has no update timestamp.
    updated_at: Optional[str] = None
    # pydantic copies the [] default per instance, so this is not shared state.
    messages: List[ConversationMessageSchema] = []
# Resolve forward references now that every dependent schema is defined.
CreateAttemptTaskResponse.model_rebuild()
AttemptResultResponse.model_rebuild()
QuestionEvaluationResponse.model_rebuild()

View File

@@ -31,10 +31,11 @@ from backend.common.const import EXERCISE_TYPE_CHOICE, EXERCISE_TYPE_CLOZE, EXER
from backend.app.admin.schema.wx import DictLevel
from backend.app.ai.service.image_task_service import TaskProcessor, image_task_service
from backend.app.ai.model.image_task import ImageProcessingTask
from backend.app.ai.model.qa import QaQuestion
from backend.app.ai.model.qa import QaQuestion, QaPracticeSession
from backend.core.prompts.qa_exercise import get_qa_exercise_prompt
from backend.core.prompts.recognition import get_conversation_prompt_for_image_dialogue
from backend.core.prompts.free_conversation import get_free_conversation_start_prompt, get_free_conversation_reply_prompt
from backend.app.ai.tools.qa_tool import SceneVariationGenerator, Illustrator
class QaExerciseProcessor(TaskProcessor):
@@ -222,6 +223,7 @@ class ConversationInitProcessor(TaskProcessor):
result = {"image_analysis": image_analysis, "token_usage": token_usage}
return result, token_usage
async def _call_llm_chat(self, prompt: str, image_id: int, user_id: int, chat_type: str) -> Dict[str, Any]:
messages = [
SystemMessage(content="You are a helpful assistant."),
@@ -258,6 +260,321 @@ class ConversationInitProcessor(TaskProcessor):
except Exception as e:
return {"success": False, "error": str(e)}
class ConversationStartProcessor(TaskProcessor):
    """Async task processor that generates the opening assistant turn of a
    free image conversation.

    The task's ``ref_id`` points at the ``qa_exercise`` row created by
    ``QaService.start_conversation``; this processor calls the LLM, marks the
    exercise published, ensures a session exists, and stores the assistant's
    first message as a ``qa_question`` row.
    """

    async def process(self, db: AsyncSession, task: ImageProcessingTask) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        """Run the start-turn generation; returns (result, token_usage)."""
        # task.ref_id is the exercise id for 'qa_exercise' tasks.
        exercise_id = task.ref_id
        exercise = await qa_exercise_dao.get(db, exercise_id)
        if not exercise:
            raise errors.NotFoundError(msg="Exercise not found")
        image = await image_dao.get(db, exercise.image_id)
        if not image:
            raise errors.NotFoundError(msg="Image not found")
        # Pull a textual description out of the stored recognition result;
        # it may be a string or a list of strings — fall back to '' otherwise.
        rr = (image.details or {}).get('recognition_result') or {}
        description = ''
        try:
            d = rr.get('description')
            if isinstance(d, str):
                description = d
            elif isinstance(d, list) and d:
                description = d[0] if isinstance(d[0], str) else ''
        except Exception:
            description = ''
        # Conversation configuration was persisted on the exercise's ext column.
        params = exercise.ext or {}
        prompt = get_free_conversation_start_prompt(
            scene=params.get('scene'),
            event=params.get('event'),
            user_role=params.get('user_role'),
            assistant_role=params.get('assistant_role'),
            style=params.get('style'),
            level=params.get('level'),
            info=params.get('info'),
            description=description,
        )
        res = await self._call_llm_chat(prompt=prompt, image_id=image.id, user_id=task.user_id, chat_type='conversation_start')
        if not res.get('success'):
            raise Exception(res.get('error') or "LLM call failed")
        token_usage = res.get('token_usage') or {}
        # The LLM is instructed to emit pure JSON; tolerate pre-parsed dicts.
        try:
            parsed = json.loads(res.get('result')) if isinstance(res.get('result'), str) else res.get('result')
        except Exception:
            parsed = {}
        if not parsed or not isinstance(parsed, dict):
            raise Exception("Invalid LLM response format")
        # Mark the exercise live with its single (growing) question thread.
        exercise.status = 'published'
        exercise.question_count = 1
        # Reuse the session pre-created by the service when present; otherwise
        # create one (e.g. if the service-side pre-creation was skipped).
        session = await qa_session_dao.get_latest_by_user_exercise(db, task.user_id, exercise.id)
        if session:
            session.status = 'ongoing'
            prog = dict(session.progress or {})
            prog['total_questions'] = 1
            session.progress = prog
        else:
            prog = {'current_index': 0, 'answered': 0, 'correct': 0, 'attempts': [], 'total_questions': 1}
            session = await qa_session_dao.create(db, {
                'exercise_id': exercise.id,
                'starter_user_id': task.user_id,
                'status': 'ongoing',
                'started_at': datetime.now(),
                'progress': prog,
                'ext': None,
            })
        # Persist the assistant's opening message as a question row; the
        # structured extras (translation, prompts, alternatives) go into ext.
        question_content = parsed.get('response_en') or ''
        question_ext = {
            'role': 'assistant',
            'response_zh': parsed.get('response_zh'),
            'prompt_en': parsed.get('prompt_en'),
            'prompt_zh': parsed.get('prompt_zh'),
            'alternative_responses': parsed.get('alternative_responses'),
            'correction': parsed.get('correction'),
        }
        await qa_question_dao.create(db, {
            'exercise_id': exercise.id,
            'image_id': image.id,
            'question': question_content,
            'user_id': task.user_id,
            'payload': None,
            'ext': question_ext,
        })
        await db.flush()
        result = {
            'exercise_id': str(exercise.id),
            'session_id': str(session.id),
            'token_usage': token_usage
        }
        return result, token_usage

    async def _call_llm_chat(self, prompt: str, image_id: int, user_id: int, chat_type: str) -> Dict[str, Any]:
        """Invoke the configured LLM with audit logging.

        Returns ``{"success": True, "result": str, "token_usage": dict}`` or
        ``{"success": False, "error": str}`` — never raises.
        NOTE(review): duplicated verbatim in ConversationInitProcessor and
        ConversationReplyProcessor; a shared mixin would remove the copies.
        """
        messages = [
            SystemMessage(content="You are a helpful assistant."),
            HumanMessage(content=prompt)
        ]
        metadata = {
            "image_id": image_id,
            "user_id": user_id,
            "api_type": chat_type,
            "model_name": settings.LLM_MODEL_TYPE
        }
        try:
            llm = LLMFactory.create_llm(settings.LLM_MODEL_TYPE)
            res = await llm.ainvoke(
                messages,
                config={"callbacks": [AuditLogCallbackHandler(metadata=metadata)]}
            )
            content = res.content
            if not isinstance(content, str):
                content = str(content)
            token_usage = {}
            if res.response_metadata:
                # Provider-dependent key: OpenAI-style 'token_usage' or 'usage'.
                token_usage = res.response_metadata.get("token_usage") or res.response_metadata.get("usage") or {}
            return {"success": True, "result": content, "token_usage": token_usage}
        except Exception as e:
            return {"success": False, "error": str(e)}
class ConversationReplyProcessor(TaskProcessor):
    """Async task processor that turns a pending user attempt into the next
    assistant message of a free image conversation.

    The task's ``ref_id`` points at the ``qa_attempt`` row created by
    ``QaService.reply_conversation``.
    """

    async def process(self, db: AsyncSession, task: ImageProcessingTask) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        """Generate the assistant reply; returns (result, token_usage)."""
        # task.ref_id is the attempt id for 'qa_attempt' tasks.
        attempt_id = task.ref_id
        attempt = await qa_attempt_dao.get(db, attempt_id)
        if not attempt:
            raise errors.NotFoundError(msg="Attempt not found")
        exercise = await qa_exercise_dao.get(db, attempt.exercise_id)
        if not exercise:
            raise errors.NotFoundError(msg="Exercise not found")
        image = await image_dao.get(db, exercise.image_id)
        # Load the session so its timestamp/progress can be updated below.
        session = await qa_session_dao.get_latest_by_user_exercise(db, task.user_id, exercise.id)
        if not session:
            # Tolerated: the reply still works without a session row; the
            # session-update step at the end is simply skipped.
            pass
        # Extract the image description (string or first string of a list).
        rr = (image.details or {}).get('recognition_result') or {}
        description = ''
        try:
            d = rr.get('description')
            if isinstance(d, str):
                description = d
            elif isinstance(d, list) and d:
                description = d[0] if isinstance(d[0], str) else ''
        except Exception:
            description = ''
        # Build the dialogue history for the prompt. Every stored question is
        # an assistant turn; each user turn is the latest completed attempt on
        # that question. The last question is the message being replied to: it
        # goes into the history, but the current attempt does NOT — it is
        # passed separately as the "User's New Input" of the reply prompt.
        questions = await qa_question_dao.get_by_exercise_id(db, exercise.id)
        history = []
        for i, q in enumerate(questions):
            # Assistant turn.
            history.append({
                'role': 'assistant',
                'content': q.question
            })
            # Stop before attaching a user turn to the question being answered.
            if i == len(questions) - 1:
                break
            # Earlier questions: pair with the user's completed attempt, if any.
            prev_attempt = await qa_attempt_dao.get_latest_completed_by_user_question(db, task.user_id, q.id)
            if prev_attempt:
                history.append({
                    'role': 'user',
                    'content': prev_attempt.input_text
                })
        user_input = attempt.input_text or ''
        params = exercise.ext or {}
        prompt = get_free_conversation_reply_prompt(
            history=history,
            user_input=user_input,
            scene=params.get('scene'),
            event=params.get('event'),
            user_role=params.get('user_role'),
            assistant_role=params.get('assistant_role'),
            style=params.get('style'),
            level=params.get('level'),
            info=params.get('info'),
            description=description,
        )
        res = await self._call_llm_chat(prompt=prompt, image_id=image.id, user_id=task.user_id, chat_type='conversation_reply')
        if not res.get('success'):
            raise Exception(res.get('error') or "LLM call failed")
        token_usage = res.get('token_usage') or {}
        try:
            parsed = json.loads(res.get('result')) if isinstance(res.get('result'), str) else res.get('result')
        except Exception:
            parsed = {}
        if not parsed or not isinstance(parsed, dict):
            raise Exception("Invalid LLM response format")
        # Attach the grammar correction (if any) to the user's attempt and
        # complete it.
        correction = parsed.get('correction')
        if correction:
            new_ext = dict(attempt.ext or {})
            new_ext['correction'] = correction
            attempt.ext = new_ext
        attempt.evaluation = {'correction': correction}
        attempt.status = 'completed'  # attempt was created in 'pending' state
        # Store the assistant's reply as the next question row.
        question_content = parsed.get('response_en') or ''
        question_ext = {
            'role': 'assistant',
            'response_zh': parsed.get('response_zh'),
            'prompt_en': parsed.get('prompt_en'),
            'prompt_zh': parsed.get('prompt_zh'),
            'alternative_responses': parsed.get('alternative_responses'),
            'correction': correction,
        }
        new_question = await qa_question_dao.create(db, {
            'exercise_id': exercise.id,
            'image_id': image.id,
            'question': question_content,
            'user_id': task.user_id,
            'payload': None,
            'ext': question_ext,
        })
        # Bump the session's activity timestamp and turn counter.
        if session:
            session.updated_at = datetime.now()
            prog = dict(session.progress or {})
            prog['total_questions'] = (prog.get('total_questions') or 0) + 1
            session.progress = prog
        await db.flush()
        result = {
            'session_id': str(session.id) if session else '',
            'new_question_id': str(new_question.id),
            'token_usage': token_usage
        }
        return result, token_usage

    async def _call_llm_chat(self, prompt: str, image_id: int, user_id: int, chat_type: str) -> Dict[str, Any]:
        """Invoke the configured LLM with audit logging; never raises.

        NOTE(review): verbatim copy of the helper in ConversationStartProcessor —
        candidate for extraction into a shared base/mixin.
        """
        messages = [
            SystemMessage(content="You are a helpful assistant."),
            HumanMessage(content=prompt)
        ]
        metadata = {
            "image_id": image_id,
            "user_id": user_id,
            "api_type": chat_type,
            "model_name": settings.LLM_MODEL_TYPE
        }
        try:
            llm = LLMFactory.create_llm(settings.LLM_MODEL_TYPE)
            res = await llm.ainvoke(
                messages,
                config={"callbacks": [AuditLogCallbackHandler(metadata=metadata)]}
            )
            content = res.content
            if not isinstance(content, str):
                content = str(content)
            token_usage = {}
            if res.response_metadata:
                # Provider-dependent key: OpenAI-style 'token_usage' or 'usage'.
                token_usage = res.response_metadata.get("token_usage") or res.response_metadata.get("usage") or {}
            return {"success": True, "result": content, "token_usage": token_usage}
        except Exception as e:
            return {"success": False, "error": str(e)}
class QaService:
async def get_conversation_setting(self, image_id: int, user_id: int) -> Optional[Dict[str, Any]]:
async with async_db_session() as db:
@@ -290,6 +607,223 @@ class QaService:
"latest_session": latest_session_info,
}
async def start_conversation(
    self,
    image_id: int,
    user_id: int,
    scene: List[str],
    event: List[str],
    style: Optional[str] = None,
    user_role: Optional[str] = None,
    assistant_role: Optional[str] = None,
    level: Optional[str] = None,
    info: Optional[str] = None,
) -> Dict[str, Any]:
    """Create a free-conversation exercise + session and dispatch the LLM task.

    Returns a dict with ``task_id``, ``status``, ``exercise_id`` and
    ``session_id`` (ids serialized as strings). Raises ForbiddenError when the
    user lacks points or has exhausted their concurrent-task slots, and
    NotFoundError when the image does not exist.
    """
    # Gate on points balance and per-user concurrent-task limit first.
    if not await points_service.check_sufficient_points(user_id, LLM_CHAT_COST):
        raise errors.ForbiddenError(msg='积分不足,请获取积分后继续使用')
    slot_acquired = await rate_limit_service.acquire_task_slot(user_id)
    if not slot_acquired:
        max_tasks = await rate_limit_service.get_user_task_limit(user_id)
        raise errors.ForbiddenError(msg=f'用户同时最多只能运行 {max_tasks} 个任务,请等待现有任务完成后再试')
    async with async_db_session.begin() as db:
        image = await image_dao.get(db, image_id)
        if not image:
            raise errors.NotFoundError(msg="Image not found")
        # Exercise row carries the conversation configuration in ext so the
        # processor can rebuild the prompt later.
        exercise = await qa_exercise_dao.create(db, {
            "image_id": image_id,
            "created_by": user_id,
            "type": "free_conversation",
            "description": None,
            "status": "ongoing",
            "ext": {
                "scene": scene,
                "event": event,
                "user_role": user_role,
                "assistant_role": assistant_role,
                "style": style,
                "level": level,
                "info": info,
            },
        })
        await db.flush()
        # Pre-create the session so the client gets a session_id immediately;
        # the processor flips it to 'ongoing' once the opening turn is ready.
        prog = {'current_index': 0, 'answered': 0, 'correct': 0, 'attempts': [], 'total_questions': 0}
        session = await qa_session_dao.create(db, {
            'exercise_id': exercise.id,
            'starter_user_id': user_id,
            'status': 'initializing',
            'started_at': datetime.now(),
            'progress': prog,
            'ext': None,
        })
        await db.flush()
        # Task row that the background processor will pick up (ref -> exercise).
        task = await image_task_dao.create_task(db, CreateImageTaskParam(
            image_id=image_id,
            user_id=user_id,
            dict_level=(getattr(getattr(image, 'dict_level', None), 'name', None) or 'LEVEL1'),
            ref_type='qa_exercise',
            ref_id=exercise.id,
            status=ImageTaskStatus.PENDING,
        ))
        await db.flush()
        task_id = task.id
    # Dispatch after the transaction block so the worker's own DB session can
    # see the committed rows.
    asyncio.create_task(image_task_service.process_task(task_id, user_id, ConversationStartProcessor()))
    return {
        "task_id": str(task_id),
        "status": "processing",
        "exercise_id": str(exercise.id),
        "session_id": str(session.id)
    }
async def reply_conversation(
    self,
    session_id: int,
    user_id: int,
    input_text: str,
) -> Dict[str, Any]:
    """Record a user reply and dispatch the LLM task for the assistant's turn.

    Returns a dict with ``task_id``, ``status`` and ``session_id`` (ids as
    strings). Raises ForbiddenError on insufficient points / exhausted task
    slots or foreign sessions, NotFoundError for missing session/exercise, and
    ServerError when there is no assistant message to reply to.

    Fixes vs. original:
    - ``dict_level`` was hard-coded to 'LEVEL1' with a TODO-style comment; it
      is now derived from the image, mirroring ``start_conversation``.
    - Guard against a dangling ``exercise_id`` (previously ``exercise.id``
      would raise AttributeError).
    """
    # Gate on points balance and per-user concurrent-task limit first.
    if not await points_service.check_sufficient_points(user_id, LLM_CHAT_COST):
        raise errors.ForbiddenError(msg='积分不足,请获取积分后继续使用')
    slot_acquired = await rate_limit_service.acquire_task_slot(user_id)
    if not slot_acquired:
        max_tasks = await rate_limit_service.get_user_task_limit(user_id)
        raise errors.ForbiddenError(msg=f'用户同时最多只能运行 {max_tasks} 个任务,请等待现有任务完成后再试')
    async with async_db_session.begin() as db:
        session = await qa_session_dao.get(db, session_id)
        if not session:
            raise errors.NotFoundError(msg="Session not found")
        if session.starter_user_id != user_id:
            raise errors.ForbiddenError(msg="Forbidden")
        exercise = await qa_exercise_dao.get(db, session.exercise_id)
        if not exercise:
            raise errors.NotFoundError(msg="Exercise not found")
        # The user's reply is stored as a pending attempt linked to the last
        # assistant message; the processor completes it.
        last_question = await qa_question_dao.get_latest_by_exercise_id(db, exercise.id)
        if not last_question:
            raise errors.ServerError(msg="No question to reply to")
        attempt = await qa_attempt_dao.create(db, {
            "user_id": user_id,
            "question_id": last_question.id,
            "exercise_id": exercise.id,
            "input_text": input_text,
            "status": "pending",
            "evaluation": None,
            "ext": None
        })
        await db.flush()
        # Derive dict_level from the image, as start_conversation does.
        image = await image_dao.get(db, exercise.image_id)
        dict_level = (getattr(getattr(image, 'dict_level', None), 'name', None) or 'LEVEL1')
        # Task row that the background processor will pick up (ref -> attempt).
        task = await image_task_dao.create_task(db, CreateImageTaskParam(
            image_id=exercise.image_id,
            user_id=user_id,
            dict_level=dict_level,
            ref_type='qa_attempt',
            ref_id=attempt.id,
            status=ImageTaskStatus.PENDING,
        ))
        await db.flush()
        task_id = task.id
    # Dispatch after the transaction block so the worker sees committed rows.
    asyncio.create_task(image_task_service.process_task(task_id, user_id, ConversationReplyProcessor()))
    return {
        "task_id": str(task_id),
        "status": "processing",
        "session_id": str(session.id)
    }
async def _get_messages_for_session(self, db: AsyncSession, exercise_id: int, user_id: int) -> List[Dict[str, Any]]:
    """Rebuild the full message history for an exercise's conversation.

    Each stored question is an assistant turn; the user's latest completed
    attempt on that question (if any) follows it as a user turn. Returns a
    list of ``{"role", "content"}`` dicts matching ConversationMessageSchema.
    """
    questions = await qa_question_dao.get_by_exercise_id(db, exercise_id)
    messages = []
    for q in questions:
        # Assistant turn: text lives on the question row, extras in ext.
        ext = q.ext or {}
        messages.append({
            "role": "assistant",
            "content": {
                "response_en": q.question,
                "response_zh": ext.get("response_zh"),
                "prompt_en": ext.get("prompt_en"),
                "prompt_zh": ext.get("prompt_zh"),
                "alternative_responses": ext.get("alternative_responses"),
                "correction": ext.get("correction"),
            }
        })
        # User turn: the completed attempt, including any grammar correction.
        attempt = await qa_attempt_dao.get_latest_completed_by_user_question(db, user_id, q.id)
        if attempt:
            messages.append({
                "role": "user",
                "content": {
                    "text": attempt.input_text,
                    "correction": (attempt.evaluation or {}).get("correction")
                }
            })
    return messages
async def get_latest_messages(self, session_id: int, user_id: int) -> Dict[str, Any]:
    """Return only the newest assistant message for a session.

    Raises NotFoundError when the session is missing or owned by another
    user (ownership failures are deliberately indistinguishable from missing
    sessions).
    """
    async with async_db_session() as db:
        session = await qa_session_dao.get(db, session_id)
        if not session or session.starter_user_id != user_id:
            raise errors.NotFoundError(msg="Session not found")
        # Optimization: fetch only the latest question instead of replaying
        # the entire history via _get_messages_for_session.
        latest_q = await qa_question_dao.get_latest_by_exercise_id(db, session.exercise_id)
        latest_messages = []
        if latest_q:
            ext = latest_q.ext or {}
            latest_messages.append({
                "role": "assistant",
                "content": {
                    "response_en": latest_q.question,
                    "response_zh": ext.get("response_zh"),
                    "prompt_en": ext.get("prompt_en"),
                    "prompt_zh": ext.get("prompt_zh"),
                    "alternative_responses": ext.get("alternative_responses"),
                    "correction": ext.get("correction"),
                }
            })
        return {
            "session_id": str(session_id),
            "messages": latest_messages
        }
async def get_conversation_session(self, session_id: int, user_id: int) -> Dict[str, Any]:
    """Return the full session view: status, timestamp and message history.

    Raises NotFoundError when the session is missing, owned by another user,
    or references a deleted exercise.

    Fix: the original dereferenced ``exercise.id`` without a None check — a
    session whose exercise row was removed raised AttributeError instead of a
    clean 404.
    """
    async with async_db_session() as db:
        session = await qa_session_dao.get(db, session_id)
        if not session or session.starter_user_id != user_id:
            raise errors.NotFoundError(msg="Session not found")
        exercise = await qa_exercise_dao.get(db, session.exercise_id)
        if not exercise:
            raise errors.NotFoundError(msg="Exercise not found")
        messages = await self._get_messages_for_session(db, session.exercise_id, user_id)
        return {
            "exercise_id": str(exercise.id),
            "session_id": str(session.id),
            "status": session.status,
            # Exercise models may not define updated_time; guard via getattr.
            "updated_at": (exercise.updated_time.isoformat() if getattr(exercise, "updated_time", None) else None),
            "messages": messages,
        }
async def create_exercise_task(self, image_id: int, user_id: int, type: Optional[str] = "scene_basic") -> Dict[str, Any]:
is_conversation_init = type == 'init_conversion'

View File

@@ -0,0 +1,257 @@
from typing import List, Optional
def get_free_conversation_start_prompt(
    scene: List[str],
    event: List[str],
    user_role: Optional[str],
    assistant_role: Optional[str],
    style: Optional[str],
    level: Optional[str],
    info: Optional[str],
    description: str,
) -> str:
    """Build the system prompt that opens a free image conversation.

    ``scene``/``event`` are joined into comma-separated strings; optional
    fields default to '' (level defaults to 'easy', matched case-insensitively).
    Returns the full prompt text instructing the LLM to emit structured JSON.

    Fix: the template interpolated the raw ``assistant_role`` and ``user_role``
    parameters (which may be None) instead of the sanitized ``*_str``
    variables, leaking the literal text "None" into the prompt when roles
    were unset.
    """
    # Normalize every optional input so None never reaches the template.
    scene_str = ", ".join(scene) if scene else ""
    event_str = ", ".join(event) if event else ""
    info_str = info or ""
    conversation_style = style or ""
    user_role_str = user_role or ""
    assistant_role_str = assistant_role or ""
    level_str = level or "easy"
    # Default (easy) language rules; overridden below for medium/hard.
    level_prompt = """
— Basic Communication, Short & Simple (Like a 5-year-old speaking)
- **Vocabulary**: Only high-frequency daily words (≤2-syllable words, e.g., food, drink, table, happy; avoid rare words like "delicious" → use "good", "tasty" max)
- **Grammar**: Limited to 3 basic structures:
1. Simple present tense (I like this.)
2. Present continuous tense (The food is hot.)
3. Basic modal verbs (can/will, e.g., I can eat.)
- **Sentence Length**: ≤10 words per sentence; 1-2 sentences per response (no complex clauses)
- **Expression Goal**: Only complete basic communication (greet, ask simple questions, state likes/dislikes)
- **Style Adaptation**: Even formal style stays simple (e.g., formal → "Can you help me?" instead of "Would you mind assisting me?")
"""
    if level_str.lower() == "medium":
        level_prompt = """
— Detailed Discussion + Emotional Expression
- **Vocabulary**: High-frequency words + scene-specific collocations (e.g., restaurant → "menu, order, signature dish"; meeting → "task, deadline, suggestion")
- **Grammar**: Basic structures + limited complex ones:
1. Present perfect tense (I have tried this before.)
2. Simple conditional sentences (If we order steak, it will be good.)
3. Coordinate clauses (and/but/or, e.g., The food is good but expensive.)
- **Sentence Length**: ≤15 words per sentence; 1-2 sentences per response
- **Expression Goal**: Add details (e.g., "The steak is hot and juicy") + emotional words (e.g., happy, excited, worried, tired)
- **Style Adaptation**: Match style with emotion (e.g., casual → "This is awesome!"; formal → "I am pleased with this plan.")
"""
    elif level_str.lower() == "hard":
        level_prompt = """
Daily Communication + Communication Skills + Extended Expression
- **Vocabulary**: Daily words + collocations + advanced synonyms/extended phrases (e.g., "good""delicious, flavorful, mouth-watering"; "ask""inquire about, seek advice on")
- **Grammar**: Full range of structures + sophisticated usage:
1. Complex conditional sentences (If we had started earlier, we could have finished on time.)
2. Inversion (Rarely do we see such a great plan.)
3. Attributive clauses (The restaurant that we visited yesterday is great.)
- **Sentence Length**: ≤20 words per sentence; 1-2 sentences per response (concise but rich)
- **Expression Goal**:
1. Basic communication + details + emotion
2. Add **communication skills**: euphemism (e.g., "Im not sure if this works" instead of "This is bad"), topic guidance (e.g., "Speaking of which, what do you think about..."), persuasion (e.g., "Considering the deadline, we should prioritize this task")
3. Extended expression: paraphrase, cultural references (e.g., "This steak is as good as the one in New York")
- **Style Adaptation**: Style drives skill usage (e.g., professional → use logical persuasion; friendly → use casual euphemism)
"""
    base = f"""
You are a professional English conversation partner for intermediate English learners. Follow the rules below to conduct natural, targeted multi-round conversations and output structured JSON.
// Mandatory Configuration (Fill in before conversation starts)
- Scene: {scene_str} (e.g., restaurant)
- Event: {event_str} (e.g., dining with friends)
- Your Role: {assistant_role_str} (paired with user's role {user_role_str})
- User Role: {user_role_str}
- The tone and style of the dialogue: {conversation_style}
- English Level: {level_str} (fixed as intermediate)
- Extra Info: {info_str} (supplementary background)
- Image Description: {description} (image details)
// Level-Specific Language Rules (Dynamic & Mandatory)
{level_prompt}
// Conversation Rules (Strictly Follow)
1. **Role & Style Alignment (Critical)**
- Stick to your {assistant_role_str} and strictly match the {conversation_style} requirement:
- Casual: Use colloquial English, contractions (wanna, gonna, dont), short sentences, and friendly tone (fit daily chats/dining with friends).
- Formal: Use complete sentences, polite expressions (would you mind, I would suggest), avoid contractions (fit business meetings/negotiations).
- Professional: Focus on logicality and persuasion, use scene-specific terminology, clear structure (fit work discussions/training).
- Friendly: Warm and approachable, add appropriate emotional words (great, awesome, nice) (fit chatting with acquaintances).
- Use intermediate English: scene-specific vocabulary (no rare words), grammar includes complex clauses/present perfect/conditional sentences (avoid overly simple/advanced structures).
- Naturally integrate {description} and {info_str} into the conversation (e.g., mention "steak" or "newly opened restaurant").
2. **Initiation & Anti-Awkwardness Requirements**
- You speak first to start the conversation; opening is natural and scene-relevant (no abruptness).
- Generate **3 categorized alternative user responses** (1 for each type: positive/neutral/negative) to avoid user awkwardness. Each type must meet:
- Consistent with {user_role_str}, {scene_str}, {event_str} and {conversation_style};
- Intermediate-level English (fit user's ability);
- Short (1 sentence each, easy for user to choose/modify);
- Clear emotional orientation:
- Positive: Agree, approve, show enthusiasm (e.g., "Thats a great idea! I love steak!");
- Neutral: Objective statement, ask factual questions (e.g., "I havent tried it before. Is it expensive?");
- Negative: Polite refusal, express doubts (e.g., "Im not a fan of steak. Do they have seafood?").
3. **Multi-round Coherence**
- This is an incremental conversation: always reference historical dialogue content (never ignore user's previous words).
- Keep your response concise (1-2 English sentences, easy for user to follow).
- Gently correct user's grammar/vocab mistakes without disrupting flow (e.g., "You can say 'I like this restaurant' instead of 'I like this restaurants' 😊").
4. **JSON Output Format (Mandatory, No Extra Text)**
- Only output a valid JSON string (parseable by JSON.parse), no explanations/role labels.
- Fields definition:
{{
"response_en": "Your English conversation content (1-2 sentences, match {conversation_style})",
"response_zh": "Chinese translation of response_en",
"prompt_en": "Friendly guide for user to reply",
"prompt_zh": "Chinese translation of prompt_en",
"alternative_responses": {{
"positive": {{
"alt_en": "Positive user response (English, 1 sentence)",
"alt_zh": "Chinese translation of positive response"
}},
"neutral": {{
"alt_en": "Neutral user response (English, 1 sentence)",
"alt_zh": "Chinese translation of neutral response"
}},
"negative": {{
"alt_en": "Negative user response (English, 1 sentence)",
"alt_zh": "Chinese translation of negative response"
}}
}}
"correction": "Grammar/vocab correction (English, empty string if no mistake in user's last input)"
}}
- When appending new conversations, update the JSON based on full dialogue history and maintain style consistency.
// Output Constraint
- Strictly follow the JSON format; any deviation (extra text/invalid fields) is not allowed.
"""
    return base.strip()
def get_free_conversation_reply_prompt(
    history: List[dict],
    user_input: str,
    scene: List[str],
    event: List[str],
    user_role: Optional[str],
    assistant_role: Optional[str],
    style: Optional[str],
    level: Optional[str],
    info: Optional[str],
    description: str,
) -> str:
    """Build the LLM prompt for one follow-up turn of an image-based free conversation.

    Renders the full conversation context (scene/event/roles/style/level/image
    description), the dialogue history, the user's new input, and level-specific
    language rules into a single instruction string that asks the model to emit
    one JSON object (response, guide prompt, 3 alternative replies, correction).

    Args:
        history: Prior messages; each dict is read via ``.get("role")`` and
            ``.get("content")`` and rendered as ``role: content`` lines.
        user_input: The user's newest utterance to respond to.
        scene: Scene tags inherited from initialization (joined with ", ").
        event: Event tags inherited from initialization (joined with ", ").
        user_role: Role the user plays (e.g. "customer"); empty string if None.
        assistant_role: Role the assistant plays; empty string if None.
        style: Conversation style (casual/formal/...); empty string if None.
        level: Difficulty level ("easy"/"medium"/"hard"); falsy values and
            unknown levels fall back to "easy".
        info: Extra background details; empty string if None.
        description: Textual description of the image grounding the chat.

    Returns:
        The fully rendered prompt string, stripped of surrounding whitespace.
    """
    scene_str = ", ".join(scene) if scene else ""
    event_str = ", ".join(event) if event else ""
    info_str = info or ""
    conversation_style = style or ""
    user_role_str = user_role or ""
    assistant_role_str = assistant_role or ""
    level_str = level or "easy"

    # Level-specific language constraints. "easy" is the default; any
    # unrecognized level string also falls back to it (same behavior as the
    # previous if/elif chain).
    easy_prompt = """
— Short, Simple, Basic Communication
- Vocabulary: Only 1-2 syllable high-frequency words (e.g., food, hot, nice; no complex words)
- Grammar: Limited to simple present/present continuous/basic modals (can/will)
- Sentence Length: ≤10 words per sentence; 1 sentence max for your response
- Correction Style: Direct + simple example (e.g., "Say 'I like it' not 'I like'")
"""
    medium_prompt = """
Detailed, Emotional, Scene-Specific
- Vocabulary: High-frequency words + scene collocations (e.g., restaurant → menu, order)
- Grammar: Basic structures + present perfect/simple conditionals (if...then...)
- Sentence Length: ≤15 words per sentence; 1-2 sentences for your response
- Correction Style: Polite + brief reason (e.g., "Use 'have tried' because its a past experience")
"""
    hard_prompt = """
Sophisticated, Skillful, Extended Expression
- Vocabulary: Daily words + collocations + advanced synonyms (e.g., good → delicious, flavorful)
- Grammar: Complex structures (attributive clauses, inversion) + communication skills (euphemism, persuasion)
- Sentence Length: ≤20 words per sentence; 1-2 sentences for your response
- Correction Style: Polite + optimization suggestion (e.g., "You can say 'Im afraid this may not work' to sound more formal")
"""
    level_prompt = {"medium": medium_prompt, "hard": hard_prompt}.get(level_str, easy_prompt)

    # Render history as "role: content" lines. join() is O(n) overall,
    # unlike repeated string += which is quadratic.
    history_str = "".join(
        f"{msg.get('role')}: {msg.get('content')}\n" for msg in history
    )

    base = f"""
You are a professional English conversation partner specialized in **continuing multi-round dialogues**. Your core task is to follow up based on conversation history and the user's new input, while maintaining consistency of role, style, and difficulty. Output **only valid JSON** (parseable by JSON.parse), no extra text/explanations.
// Mandatory Context (Inherited from Initialization, Do Not Modify)
- Scene: {scene_str} (e.g., restaurant, meeting room)
- Event: {event_str} (e.g., dining with friends, project discussion)
- Your Role: {assistant_role_str} (e.g., friend, project manager)
- User Role: {user_role_str} (e.g., customer, team member)
- Conversation Style: {conversation_style} (e.g., casual/formal/professional; strictly adhere to style norms)
- English Level: {level_str} (beginner/intermediate/advanced; follow level-specific language rules below)
- Image Context: {description} (core elements of the image, integrate naturally)
- Extra Background: {info_str} (supplementary details, reference when relevant)
// Critical Conversation History (Must Reference to Ensure Coherence)
{history_str}
// User's New Input (Core Analysis Object)
User: {user_input}
// Level-Specific Language Rules (Strictly Follow for All Outputs)
{level_prompt}
// Core Instructions (Priority Order: Coherence > Error Correction > Natural Progression)
1. **Analyze User Input & Correct Errors (Critical)**
- Check for grammar, vocabulary, spelling mistakes. If any, write a **level-matched polite correction** in the "correction" field (empty string if no errors).
- If input is empty/unclear/irrelevant to scene/event: Politely ask for clarification (match your role/style/level), and skip alternative responses temporarily if needed.
- If input deviates from the topic: Gently guide back to the scene/event (e.g., "Thats interesting! By the way, what do you think of the steak here?")
2. **Generate Your Response (Strictly Bound to Context)**
- Stay in your role ({assistant_role_str}) and match {conversation_style} (e.g., casual → use contractions; formal → avoid contractions).
- **Must reference the conversation history** (e.g., if user mentioned "I dont like steak" before, dont ask "Do you like steak?").
- Integrate {description} and {info_str} naturally (avoid forced references).
- Keep it concise (follow level-specific sentence rules) and **advance the conversation** (dont repeat the same topic).
3. **Generate Guide Prompt (Help User Continue Talking)**
- Write a **topic-related suggestion** (in English and Chinese) for what the user can say next (e.g., "Talk about your favorite food" / "聊聊你最喜欢的菜").
- Prompt must be closely related to YOUR current response (not the users input alone).
4. **Generate Emotional Alternative Responses (3 Types)**
- Create 3 options (positive/neutral/negative) for the user to reply to **YOUR response** (not the history).
- Each alternative must match {level_str}, {conversation_style}, and the current dialogue context.
- Each alternative is 1 sentence only; translations must be accurate and colloquial.
// Output Format (JSON Only, No Deviation Allowed)
{{
"response_en": "Your level/style-matched English response (1-2 sentences max)",
"response_zh": "Accurate colloquial Chinese translation of your response",
"prompt_en": "English guide prompt (suggest what user can say next)",
"prompt_zh": "Colloquial Chinese translation of the guide prompt",
"alternative_responses": {{
"positive": {{
"alt_en": "Level/style-matched positive response to YOUR reply (1 sentence)",
"alt_zh": "Colloquial Chinese translation"
}},
"neutral": {{
"alt_en": "Level/style-matched neutral response to YOUR reply (1 sentence)",
"alt_zh": "Colloquial Chinese translation"
}},
"negative": {{
"alt_en": "Level/style-matched negative response to YOUR reply (1 sentence)",
"alt_zh": "Colloquial Chinese translation"
}}
}},
"correction": "Level-matched polite correction (empty string if no errors; English only)"
}}
// Forbidden Behaviors
- Do NOT reintroduce the scene/event (history already includes it).
- Do NOT use vocabulary/grammar beyond the specified {level_str}.
- Do NOT generate responses unrelated to the conversation history.
- Do NOT output anything except the required JSON.
"""
    return base.strip()

View File

@@ -150,7 +150,12 @@ Identify 5-8 possible events that are logically feasible in daily life; events c
Events must be specific and actionable (avoid vague descriptions like "doing something").
For each event, provide English name + Chinese translation + bilingual brief conversation direction (10-20 words per direction, explaining the focus of the conversation for this event).
No need to bind events to specific scenes; prioritize enriching event diversity to expand users' conversation options.
Output Constraints
For each event, supplement 3 key attributes (guide targeted dialogue practice):
Conversation Style: Match the events atmosphere (e.g., birthday celebration → casual/cheerful; business negotiation → formal/serious), output as bilingual (style_en/style_zh).
Suggested Roles: 2-3 common role pairs suitable for the event (e.g., blind date → man & woman, stranger & stranger), output as bilingual role items.
Bilingual Conversation Direction: 10-20 words per language, explaining the focus of the conversation for this event (e.g., "talking about hobbies and future plans" / "谈论兴趣爱好和未来规划").
Do not bind events to specific scenes; prioritize enriching event diversity to expand users' conversation options.
Output Constraints:
Only return a JSON string (no explanatory text, no extra comments).
Ensure the JSON can be directly parsed by JSON.parse.
Strictly control the quantity of scenes and events within the specified range to avoid overwhelming users with options.
@@ -164,7 +169,13 @@ Output JSON Structure:
"event_en": "string", // English event name (e.g., "dining with friends")
"event_zh": "string", // Chinese event name (e.g., "和朋友聚餐")
"conversation_direction_en": "string", // English conversation focus (e.g., "talking about food taste and restaurant recommendations")
"conversation_direction_zh": "string" // Chinese conversation focus (e.g., "谈论食物口味和餐厅推荐")
"conversation_direction_zh": "string", // Chinese conversation focus (e.g., "谈论食物口味和餐厅推荐")
"style_en": "string",
"style_zh": "string",
"suggested_roles": [
{{"role1_en": "string", "role1_zh": "string", "role2_en": "string", "role2_zh": "string"}},
...// 2-3 role pairs
]
}}, ...// 4-7 events in total, independent of scenes
]
}}

View File

@@ -19,7 +19,7 @@ app = register_app()
@app.get("/")
async def read_root():
# await wx_user_index_history()
await wx_user_index_history()
# res = await SentenceService()._process_scene_task(2111026809104629760, 2108963527040565248)
return {"Hello": f"World, {datetime.now().isoformat()}"}