语言检测并动态更新语种 TODO 后续切换为llm识别用户语种意图模式

2026-05-04 14:53:38 +08:00
parent dcf29a3b84
commit cbee81ee44
7 changed files with 304 additions and 11 deletions
--- a/src/server/deep_agent/agents/main_agent.py
+++ b/src/server/deep_agent/agents/main_agent.py
@@ -1,15 +1,17 @@
 import logging
-from typing import Callable
+from typing import Callable, Any, Optional, Dict
 from dataclasses import dataclass

 from deepagents import create_deep_agent
 from deepagents.backends import FilesystemBackend, CompositeBackend, StateBackend
-from langchain.agents.middleware import SummarizationMiddleware, ToolRetryMiddleware, wrap_model_call, ModelRequest, ModelResponse, wrap_tool_call, dynamic_prompt
-from langchain_core.messages import ToolMessage, SystemMessage
+from fast_langdetect import detect
+from langchain.agents.middleware import SummarizationMiddleware, ToolRetryMiddleware, wrap_model_call, ModelRequest, ModelResponse, wrap_tool_call, dynamic_prompt, before_model, AgentMiddleware, hook_config
+from langchain_core.messages import ToolMessage, SystemMessage, AIMessage, HumanMessage
 from langgraph.checkpoint.mongodb import MongoDBSaver
 from langgraph.checkpoint.serde.jsonplus import JsonPlusSerializer
 from langgraph.constants import END
 from langgraph.prebuilt.tool_node import ToolCallRequest
+from langgraph.runtime import Runtime
 from langgraph.store.memory import InMemoryStore
 from langgraph.types import Command
 from pymongo import MongoClient
@@ -96,7 +98,7 @@ async def report_control(request: ToolCallRequest, handler: Callable[[ToolCallRe
 def user_role_prompt(request: ModelRequest) -> str:
    """Generate system prompts based on use_report status and language preference."""
    use_report = request.runtime.context.use_report
-    language = request.runtime.context.language  # 默认简体中文
+    language = request.runtime.context.language

    # ==================== 报告功能状态提示（支持中英文） ====================
    if use_report:
@@ -132,7 +134,13 @@ def user_role_prompt(request: ModelRequest) -> str:
            CRITICAL:
            - Be sure to use the above settings when generating line drawings/images.
            - Do not refer to these three settings repeatedly when generating reports or text-only answers."""
-
+    langguage_prompt = f"""
+    ## Custom Language Rules
+    - All content of the final report and all reply content MUST be fully written in: {language}
+    - No mixed languages, no bilingual contrast, no extra English annotations.
+    - Maintain native, fluent, professional expression conforming to the language habits of {language}.
+    - All professional terms, captions, notes and reference descriptions must follow the unified {language} specification.
+    """
    final_prompt = backend_prompt + SYSTEM_PROMPT_MAPPING[f'SYSTEM_BASE_PROMPT_en'] + report_status + SYSTEM_PROMPT_MAPPING[f"SYSTEM_RULES_PROMPT_en"]

    logger.info(
@@ -142,6 +150,58 @@ def user_role_prompt(request: ModelRequest) -> str:
    return final_prompt


+from langchain.agents.middleware import AgentState
+
+
+class LanguageDetectionMiddleware(AgentMiddleware):
+    """使用 fast-langdetect（基于 fastText）自动检测语言"""
+
+    def __init__(self, min_length: int = 8, default_lang: str = "zh"):
+        self.min_length = min_length
+        self.default_lang = default_lang
+
+    def before_model(self, state: AgentState, runtime=None) -> Optional[Dict[str, Any]]:
+        messages = state.get("messages", [])
+        if not messages:
+            return None
+
+        last_msg = messages[-1]
+        if not isinstance(last_msg, HumanMessage):
+            return None
+
+        content = last_msg.content if hasattr(last_msg, "content") else str(last_msg)
+        content = content[0].get("text").strip()
+
+        if len(content) < self.min_length:
+            return None
+
+        try:
+            detected_lang = "en"
+            confidence = 0
+            # 单语言检测（最常用）
+            res = detect(text=content, model="auto", k=1)
+            if res and res[0].get("lang") and res[0].get("score", 0) > 0.5:
+                detected_lang = res[0]["lang"]
+                confidence = res[0]["score"]
+
+                print(f"🔍 fast-langdetect 检测到: {detected_lang} (score={confidence:.4f})")
+
+                runtime.context.language = detected_lang
+
+            return {
+                "language": detected_lang,
+                "preferred_language": detected_lang,
+                "language_confidence": float(confidence),
+            }
+
+        except Exception as e:
+            print(f"语言检测失败: {e}")
+            return {"language": self.default_lang}
+
+    async def abefore_model(self, state: AgentState, runtime=None):
+        return self.before_model(state, runtime)
+
+
 def build_main_agent(workspace_dir, enable_thinking):
    research_subagent = build_researcher_subagent(workspace_dir)
    # painter_subagent = build_painter_subagent(workspace_dir)
@@ -151,6 +211,7 @@ def build_main_agent(workspace_dir, enable_thinking):
        user_profile_subagent
    ]
    middleware = [
+        LanguageDetectionMiddleware(min_length=8, default_lang="en"),
        user_role_prompt,
        report_control,
        SummarizationMiddleware(
--- a/src/server/deep_agent/agents/researcher.py
+++ b/src/server/deep_agent/agents/researcher.py
@@ -1,3 +1,5 @@
+from langchain.agents.middleware import dynamic_prompt, ModelRequest
+
 from src.server.deep_agent.init_prompt import build_researcher_prompt
 from src.server.deep_agent.tools.crawl_tool import create_crawl4ai_batch_tool
 from src.server.deep_agent.tools.report_generator_tool import create_report_generator_tool
@@ -6,6 +8,52 @@ from src.server.deep_agent.tools.structured_retrieval_tool import create_structu
 from src.server.deep_agent.tools.user_persona_tool import query_report_profile


+@dynamic_prompt
+def language_control(request: ModelRequest) -> str:
+    """Generate system prompts based on use_report status and language preference."""
+    language = request.runtime.context.language  # 默认简体中文
+
+    final_prompt = f"""
+You are a professional furniture design researcher.
+
+Your primary goal:
+- Generate a high-quality, structured furniture design research report based on the user's request and user profile.
+- The report should be clear, insightful, and written in well-structured Markdown format.
+- It should include design trends, materials, color directions, representative cases, and relevant references.
+
+You are allowed to:
+- Retrieve user profile information (e.g., style, room type, preferences)
+- Generate research keywords
+- Search for relevant topics and sources
+- Crawl and read web content
+- Extract structured insights
+- Generate the final report
+
+Tool usage guidelines:
+- If necessary, first retrieve the user profile to better understand preferences.
+- Use meaningful and relevant keywords for research.
+- When crawling web content, try to process multiple sources efficiently (avoid repeated calls).
+- Focus on extracting key insights such as trends, materials, colors, and case studies.
+- Use the report_generator tool to produce the final report.
+
+Important rules:
+- Your objective is to complete a high-quality report, not to strictly follow a fixed sequence of steps.
+- You may adapt your approach depending on the situation.
+- Avoid calling the same tool repeatedly (especially crawl tools).
+- If some data is missing, proceed with available information and clearly mention any limitations.
+- Once the report is generated, consider the task complete and stop further actions.
+
+## Custom Language Rules
+- All content of the final report and all reply content MUST be fully written in: {language}
+- No mixed languages, no bilingual contrast, no extra English annotations.
+- Maintain native, fluent, professional expression conforming to the language habits of {language}.
+- All professional terms, captions, notes and reference descriptions must follow the unified {language} specification.
+
+"""
+
+    return final_prompt
+
+
 def build_researcher_subagent(workspace_dir):
    crawl4ai_batch = create_crawl4ai_batch_tool(workspace_dir)
    structured_retrieval = create_structured_retrieval_tool(workspace_dir)
@@ -28,10 +76,11 @@ This sub-agent will:
 - Produce a complete research report

 Do NOT use this sub-agent for:
- User profile collection (handled by user_profile_subagent)
+- User profile collection 
 - Image generation or editing tasks
        """,
        "system_prompt": build_researcher_prompt(),
+        "middleware": [language_control],
        "tools": [
            query_report_profile,
            topic_research,
--- a/src/server/deep_agent/agents/user_profile.py
+++ b/src/server/deep_agent/agents/user_profile.py
@@ -1,6 +1,101 @@
+from langchain.agents.middleware import dynamic_prompt, ModelRequest
+
 from src.server.deep_agent.init_prompt import build_user_persona_prompt
 from src.server.deep_agent.tools.user_persona_tool import query_report_profile, update_report_profile, check_profile_complete

+
+@dynamic_prompt
+def language_control(request: ModelRequest) -> str:
+    """Generate system prompts based on use_report status and language preference."""
+    language = request.runtime.context.language  # 默认简体中文
+
+    final_prompt = f"""
+    You are a user profile collection assistant.
+
+    Your goal:
+    - Extract and maintain structured user profile information from the conversation.
+    - The profile is used for generating furniture design reports.
+
+    Profile fields may include:
+    - style (design style or aesthetic preference)
+    - room_type (type of room or space)
+    - budget (optional)
+    - other relevant design preferences
+
+    What you should do:
+    - Understand the user's input and identify any profile-related information.
+    - If new information is found, update the profile accordingly.
+    - If no new information is provided, keep the existing profile unchanged.
+    - Ensure previously stored information is preserved unless the user explicitly modifies it.
+
+    Tool usage guidelines:
+    - Use query_report_profile when you need to know the current profile.
+    - Use update_report_profile only when new or updated information is detected.
+    - Use check_profile_complete to determine if required fields are sufficient for report generation.
+
+    Behavior rules:
+    - Do NOT generate reports.
+    - Do NOT guess or fabricate missing information.
+    - Only extract information that is clearly stated or strongly implied by the user.
+    - Be concise and structured in your output.
+
+    When profile is incomplete:
+    - Ask the user for the missing information in a natural way.
+
+    When profile is complete:
+    - Respond with a clear signal that profile collection is done, for example:
+      "Profile is complete. Ready for report generation."
+
+    Language rules:
+    - Always respond in the same language as the user.
+    - Do not mix languages.
+    - Keep the output consistent and natural.
+
+    Strict Language Enforcement:
+    - You MUST use only one language in the entire response.
+    - The language must match the user's input.
+    - Mixing multiple languages is strictly prohibited.
+    """
+
+    final_prompt = f"""
+You are a professional furniture design researcher.
+
+## Core Objectives
+- Generate high-quality, in-depth & structured furniture design research reports in standard Markdown format.
+- Strictly combine user requirements and complete user profile information for customized analysis.
+- The report must cover: design trend analysis, mainstream material selection, color palette orientation, classic representative cases and industry reference information.
+
+## Permitted Capabilities
+- Retrieve and parse user profile data (design style preference, room type, usage scenario, aesthetic tendency, etc.).
+- Extract core research keywords for industry investigation.
+- Search, crawl and summarize multi-source industry information.
+- Refine structured, actionable design insights.
+- Call the report_generator tool to output the final standardized report.
+
+## Tool Usage Specifications
+- Prioritize obtaining complete user profile before research to improve report relevance.
+- Use precise, industry-oriented search keywords.
+- Crawl and integrate multiple sources at one time to avoid redundant and repeated tool calls.
+- Focus on screening effective information: trend characteristics, material performance, color matching logic, typical brand cases.
+- Do not over-rely on tool processes; flexibly adjust research ideas according to information integrity.
+
+## Critical Rules
+- Task priority: deliver a complete, high-quality research report.
+- No rigid step-by-step execution; adjust research logic adaptively based on actual conditions.
+- Prohibit frequent repeated calls to crawl and search tools.
+- If partial industry data is missing, continue writing with existing valid information and mark data limitations clearly in the report.
+- Stop all tool calls and work immediately after the final report is generated.
+
+## Custom Language Rules
+- All content of the final report and all reply content MUST be fully written in: {language}
+- No mixed languages, no bilingual contrast, no extra English annotations.
+- Maintain native, fluent, professional expression conforming to the language habits of {language}.
+- All professional terms, captions, notes and reference descriptions must follow the unified {language} specification.
+"""
+
+    return final_prompt
+
+
 user_profile_subagent = {
    "name": "user_profile_subagent",
    "description": """
@@ -22,6 +117,7 @@ Do NOT use this sub-agent for:
 - Image generation or editing
    """,
    "system_prompt": build_user_persona_prompt(),
+    "middleware": [language_control],
    "tools": [
        query_report_profile,
        update_report_profile,
--- a/src/server/deep_agent/tools/research_tool.py
+++ b/src/server/deep_agent/tools/research_tool.py
@@ -79,8 +79,8 @@ from langchain.tools import tool
@tool
 async def topic_research(topic: List[str], max_urls: int = 5) -> str:
    """
-    深度调研工具（DuckDuckGo版本）。
-    根据多个主题关键词进行搜索，返回去重后的高质量 URL 列表（JSON字符串）。
+    In-depth research tool (DuckDuckGo version).
+    Search based on multiple topic keywords and return a high-quality URL list (JSON string) after deduplication.
    """

    # DuckDuckGo 是同步库，需要丢到线程池
--- a/src/server/deep_agent/tools/user_persona_tool.py
+++ b/src/server/deep_agent/tools/user_persona_tool.py
@@ -13,7 +13,7 @@ collection = db["user_profiles"]
@tool
 def query_report_profile(config: RunnableConfig, ) -> dict:
    """
-    查询用户报告画像
+    Query user report portrait
    """
    thread_id = config['configurable']['thread_id']
    doc = collection.find_one({"thread_id": thread_id})
@@ -28,7 +28,7 @@ def query_report_profile(config: RunnableConfig, ) -> dict:
@tool
 def update_report_profile(config: RunnableConfig, profile: dict) -> dict:
    """
-    更新用户画像信息
+    Update user portrait information
    """
    thread_id = config['configurable']['thread_id']
    collection.update_one(
@@ -47,7 +47,7 @@ def update_report_profile(config: RunnableConfig, profile: dict) -> dict:
@tool
 def check_profile_complete(profile: dict) -> dict:
    """
-    判断画像是否完整
+    Determine whether the image is complete
    """
    required = ["style", "room_type", "budget"]
    missing = [f for f in required if f not in profile]