语言检测并动态更新语种 TODO 后续切换为llm识别用户语种意图模式
This commit is contained in:
@@ -1,15 +1,17 @@
|
||||
import logging
|
||||
from typing import Callable
|
||||
from typing import Callable, Any, Optional, Dict
|
||||
from dataclasses import dataclass
|
||||
|
||||
from deepagents import create_deep_agent
|
||||
from deepagents.backends import FilesystemBackend, CompositeBackend, StateBackend
|
||||
from langchain.agents.middleware import SummarizationMiddleware, ToolRetryMiddleware, wrap_model_call, ModelRequest, ModelResponse, wrap_tool_call, dynamic_prompt
|
||||
from langchain_core.messages import ToolMessage, SystemMessage
|
||||
from fast_langdetect import detect
|
||||
from langchain.agents.middleware import SummarizationMiddleware, ToolRetryMiddleware, wrap_model_call, ModelRequest, ModelResponse, wrap_tool_call, dynamic_prompt, before_model, AgentMiddleware, hook_config
|
||||
from langchain_core.messages import ToolMessage, SystemMessage, AIMessage, HumanMessage
|
||||
from langgraph.checkpoint.mongodb import MongoDBSaver
|
||||
from langgraph.checkpoint.serde.jsonplus import JsonPlusSerializer
|
||||
from langgraph.constants import END
|
||||
from langgraph.prebuilt.tool_node import ToolCallRequest
|
||||
from langgraph.runtime import Runtime
|
||||
from langgraph.store.memory import InMemoryStore
|
||||
from langgraph.types import Command
|
||||
from pymongo import MongoClient
|
||||
@@ -96,7 +98,7 @@ async def report_control(request: ToolCallRequest, handler: Callable[[ToolCallRe
|
||||
def user_role_prompt(request: ModelRequest) -> str:
|
||||
"""Generate system prompts based on use_report status and language preference."""
|
||||
use_report = request.runtime.context.use_report
|
||||
language = request.runtime.context.language # 默认简体中文
|
||||
language = request.runtime.context.language
|
||||
|
||||
# ==================== 报告功能状态提示(支持中英文) ====================
|
||||
if use_report:
|
||||
@@ -132,7 +134,13 @@ def user_role_prompt(request: ModelRequest) -> str:
|
||||
CRITICAL:
|
||||
- Be sure to use the above settings when generating line drawings/images.
|
||||
- Do not refer to these three settings repeatedly when generating reports or text-only answers."""
|
||||
|
||||
langguage_prompt = f"""
|
||||
## Custom Language Rules
|
||||
- All content of the final report and all reply content MUST be fully written in: {language}
|
||||
- No mixed languages, no bilingual contrast, no extra English annotations.
|
||||
- Maintain native, fluent, professional expression conforming to the language habits of {language}.
|
||||
- All professional terms, captions, notes and reference descriptions must follow the unified {language} specification.
|
||||
"""
|
||||
final_prompt = backend_prompt + SYSTEM_PROMPT_MAPPING[f'SYSTEM_BASE_PROMPT_en'] + report_status + SYSTEM_PROMPT_MAPPING[f"SYSTEM_RULES_PROMPT_en"]
|
||||
|
||||
logger.info(
|
||||
@@ -142,6 +150,58 @@ def user_role_prompt(request: ModelRequest) -> str:
|
||||
return final_prompt
|
||||
|
||||
|
||||
from langchain.agents.middleware import AgentState
|
||||
|
||||
|
||||
class LanguageDetectionMiddleware(AgentMiddleware):
    """Detect the language of the latest user message with fast-langdetect.

    Before each model call the last message in the state is inspected. When
    it is a ``HumanMessage`` whose text is at least ``min_length`` characters
    long, the text is passed to ``detect`` and the resulting language code is
    written to ``runtime.context.language`` and returned as a state update.
    """

    def __init__(self, min_length: int = 8, default_lang: str = "zh"):
        """Configure the middleware.

        Args:
            min_length: Minimum number of characters required before
                detection is attempted; shorter messages are skipped.
            default_lang: Language code used when detection fails or the
                detector's score is not above the confidence threshold.
        """
        self.min_length = min_length
        self.default_lang = default_lang

    @staticmethod
    def _extract_text(content: Any) -> str:
        """Return the plain text carried by a message ``content`` value.

        Handles a plain string as well as a list of blocks, where each block
        is either a string or a dict with a string ``"text"`` entry. Blocks
        without text (for example image blocks) are ignored.
        """
        if isinstance(content, str):
            return content.strip()
        if not isinstance(content, list):
            return ""

        parts = []
        for block in content:
            if isinstance(block, str):
                parts.append(block)
            elif isinstance(block, dict) and isinstance(block.get("text"), str):
                parts.append(block["text"])
        return " ".join(parts).strip()

    def before_model(self, state: AgentState, runtime=None) -> Optional[Dict[str, Any]]:
        """Detect the user's language and publish it to the runtime context.

        Args:
            state: Agent state; only its ``"messages"`` list is read.
            runtime: Runtime object whose ``context.language`` is updated
                when a context is available. May be ``None``.

        Returns:
            ``None`` when there is nothing to detect (no messages, the last
            message is not a ``HumanMessage``, or its text is too short).
            Otherwise a dict with ``language``, ``preferred_language`` and
            ``language_confidence``; on a detection error only ``language``
            (set to ``default_lang``) is returned.
        """
        messages = state.get("messages", [])
        if not messages:
            return None

        last_msg = messages[-1]
        if not isinstance(last_msg, HumanMessage):
            return None

        # Content may be a plain string or a list of content blocks, so it is
        # normalised before measuring its length or running detection.
        text = self._extract_text(last_msg.content)
        if len(text) < self.min_length:
            return None

        try:
            detected_lang = self.default_lang
            confidence = 0.0
            # Single-language detection: only the top candidate is requested.
            res = detect(text=text, model="auto", k=1)
            # Accept the candidate only when its score is above 0.5;
            # otherwise the configured default language is kept.
            if res and res[0].get("lang") and res[0].get("score", 0) > 0.5:
                detected_lang = res[0]["lang"]
                confidence = res[0]["score"]

            logger.info(
                "🔍 fast-langdetect 检测到: %s (score=%.4f)",
                detected_lang,
                confidence,
            )

            # `runtime` defaults to None, so guard before touching the context.
            context = getattr(runtime, "context", None)
            if context is not None:
                context.language = detected_lang

            # NOTE(review): these keys are assumed to be accepted by the
            # agent's state schema — confirm against the schema in use.
            return {
                "language": detected_lang,
                "preferred_language": detected_lang,
                "language_confidence": float(confidence),
            }

        except Exception as e:
            # Best-effort: a detection failure must not abort the agent run.
            logger.warning("语言检测失败: %s", e)
            return {"language": self.default_lang}

    async def abefore_model(self, state: AgentState, runtime=None):
        """Async variant; delegates to the synchronous ``before_model``."""
        return self.before_model(state, runtime)
|
||||
|
||||
|
||||
def build_main_agent(workspace_dir, enable_thinking):
|
||||
research_subagent = build_researcher_subagent(workspace_dir)
|
||||
# painter_subagent = build_painter_subagent(workspace_dir)
|
||||
@@ -151,6 +211,7 @@ def build_main_agent(workspace_dir, enable_thinking):
|
||||
user_profile_subagent
|
||||
]
|
||||
middleware = [
|
||||
LanguageDetectionMiddleware(min_length=8, default_lang="en"),
|
||||
user_role_prompt,
|
||||
report_control,
|
||||
SummarizationMiddleware(
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
from langchain.agents.middleware import dynamic_prompt, ModelRequest
|
||||
|
||||
from src.server.deep_agent.init_prompt import build_researcher_prompt
|
||||
from src.server.deep_agent.tools.crawl_tool import create_crawl4ai_batch_tool
|
||||
from src.server.deep_agent.tools.report_generator_tool import create_report_generator_tool
|
||||
@@ -6,6 +8,52 @@ from src.server.deep_agent.tools.structured_retrieval_tool import create_structu
|
||||
from src.server.deep_agent.tools.user_persona_tool import query_report_profile
|
||||
|
||||
|
||||
@dynamic_prompt
def language_control(request: ModelRequest) -> str:
    """Build the researcher system prompt for the active output language.

    The language is read from ``request.runtime.context.language`` and
    interpolated into the "Custom Language Rules" section of the prompt.
    """
    # No fallback is applied here: the value stored on the context is used as-is.
    language = request.runtime.context.language

    return f"""
You are a professional furniture design researcher.

Your primary goal:
- Generate a high-quality, structured furniture design research report based on the user's request and user profile.
- The report should be clear, insightful, and written in well-structured Markdown format.
- It should include design trends, materials, color directions, representative cases, and relevant references.

You are allowed to:
- Retrieve user profile information (e.g., style, room type, preferences)
- Generate research keywords
- Search for relevant topics and sources
- Crawl and read web content
- Extract structured insights
- Generate the final report

Tool usage guidelines:
- If necessary, first retrieve the user profile to better understand preferences.
- Use meaningful and relevant keywords for research.
- When crawling web content, try to process multiple sources efficiently (avoid repeated calls).
- Focus on extracting key insights such as trends, materials, colors, and case studies.
- Use the report_generator tool to produce the final report.

Important rules:
- Your objective is to complete a high-quality report, not to strictly follow a fixed sequence of steps.
- You may adapt your approach depending on the situation.
- Avoid calling the same tool repeatedly (especially crawl tools).
- If some data is missing, proceed with available information and clearly mention any limitations.
- Once the report is generated, consider the task complete and stop further actions.

## Custom Language Rules
- All content of the final report and all reply content MUST be fully written in: {language}
- No mixed languages, no bilingual contrast, no extra English annotations.
- Maintain native, fluent, professional expression conforming to the language habits of {language}.
- All professional terms, captions, notes and reference descriptions must follow the unified {language} specification.

"""
|
||||
|
||||
|
||||
def build_researcher_subagent(workspace_dir):
|
||||
crawl4ai_batch = create_crawl4ai_batch_tool(workspace_dir)
|
||||
structured_retrieval = create_structured_retrieval_tool(workspace_dir)
|
||||
@@ -28,10 +76,11 @@ This sub-agent will:
|
||||
- Produce a complete research report
|
||||
|
||||
Do NOT use this sub-agent for:
|
||||
- User profile collection (handled by user_profile_subagent)
|
||||
- User profile collection
|
||||
- Image generation or editing tasks
|
||||
""",
|
||||
"system_prompt": build_researcher_prompt(),
|
||||
"middleware": [language_control],
|
||||
"tools": [
|
||||
query_report_profile,
|
||||
topic_research,
|
||||
|
||||
@@ -1,6 +1,101 @@
|
||||
from langchain.agents.middleware import dynamic_prompt, ModelRequest
|
||||
|
||||
from src.server.deep_agent.init_prompt import build_user_persona_prompt
|
||||
from src.server.deep_agent.tools.user_persona_tool import query_report_profile, update_report_profile, check_profile_complete
|
||||
|
||||
|
||||
@dynamic_prompt
def language_control(request: ModelRequest) -> str:
    """Build the user-profile sub-agent system prompt for the active language.

    The language is read from ``request.runtime.context.language`` and
    interpolated into the "Custom Language Rules" section of the prompt.

    The previous version assigned ``final_prompt`` twice, so the profile
    prompt was discarded and the furniture-researcher prompt was returned
    instead. Only the profile prompt is built now.
    """
    # No fallback is applied here: the value stored on the context is used as-is.
    language = request.runtime.context.language

    # The explicit target language replaces the earlier "match the user's
    # input" wording so the prompt carries a single language instruction.
    final_prompt = f"""
You are a user profile collection assistant.

Your goal:
- Extract and maintain structured user profile information from the conversation.
- The profile is used for generating furniture design reports.

Profile fields may include:
- style (design style or aesthetic preference)
- room_type (type of room or space)
- budget (optional)
- other relevant design preferences

What you should do:
- Understand the user's input and identify any profile-related information.
- If new information is found, update the profile accordingly.
- If no new information is provided, keep the existing profile unchanged.
- Ensure previously stored information is preserved unless the user explicitly modifies it.

Tool usage guidelines:
- Use query_report_profile when you need to know the current profile.
- Use update_report_profile only when new or updated information is detected.
- Use check_profile_complete to determine if required fields are sufficient for report generation.

Behavior rules:
- Do NOT generate reports.
- Do NOT guess or fabricate missing information.
- Only extract information that is clearly stated or strongly implied by the user.
- Be concise and structured in your output.

When profile is incomplete:
- Ask the user for the missing information in a natural way.

When profile is complete:
- Respond with a clear signal that profile collection is done, for example:
"Profile is complete. Ready for report generation."

## Custom Language Rules
- All content of the final report and all reply content MUST be fully written in: {language}
- No mixed languages, no bilingual contrast, no extra English annotations.
- Maintain native, fluent, professional expression conforming to the language habits of {language}.
- All professional terms, captions, notes and reference descriptions must follow the unified {language} specification.
"""

    return final_prompt
|
||||
|
||||
|
||||
user_profile_subagent = {
|
||||
"name": "user_profile_subagent",
|
||||
"description": """
|
||||
@@ -22,6 +117,7 @@ Do NOT use this sub-agent for:
|
||||
- Image generation or editing
|
||||
""",
|
||||
"system_prompt": build_user_persona_prompt(),
|
||||
"middleware": [language_control],
|
||||
"tools": [
|
||||
query_report_profile,
|
||||
update_report_profile,
|
||||
|
||||
@@ -79,8 +79,8 @@ from langchain.tools import tool
|
||||
@tool
|
||||
async def topic_research(topic: List[str], max_urls: int = 5) -> str:
|
||||
"""
|
||||
深度调研工具(DuckDuckGo版本)。
|
||||
根据多个主题关键词进行搜索,返回去重后的高质量 URL 列表(JSON字符串)。
|
||||
In-depth research tool (DuckDuckGo version).
|
||||
Search based on multiple topic keywords and return a high-quality URL list (JSON string) after deduplication.
|
||||
"""
|
||||
|
||||
# DuckDuckGo 是同步库,需要丢到线程池
|
||||
|
||||
@@ -13,7 +13,7 @@ collection = db["user_profiles"]
|
||||
@tool
|
||||
def query_report_profile(config: RunnableConfig, ) -> dict:
|
||||
"""
|
||||
查询用户报告画像
|
||||
Query user report portrait
|
||||
"""
|
||||
thread_id = config['configurable']['thread_id']
|
||||
doc = collection.find_one({"thread_id": thread_id})
|
||||
@@ -28,7 +28,7 @@ def query_report_profile(config: RunnableConfig, ) -> dict:
|
||||
@tool
|
||||
def update_report_profile(config: RunnableConfig, profile: dict) -> dict:
|
||||
"""
|
||||
更新用户画像信息
|
||||
Update user portrait information
|
||||
"""
|
||||
thread_id = config['configurable']['thread_id']
|
||||
collection.update_one(
|
||||
@@ -47,7 +47,7 @@ def update_report_profile(config: RunnableConfig, profile: dict) -> dict:
|
||||
@tool
|
||||
def check_profile_complete(profile: dict) -> dict:
|
||||
"""
|
||||
判断画像是否完整
|
||||
Determine whether the image is complete
|
||||
"""
|
||||
required = ["style", "room_type", "budget"]
|
||||
missing = [f for f in required if f not in profile]
|
||||
|
||||
Reference in New Issue
Block a user