弃用langgraph更换deepagent

This commit is contained in:
zcr
2026-03-11 21:45:46 +08:00
parent c862121b48
commit 7042d428fa
44 changed files with 2847 additions and 619 deletions

View File

View File

@@ -0,0 +1,23 @@
from langchain_qwq import ChatQwen
from src.core.config import settings
# Primary chat model shared by the supervisor and all subagents.
llm = ChatQwen(
    model="qwen3.5-flash",
    max_tokens=3_000,
    timeout=None,
    max_retries=2,
    enable_thinking=False,  # disable chain-of-thought output for the main model
    api_key=settings.QWEN_API_KEY
)
# Smaller model used only for conversation-title generation.
title_llm = ChatQwen(
    model="qwen-plus",
    max_tokens=3_000,
    timeout=None,
    max_retries=2,
    streaming=False,  # titles are short; no need to stream
    temperature=0.1,  # low temperature for stable, reproducible titles
    top_p=0.8,
    api_key=settings.QWEN_API_KEY
)

View File

@@ -0,0 +1,51 @@
from pathlib import Path
from deepagents import create_deep_agent
from deepagents.backends import FilesystemBackend
from langchain.agents.middleware import SummarizationMiddleware
from langgraph.checkpoint.mongodb import MongoDBSaver
from langgraph.checkpoint.serde.jsonplus import JsonPlusSerializer
from pymongo import MongoClient
from src.core.config import MONGO_URI
from src.server.deep_agent.agents.painter import painter_subagent
from src.server.deep_agent.agents.researcher import research_subagent
from src.server.deep_agent.agents.user_profile import user_profile_subagent
from src.server.deep_agent.init_prompt import build_system_prompt
from src.server.deep_agent.tools.report_generator_tool import llm
# Resolve directories relative to this module so the agent workspace is
# stable regardless of the process working directory.
TOOL_DIR = Path(__file__).resolve().parent
PROJECT_ROOT = TOOL_DIR.parent
client = MongoClient(MONGO_URI)
# Conversation checkpointer backed by MongoDB.
# NOTE(review): `client["furniture_agent_db"]` passes a Database object while
# `db_name="fida_agent_db"` is also supplied — MongoDBSaver normally expects
# the MongoClient itself for `client`; confirm which database actually
# receives the checkpoints.
checkpointer = MongoDBSaver(
    client=client["furniture_agent_db"],
    db_name="fida_agent_db",
    collection_name="fida_agent_collection",
    serde=JsonPlusSerializer(pickle_fallback=True),  # key line: fall back to pickle for non-JSON-serializable state
)
# Subagents exposed to the supervisor for task routing.
subagents = [
    painter_subagent,
    research_subagent,
    user_profile_subagent
]
def build_main_agent(use_report):
    """Construct the supervisor deep agent.

    Args:
        use_report: Forwarded into the system prompt; controls whether the
            report/research workflow is allowed.

    Returns:
        The compiled deep agent wired with MongoDB checkpointing, a real
        on-disk filesystem workspace, and history summarization middleware.
    """
    main_agent = create_deep_agent(
        model=llm,
        system_prompt=build_system_prompt(use_report=use_report),
        subagents=subagents,
        checkpointer=checkpointer,
        backend=FilesystemBackend(
            root_dir=str(PROJECT_ROOT / "agent_workspace"),
            virtual_mode=False,  # important: disable virtual mode -> actually write to disk
        ),
        middleware=[
            # Summarize history once it exceeds ~3000 tokens, keeping the
            # most recent 100 messages verbatim.
            SummarizationMiddleware(
                model=llm,
                trigger=("tokens", 3000),
                keep=("messages", 100),
            ),
        ],
    )
    return main_agent

View File

@@ -0,0 +1,22 @@
from langchain.agents.middleware import wrap_tool_call
from src.server.deep_agent.agents.init_llm import llm
from src.server.deep_agent.init_prompt import build_painter_prompt
from src.server.deep_agent.tools.generate_furniture_sketch import generate_furniture
@wrap_tool_call
async def log_tool_calls(request, handler):
    """Intercept and log every tool call - demonstrates cross-cutting concern.

    Bug fix: the previous version returned ``handler(request)`` without
    awaiting it, so callers of this async middleware received a coroutine
    object instead of the tool result.
    """
    print(request)
    return await handler(request)
# Subagent spec consumed by deepagents.create_deep_agent: interprets the
# user's intent, builds an optimized image prompt, and calls the
# generate_furniture tool to produce a furniture sketch.
painter_subagent = {
    "name": "painter_subagent",
    "description": "理解用户意图使用prompt,调用generate_furniture工具生成家具sketch草图.",
    "system_prompt": build_painter_prompt(),
    "tools": [generate_furniture],
    "model": llm,
    # "middleware": [log_tool_calls],
}

View File

@@ -0,0 +1,21 @@
from src.server.deep_agent.agents.init_llm import llm
from src.server.deep_agent.init_prompt import build_researcher_prompt
from src.server.deep_agent.tools.crawl_tool import crawl4ai_batch
from src.server.deep_agent.tools.report_generator_tool import report_generator
from src.server.deep_agent.tools.research_tool import topic_research
from src.server.deep_agent.tools.structured_retrieval_tool import structured_retrieval
from src.server.deep_agent.tools.user_persona_tool import query_report_profile
# Subagent spec for the research workflow: profile lookup -> web search ->
# batch crawl -> structured extraction -> markdown report generation.
# The registered routing name is "research-agent".
research_subagent = {
    "name": "research-agent",
    "description": "通过网络搜索对家具设计开展深度研究并整合结论",
    "system_prompt": build_researcher_prompt(),
    "tools": [
        query_report_profile,
        topic_research,
        crawl4ai_batch,
        structured_retrieval,
        report_generator
    ],
    "model": llm
}

View File

@@ -0,0 +1,15 @@
from src.server.deep_agent.agents.init_llm import llm
from src.server.deep_agent.init_prompt import build_user_persona_prompt
from src.server.deep_agent.tools.user_persona_tool import query_report_profile, update_report_profile, check_profile_complete
# Subagent spec for profile collection: reads/updates the report profile
# (style / room_type / budget) stored in MongoDB and checks completeness.
user_profile_subagent = {
    "name": "user_profile_subagent",
    "description": "收集用户报告画像并存储到MongoDB",
    "system_prompt": build_user_persona_prompt(),
    "model": llm,
    "tools": [
        query_report_profile,
        update_report_profile,
        check_profile_complete,
    ],
}

View File

@@ -0,0 +1,141 @@
def build_system_prompt(use_report):
    """Build the supervisor system prompt.

    Args:
        use_report: Whether report generation (the research subagent) is
            enabled; when False the prompt forbids routing to it.

    Fixes vs. previous revision: the prompt said "两个" subagents but lists
    three, and referred to the researcher as "research-subagent" although
    the registered subagent name is "research-agent" — mismatched names can
    break supervisor routing.
    """
    system_prompt = f"""
你是主调度 AgentSupervisor负责理解用户意图并选择合适的子Agent。
当前参数:
use_report = {use_report}
系统中存在三个相关子Agent
1. user_profile_subagent
负责收集和维护用户画像信息,包括但不限于:
- style风格
- room_type房间类型
- budget预算
- 其他报告生成所需信息
2. research-agent
负责生成完整报告、调研、总结、分析。
3. painter_subagent
负责根据用户描述,构造适用于生成家具sketch的prompt,使用prompt用工具生成图片.
========================
执行规则
========================
【1】当用户请求报告 / 调研 / 分析 / 总结时:
先判断是否已经具备足够的用户画像信息。
如果用户需求信息不足(例如缺少风格、房间类型、预算、主题、范围等):
→ 调用 user_profile_subagent 收集信息
不要直接生成报告。
如果用户画像信息已经完整:
→ 调用 research-agent 生成报告。
------------------------
【2】当 use_report = False 时:
- 严禁调用 research-agent
- 如果用户明确请求报告、调研、总结、分析:
请礼貌回复:
"报告功能当前未开启,你可以打开 use_report=True 后我来帮你生成报告。"
- 其他普通问题可以正常回答或调用其他子Agent。
------------------------
【3】用户画像优先级规则
只要用户输入包含以下情况:
- 表达设计需求
- 提供偏好信息(例如风格、预算、房间类型)
- 修改之前的偏好
- 补充报告信息
都应该优先调用:
user_profile_subagent
用于更新或收集用户画像。
------------------------
【4】调度原则
- user_profile_subagent 只负责 **信息收集**
- research-agent 只负责 **报告生成**
不要混用职责。
========================
严格输出规则
========================
- 当生成图片时绝对不要输出图片路径、file:// 地址、URL、本地链接
- 只输出文字描述,不输出任何图片链接或路径
"""
    return system_prompt
def build_painter_prompt():
    """Return the static system prompt for the painter subagent."""
    return """
你是一名专业的prompt优化专家专注于家具设计草图生成。你的任务是
1. 分析用户查询,理解核心意图,包括家具类型、风格、尺寸、颜色、材料等关键元素
2. 基于意图优化并生成一个详细、精确的prompt适合用于AI图片生成工具创建家具sketch草图例如线条简洁、手绘风格、焦点在设计细节上
3. 使用优化的prompt调用图片生成工具生成并返回草图图片
4. 如果需要,建议额外变体或改进
输出格式:
- 用户意图总结12段
- 优化后的prompt完整文本
- 生成的图片描述(如果工具返回)
- 建议改进(项目符号,可选)
【严格输出规则】
- 当生成图片时,**绝对不要输出图片路径、file:// 地址、URL、本地链接**。
- 只输出文字描述,不输出任何图片链接或路径。
"""
def build_researcher_prompt():
    """Return the researcher subagent system prompt.

    Fix vs. previous revision: the prompt told the agent to call a
    non-existent `get_user_profile` tool; the tool actually registered on
    the research subagent is `query_report_profile`.
    """
    prompt = """
你是一名专业的家具设计研究员。你的任务是:
【0】获取用户画像
- 首先调用 query_report_profile 工具,获取当前用户画像信息(如风格、房间类型、预算等)。
- 根据用户画像,生成五个与用户需求和偏好高度相关的研究词条。
【1】关键词拆解
1. 将研究主题结合用户画像拆解为可搜索的查询关键词
2. 将关键词组合成五个待搜索的词条
【2】搜索与爬取
3. 使用 topic_research 工具搜索这五个词条获取相关、权威的网址
4. 使用 crawl4ai_batch 批量爬取网址(仅可调用一次,禁止重复调用)
【3】结构化处理与报告
5. 使用 structured_retrieval 对爬取内容进行结构化提取(重点:设计趋势、材质创新、颜色应用、代表案例、品牌参考)
6. 使用 report_generator 基于提取内容生成完整 Markdown 报告
【严格工具调用规则】:
- 调用顺序必须严格query_report_profile → topic_research → crawl4ai_batch仅一次 → structured_retrieval → report_generator。
- 不得跳回前面步骤或重复任何工具。
- 如果爬取结果为空或极少,直接说明:
“由于部分来源暂时不可访问,本报告基于有限可用信息生成,可能不够全面。如需更完整资料,请提供具体网址或调整需求。”
- 一旦生成 report_generator 的输出,就视为任务完成,直接结束,不要再思考或调用其他工具。
- crawl4ai_batch 最多只能调用一次,即使部分网址失败,也禁止再次调用 crawl4ai_batch 或 topic_research。
现在开始严格执行以上规则。
"""
    return prompt
def build_user_persona_prompt():
    """Return the static system prompt for the user-profile subagent."""
    return """
你是用户画像收集助手。
你的任务是从用户对话中理解并提取报告画像信息,包括但不限于:
- style装修风格
- room_type房间类型
- budget预算
工作流程:
1. 先调用 query_report_profile 查询当前画像
2. 从用户输入中理解是否包含新的画像信息
3. 如果有新的信息,合并旧画像并调用 update_report_profile 更新
4. 调用 check_profile_complete 判断是否完整
5. 如果缺少字段,引导用户补充
6. 如果完整,回复:
"画像收集完成,即将为你生成报告!"
注意:
- 不要编造信息
- 不要覆盖已有字段,除非用户明确修改
- 只负责画像收集,不生成报告
"""

View File

@@ -0,0 +1,131 @@
import asyncio
import uuid
from langchain_core.messages import AIMessageChunk, ToolMessageChunk, ToolMessage
from src.server.deep_agent.agents.main_agent import build_main_agent
# Build the supervisor agent once at import time, with reports enabled.
agent = build_main_agent(use_report=True)
async def continuous_chat():
    """Interactive CLI chat loop with per-session memory.

    Creates one thread_id for the whole session so the checkpointer can
    restore conversation state between turns, then streams agent output
    (graph updates / model tokens / custom report events) to stdout.

    Fixes vs. previous revision: removed the raw debug ``print(stream)``
    that duplicated every chunk, merged the identical ToolMessageChunk /
    ToolMessage branches, and deleted ~60 lines of commented-out dead code.
    """
    thread_id = str(uuid.uuid4())
    print("===== 家具设计助手(支持持续对话+记忆)=====")
    print("输入 'exit''退出' 结束对话\n")
    while True:
        user_input = input("你:")  # input() blocks the event loop, acceptable for a CLI
        if user_input.lower() in ["exit", "退出", "q", "quit"]:
            print("助手:再见!如需继续设计,随时回来~")
            break
        if not user_input.strip():
            print("助手:请输入有效的设计需求,我会尽力解答~")
            continue
        print("\n助手:正在处理你的需求...\n")
        # NOTE(review): `version="v2"` belongs to astream_events, not astream —
        # confirm the installed langgraph accepts (or ignores) it here.
        async for stream in agent.astream(
            {"messages": user_input},
            stream_mode=["updates", "messages", "custom"],
            subgraphs=True,
            version="v2",
            config={"configurable": {"thread_id": thread_id}}
        ):
            # With subgraphs=True each item is (namespace, stream_mode, payload).
            _, mode, chunks = stream
            if mode == "updates":
                print(f"[updates] {chunks}")
            elif mode == "messages":
                token, _metadata = chunks
                if isinstance(token, AIMessageChunk):  # model tokens (reasoning + text)
                    reasoning = [b for b in token.content_blocks if b["type"] == "reasoning"]
                    text = [b for b in token.content_blocks if b["type"] == "text"]
                    if reasoning:
                        print(f"[thinking] {reasoning[0]['reasoning']}", end="")
                    if text:
                        print(text[0]["text"], end="")
                elif isinstance(token, (ToolMessageChunk, ToolMessage)):  # tool results
                    print(f"[tool|{token.name}] {token.content}", end="")
                else:
                    continue
            elif mode == "custom":
                # Custom events emitted via get_stream_writer (e.g. report deltas).
                print(f"[report] {chunks.get('delta', '')}", end="")
# Script entry point: run the interactive CLI chat loop.
if __name__ == "__main__":
    asyncio.run(continuous_chat())

View File

@@ -0,0 +1,27 @@
from langchain_core.prompts import PromptTemplate
from src.server.deep_agent.agents.init_llm import title_llm
def conversation_title(full_conversation):
    """Generate a short Chinese title summarizing a full conversation.

    Args:
        full_conversation: The conversation text to summarize.

    Returns:
        The raw model response from the chain.
        NOTE(review): this is the message object (e.g. AIMessage), not a
        plain string — callers probably want ``response.content``; confirm.
    """
    title_prompt = PromptTemplate(
        input_variables=["full_conversation"],
        template="""
请严格按照以下要求生成对话标题:
1. 标题长度8-15个字纯中文无标点、无特殊符号、无换行
2. 标题内容:基于完整对话,精准概括核心主题(兼顾用户需求和助手回复)
3. 标题风格:自然口语化,符合中文表达习惯,不冗余
完整对话内容:
{full_conversation}
仅输出标题,不要输出任何额外解释、说明或标点符号。
"""
    )
    # LCEL pipe: prompt -> title model (non-streaming, low temperature).
    title_chain = title_prompt | title_llm
    response = title_chain.invoke({"full_conversation": full_conversation})
    return response
# Manual smoke test: generate a title for a trivial conversation.
if __name__ == '__main__':
    print(conversation_title("你好"))

View File

@@ -0,0 +1,191 @@
import time
import asyncio
from typing import List, Dict, Any
from urllib.parse import urlparse
from pathlib import Path
import uuid
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
from langchain_core.tools import tool
# ─────────────────────────────────────
# 路径配置
# ─────────────────────────────────────
# Resolve the save directory relative to this module so crawled files land
# in the DeepAgents workspace regardless of the working directory.
TOOL_DIR = Path(__file__).resolve().parent
PROJECT_ROOT = TOOL_DIR.parent
# DeepAgents-recommended layout: <project>/agent_workspace/raw_data
SAVE_DIR = PROJECT_ROOT / "agent_workspace" / "raw_data"
SAVE_DIR.mkdir(parents=True, exist_ok=True)
# Bug fix: the original f-string nested double quotes inside double quotes
# (f"...{str(X / "agent_workspace")}") — a SyntaxError on Python < 3.12.
print(f"tool save : {str(PROJECT_ROOT / 'agent_workspace')}")
# ─────────────────────────────────────
# Browser configuration
# ─────────────────────────────────────
# Headless Chromium with a desktop UA string to reduce bot blocking.
browser_config = BrowserConfig(
    headless=True,
    verbose=False,
    java_script_enabled=True,
    user_agent=(
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/118.0 Safari/537.36"
    ),
)
# Per-run crawl settings: always fetch fresh (bypass cache) and strip
# navigation/boilerplate tags before markdown conversion.
run_config = CrawlerRunConfig(
    cache_mode=CacheMode.BYPASS,
    word_count_threshold=5,
    excluded_tags=["script", "style", "nav", "footer"],
    remove_overlay_elements=True,
    process_iframes=True,
)
# ─────────────────────────────────────
# URL → 文件名
# ─────────────────────────────────────
def build_filename(url: str) -> str:
    """Derive a unique, filesystem-safe markdown filename from a URL."""
    parts = urlparse(url)
    host = parts.netloc.replace("www.", "").replace(".", "_")
    slug = parts.path.strip("/").replace("/", "_")[:50] or "index"
    stamp = int(time.time())
    nonce = uuid.uuid4().hex[:6]
    return f"{stamp}_{nonce}_{host}_{slug}.md"
# ─────────────────────────────────────
# 单个 URL 抓取
# ─────────────────────────────────────
async def crawl_one(crawler, url: str, sem: asyncio.Semaphore) -> Dict[str, Any]:
    """Fetch one URL and persist its markdown to SAVE_DIR.

    Returns {"url", "success", "file"} on success or
    {"url", "success", "error"} on failure. Pages yielding fewer than 500
    markdown characters are rejected as likely error/boilerplate pages.
    """
    async with sem:  # bound concurrency across the whole batch
        try:
            result = await crawler.arun(url=url, config=run_config)
            if not result.success:
                return {
                    "url": url,
                    "success": False,
                    "error": f"status={getattr(result, 'status_code', 'unknown')}"
                }
            markdown = result.markdown or ""
            if len(markdown) < 500:
                return {
                    "url": url,
                    "success": False,
                    "error": "content too short"
                }
            filename = build_filename(url)
            filepath = SAVE_DIR / filename
            # Prepend provenance comments so downstream extraction can
            # recover the original source URL from the saved file.
            header = (
                f"<!-- Source: {url} -->\n"
                f"<!-- Saved: {time.strftime('%Y-%m-%d %H:%M:%S')} -->\n\n"
            )
            with open(filepath, "w", encoding="utf-8") as f:
                f.write(header + markdown)
            return {
                "url": url,
                "success": True,
                "file": str(filepath)
            }
        except Exception as e:
            # Best-effort batch crawl: report the failure, never raise.
            return {
                "url": url,
                "success": False,
                "error": str(e)
            }
# ─────────────────────────────────────
# Async 主逻辑
# ─────────────────────────────────────
async def _crawl4ai_batch(urls: List[str]) -> Dict[str, Any]:
    """Crawl all URLs concurrently (max 5 in flight) and save results.

    Returns {"saved_files", "count", "summary"} or {"error": ...} when the
    input list is empty.
    """
    urls = list(set(urls))  # dedupe; NOTE: set() does not preserve caller order
    if not urls:
        return {"error": "no urls"}
    sem = asyncio.Semaphore(5)  # concurrency limit
    async with AsyncWebCrawler(config=browser_config) as crawler:
        tasks = [
            crawl_one(crawler, url, sem)
            for url in urls
        ]
        results = await asyncio.gather(*tasks)
    success_files = []
    summary = []
    for r in results:
        if r["success"]:
            success_files.append(r["file"])
            summary.append(f"{r['url']}")
        else:
            summary.append(f"{r['url']} ({r['error']})")
    return {
        "saved_files": success_files,
        "count": len(success_files),
        "summary": summary,
    }
# ─────────────────────────────────────
# Tool同步
# ─────────────────────────────────────
@tool
def crawl4ai_batch(urls: List[str]) -> str:
    """
    Batch crawl webpages and save their content as markdown files.
    Args:
        urls: List of webpage URLs to crawl.
    Returns:
        A summary of crawling results and saved file paths.
    """
    # NOTE(review): asyncio.run() raises if an event loop is already running
    # in this thread. This is safe only because LangChain executes sync tools
    # on a worker thread — confirm for this deployment.
    try:
        result = asyncio.run(_crawl4ai_batch(urls))
        if "error" in result:
            return f"❌ Error: {result['error']}"
        output = [
            "### 批量抓取完成 ###",
            f"成功保存文件: {result['count']}",
            f"保存目录: {SAVE_DIR}",
            "",
            "抓取详情:"
        ]
        output.extend(result["summary"])
        if result["saved_files"]:
            output.append("\n可读取文件:")
            output.extend(result["saved_files"])
        return "\n".join(output)
    except Exception as e:
        # Best-effort tool: surface the failure as text for the agent.
        return f"🚨 爬虫系统异常: {str(e)}"

View File

@@ -0,0 +1,94 @@
import json
import logging
import uuid
from google.oauth2 import service_account
from langchain_core.tools import tool
from google import genai
from google.genai.types import GenerateContentConfig, Modality
from minio import Minio
from src.core.config import settings
from src.server.utils.new_oss_client import oss_upload_image
logger = logging.getLogger(__name__)
# Initialize global credentials and clients at import time.
# NOTE(review): `settings.GOOGLE_GENAI_USE_VERTEXAI` is used here as a
# service-account *file path* — the name suggests a boolean flag; confirm
# this setting actually holds the credentials JSON path.
creds = service_account.Credentials.from_service_account_file(
    settings.GOOGLE_GENAI_USE_VERTEXAI,
    scopes=["https://www.googleapis.com/auth/cloud-platform"],
)
# MinIO client for uploading generated sketches to object storage.
minio_client = Minio(settings.MINIO_URL, access_key=settings.MINIO_ACCESS, secret_key=settings.MINIO_SECRET, secure=settings.MINIO_SECURE)
# Gemini client routed through Vertex AI.
client = genai.Client(
    credentials=creds,
    project=settings.GOOGLE_CLOUD_PROJECT,
    location=settings.GOOGLE_CLOUD_LOCATION,
    vertexai=True
)
@tool
async def generate_furniture(prompt: str) -> str:
    """
    使用 Gemini 图像生成模型根据详细的英文提示词生成家具设计草图。
    """
    # Returns a JSON string {"tool_name", "data", "tool_status"} where `data`
    # is "<bucket>/<object_name>" on success or an error description.
    # NOTE(review): client.models.generate_content is a blocking call inside
    # an async tool — it can stall the event loop; consider asyncio.to_thread.
    print(f"\n[系统日志] 正在调用 Nano Banana (Gemini Image Gen) ...")
    try:
        response = client.models.generate_content(
            model="gemini-2.5-flash-image",
            contents=(f"Generate a professional furniture design sketch: {prompt}"),
            config=GenerateContentConfig(
                response_modalities=[Modality.TEXT, Modality.IMAGE],
            ),
        )
        # Take the first inline image payload from the response parts.
        image_bytes = None
        for part in response.candidates[0].content.parts:
            if part.inline_data:
                image_bytes = part.inline_data.data
                break
        if not image_bytes:
            return "未能生成图像数据。"
        object_name = f"furniture/sketches/{uuid.uuid4()}.png"
        bucket = "fida-test"  # replace with your bucket name
        # Upload the raw PNG bytes to object storage.
        upload_res = oss_upload_image(
            oss_client=minio_client,
            bucket=bucket,
            object_name=object_name,
            image_bytes=image_bytes
        )
        if upload_res:
            # Build the access path. For a private bucket a presigned URL
            # (presigned_get_object) would be needed; this is just
            # "<bucket>/<object>".
            image_url = f"{bucket}/{object_name}"
            return json.dumps(
                {
                    "tool_name": "generate_furniture",
                    "data": image_url,
                    "tool_status": "success"
                },
                ensure_ascii=False
            )
        else:
            return json.dumps(
                {
                    "tool_name": "generate_furniture",
                    "data": "图片生成成功,但上传至存储服务器失败。",
                    "tool_status": "error"
                },
                ensure_ascii=False
            )
    except Exception as e:
        logger.warning(e)
        return json.dumps(
            {
                "tool_name": "generate_furniture",
                "data": f"绘图流程异常",  # NOTE: f-prefix has no placeholders
                "tool_status": "error"
            },
            ensure_ascii=False
        )

View File

@@ -0,0 +1,151 @@
import os
import json
import re
from typing import Optional, List, Dict
from langchain_qwq import ChatQwen
from langgraph.config import get_stream_writer
from pydantic import BaseModel, Field
from langchain_core.tools import tool
from langchain_core.messages import SystemMessage, HumanMessage
from src.core.config import settings
# =========================
# LLM initialization
# =========================
# Dedicated Qwen instance for report writing: low temperature for more
# consistent long-form output, thinking disabled for plain token streaming.
llm = ChatQwen(
    enable_thinking=False,
    model="qwen3.5-flash",
    temperature=0.2,
    max_tokens=3_000,
    timeout=None,
    max_retries=2,
    api_key=settings.QWEN_API_KEY)
# =========================
# Tool input schema
# =========================
# Pydantic argument schema for the report_generator tool; the Field
# descriptions are surfaced to the calling LLM.
class ReportInput(BaseModel):
    report_topic: str = Field(
        ...,
        description="Main topic of the report, e.g. '2026 Sofa Design Trends'"
    )
    structured_data: List[Dict] = Field(
        ...,
        description="Structured retrieval result items"
    )
    language: Optional[str] = Field(
        default="English",
        description="Output language"
    )
# =========================
# LangGraph Tool
# =========================
@tool("report_generator", args_schema=ReportInput)
async def report_generator(
    report_topic: str,
    structured_data: List[Dict],
    language: str = "English"
) -> str:
    """
    Generate a professional design/market report
    directly from structured retrieval results.
    """
    # Streams progress via the LangGraph stream writer (report_start /
    # report_delta / report_stop / report_complete events) and returns the
    # full markdown text as the tool result.
    # Fixes vs. previous revision:
    # - return annotation corrected from `dict` to `str` (a str is always returned);
    # - the final saved-file notice now interpolates the actual file path
    #   instead of a literal placeholder.
    writer = get_stream_writer()
    if not structured_data:
        error_msg = "Error: No structured data provided."
        writer({"type": "report_error", "message": error_msg})
        return error_msg
    collected_data_str = json.dumps(
        structured_data,
        ensure_ascii=False,
        indent=2
    )
    # Prompt assembly -------------------------------------------------------
    system_prompt = f"""
You are a professional design trend analyst.
Generate a long, structured Markdown report.
REQUIREMENTS:
1. Follow MECE principle.
2. Embed images ONLY if they start with https://
using: ![alt](url)
3. Insert images inline.
4. Every key insight must cite source:
[Website Name](url)
5. Use Markdown headings.
6. Start directly with title.
7. Be detailed and analytical.
Output Language: {language}
"""
    user_prompt = f"""
Topic: {report_topic}
Input Data:
{collected_data_str}
"""
    # Stream the LLM output token by token -----------------------------------
    writer({"type": "report_start", "topic": report_topic, "language": language})
    full_report = ""
    try:
        # Detach inherited callbacks so deltas are only emitted via `writer`.
        report_llm = llm.with_config(
            callbacks=[]
        )
        async for chunk in report_llm.astream(
            [
                SystemMessage(content=system_prompt),
                HumanMessage(content=user_prompt)
            ]
        ):
            if chunk.content:
                delta = chunk.content
                full_report += delta
                writer({"type": "report_delta", "delta": delta})  # push to frontend in real time
        writer({"type": "report_stop", "topic": report_topic, "language": language})
    except Exception as e:
        error_msg = f"LLM generation failed: {str(e)}"
        writer({"type": "report_error", "message": error_msg})
        return error_msg
    report_content = full_report.strip()
    # Persist the report ------------------------------------------------------
    output_dir = "workspace/reports"
    os.makedirs(output_dir, exist_ok=True)
    # Strip characters that are illegal in filenames on common platforms.
    safe_topic = re.sub(r'[\\/*?:"<>|]', "", report_topic.replace(" ", "_"))
    filename = f"{output_dir}/{safe_topic}.md"
    try:
        with open(filename, "w", encoding="utf-8") as f:
            f.write(report_content)
        writer({"type": "report_complete", "file_path": filename})
    except Exception as e:
        # Saving is best-effort; the report text is still returned below.
        writer({"type": "report_save_warning", "message": str(e)})
    # Return the full content as the tool result (body already streamed as deltas).
    return report_content + f"\n\n✅ Report saved to: {filename}"

View File

@@ -0,0 +1,67 @@
import asyncio
import json
from datetime import datetime
from typing import List, Set, Optional
from langchain_core.tools import tool
from tavily import TavilyClient
from src.core.config import settings
# Config load: Tavily API key comes from application settings.
TAVILY_API_KEY = settings.TAVILY_API_KEY
@tool
async def topic_research(topic: list[str], max_urls: int = 5) -> str:
    """
    深度调研工具。该工具会利用 Tavily 搜索引擎针对特定主题进行多维度搜索。
    它会自动生成针对性的搜索词(包含年份和趋势),并返回去重后的高质量 URL 列表。
    """
    # Returns a JSON-encoded list of at most `max_urls` deduplicated URLs.
    if not TAVILY_API_KEY:
        return "❌ 错误: 未配置 TAVILY_API_KEY。"
    client = TavilyClient(api_key=TAVILY_API_KEY)
    # Run one Tavily search per query string, in parallel.
    async def perform_search(q: str):
        # The Tavily SDK is synchronous; run it on a worker thread.
        def sync_search():
            try:
                response = client.search(
                    query=q,
                    search_depth="advanced",
                    max_results=5,
                    include_answer=False
                )
                return response.get('results', [])
            except Exception as e:
                # Best-effort: a failed query contributes no results.
                print(f"Search error: {e}")
                return []
        return await asyncio.to_thread(sync_search)
    search_tasks = [perform_search(q) for q in topic]
    search_results_list = await asyncio.gather(*search_tasks)
    # Deduplicate and filter out obvious non-HTML resources.
    seen_urls: Set[str] = set()
    final_urls = []
    # File extensions that are not crawlable content pages.
    skip_extensions = ('.pdf', '.jpg', '.png', '.zip', '.exe')
    for results in search_results_list:
        for item in results:
            url = item.get('url')
            if url and url not in seen_urls:
                if not url.lower().endswith(skip_extensions):
                    seen_urls.add(url)
                    final_urls.append(url)
    # Truncate to the requested number of URLs.
    selected_urls = final_urls[:max_urls]
    # Return JSON so the agent can feed the list straight into crawl4ai_batch.
    return json.dumps(selected_urls, ensure_ascii=False)

View File

@@ -0,0 +1,225 @@
import os
import re
import json
from datetime import datetime
from typing import List, Dict, Optional
from pydantic import BaseModel, Field
from langchain_core.tools import tool
from langchain_core.documents import Document
# RAG
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from sentence_transformers import CrossEncoder
# =========================
# Global models (singletons) — loaded once at import time
# =========================
# Sentence-embedding model used to build the per-call FAISS index.
_EMBEDDING_MODEL = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
# Cross-encoder used to re-rank vector-search hits by query relevance.
_RERANK_MODEL = CrossEncoder(
    "cross-encoder/ms-marco-MiniLM-L-6-v2"
)
# Pydantic argument schema for the structured_retrieval tool.
class StructuredRetrievalInput(BaseModel):
    file_paths: List[str] = Field(..., description="List of local markdown file paths.")
    query: str = Field(..., description="Extraction query")
    source_url: Optional[str] = Field(None, description="Optional global source URL")
@tool("structured_retrieval", args_schema=StructuredRetrievalInput)
def structured_retrieval(
    file_paths: List[str],
    query: str,
    source_url: Optional[str] = None
) -> Dict:
    """
    Batch structured extraction from markdown files.
    - Performs vector search + re-ranking
    - Saves extracted structured data as JSON file to disk
    - Returns ONLY summary (status, count, file path)
    """
    # -- 1. Load files and split them into heading-based sections ----------
    all_docs_pool: List[Document] = []
    for path in file_paths:
        if not os.path.exists(path) or not path.endswith((".md", ".markdown")):
            continue  # silently skip missing / non-markdown paths
        file_name = os.path.basename(path)
        with open(path, "r", encoding="utf-8") as f:
            content = f.read()
        # Prefer the explicit argument, then the "<!-- Source: -->" header
        # written by crawl_one, then "unknown".
        current_source = source_url or _extract_source_from_md(content) or "unknown"
        sections = _split_markdown_by_headers(content)
        for sec in sections:
            all_docs_pool.append(
                Document(
                    page_content=sec,
                    metadata={"source_url": current_source, "file_name": file_name}
                )
            )
    if not all_docs_pool:
        return {"status": "no_documents_found", "items_count": 0, "json_path": None}
    # -- 2. Vector search ---------------------------------------------------
    # A throwaway FAISS index per call; k=200 casts a wide net before re-ranking.
    vector_store = FAISS.from_documents(all_docs_pool, _EMBEDDING_MODEL)
    retrieved = vector_store.similarity_search(query, k=200)
    # -- 3. Extract structured snippets -------------------------------------
    structured_items = []
    for doc in retrieved:
        text = doc.page_content.strip()
        if len(text) < 30:
            continue  # drop fragments too short to be meaningful
        # Unique markdown image URLs embedded in the snippet.
        images = list(set(re.findall(r"!\[.*?\]\((.*?)\)", text)))
        structured_items.append(
            {
                "text": text,
                "images": images,
                "source_url": doc.metadata.get("source_url"),
                "file_name": doc.metadata.get("file_name")
            }
        )
    # -- 4. Re-rank with the cross-encoder, keep the top 50 ------------------
    if structured_items:
        # Dedupe by text before scoring to avoid wasted cross-encoder calls.
        unique_items = {item["text"]: item for item in structured_items}.values()
        pairs = [[query, item["text"]] for item in unique_items]
        scores = _RERANK_MODEL.predict(pairs)
        sorted_items = sorted(
            zip(scores, unique_items),
            key=lambda x: x[0],
            reverse=True
        )
        top_items = [item for _, item in sorted_items[:50]]
    else:
        top_items = []
    # -- 5. Write the extraction to a JSON file ------------------------------
    if not top_items:
        return {"status": "no_relevant_content", "items_count": 0, "json_path": None}
    # Build a meaningful file name from the query and a timestamp.
    safe_query = re.sub(r'[^a-zA-Z0-9\u4e00-\u9fa5]', '_', query)[:40]
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    json_filename = f"extracted_{safe_query}_{timestamp}.json"
    # Output directory sits next to the crawl output (raw_data/../extracted).
    output_dir = os.path.join(os.path.dirname(file_paths[0]), "..", "extracted")
    os.makedirs(output_dir, exist_ok=True)
    json_path = os.path.join(output_dir, json_filename)
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(
            {
                "query": query,
                "extracted_at": timestamp,
                "item_count": len(top_items),
                "items": top_items
            },
            f,
            ensure_ascii=False,
            indent=2
        )
    # -- 6. Return only a compact summary ------------------------------------
    return {
        "status": "success",
        "items_count": len(top_items),
        "json_path": json_path,
        "summary": f"已提取 {len(top_items)} 個高相關片段,儲存於 {json_path}"
    }
def _extract_source_from_md(content: str) -> Optional[str]:
match = re.search(r"<!--\s*Source:\s*(.*?)\s*-->", content)
return match.group(1).strip() if match else None
# =========================
# Markdown Header Split
# =========================
def _split_markdown_by_headers(
content: str,
max_chars: int = 2000,
overlap: int = 150,
):
header_re = re.compile(
r'^(#{1,6})\s+(.+?)\s*$',
re.MULTILINE
)
matches = list(header_re.finditer(content))
if not matches:
return _chunk_text(content, max_chars, overlap)
sections = []
for i, m in enumerate(matches):
start = m.start()
end = (
matches[i + 1].start()
if i + 1 < len(matches)
else len(content)
)
block = content[start:end].strip()
if block:
sections.append(block)
final_sections = []
for s in sections:
if len(s) > max_chars:
final_sections.extend(
_chunk_text(s, max_chars, overlap)
)
else:
final_sections.append(s)
return final_sections
def _chunk_text(
text: str,
max_chars: int = 2000,
overlap: int = 150
):
text = text.strip()
if len(text) <= max_chars:
return [text]
chunks = []
start = 0
while start < len(text):
end = min(len(text), start + max_chars)
chunk = text[start:end].strip()
if chunk:
chunks.append(chunk)
if end == len(text):
break
start = max(0, end - overlap)
return chunks

View File

@@ -0,0 +1,57 @@
from datetime import datetime
from langchain_core.runnables import RunnableConfig
from langchain_core.tools import tool
from pymongo import MongoClient
from src.core.config import MONGO_URI
# Shared MongoDB handles for user-profile storage (db "report_agent",
# collection "user_profiles"); documents are keyed by thread_id.
client = MongoClient(MONGO_URI)
db = client["report_agent"]
collection = db["user_profiles"]
@tool
def query_report_profile(config: RunnableConfig, ) -> dict:
    """
    查询用户报告画像
    """
    # Profiles are keyed by the conversation thread_id from the run config.
    thread_id = config['configurable']['thread_id']
    doc = collection.find_one({"thread_id": thread_id})
    if not doc:
        return {"profile": {}}
    doc.pop("_id", None)  # ObjectId is not JSON-serializable; drop it
    return doc
@tool
def update_report_profile(config: RunnableConfig, profile: dict) -> dict:
    """
    更新用户画像信息
    """
    # Upsert: replaces the stored profile wholesale — the calling agent is
    # expected to merge old and new fields before invoking this tool.
    thread_id = config['configurable']['thread_id']
    collection.update_one(
        {"thread_id": thread_id},
        {
            "$set": {
                "profile": profile
            }
        },
        upsert=True
    )
    return {"status": "success", "profile": profile}
@tool
def check_profile_complete(profile: dict) -> dict:
    """
    判断画像是否完整
    """
    # Fields required before a report can be generated.
    required = ["style", "room_type", "budget"]
    # Robustness fix: a field that is present but empty ("" / None) is still
    # missing for report purposes; the old `f not in profile` check would
    # report such a profile as complete.
    missing = [f for f in required if not profile.get(f)]
    return {
        "complete": len(missing) == 0,
        "missing_fields": missing
    }