feat 接入report
This commit is contained in:
74
src/server/agent/tools/research_tool.py
Normal file
74
src/server/agent/tools/research_tool.py
Normal file
@@ -0,0 +1,74 @@
|
||||
import asyncio
import json
import logging
from datetime import datetime
from typing import List, Set, Optional
from urllib.parse import urlsplit

from langchain_core.tools import tool
from tavily import TavilyClient

from src.core.config import settings
|
||||
|
||||
# Tavily API key pulled from the application's central settings object.
TAVILY_API_KEY = settings.TAVILY_API_KEY
|
||||
|
||||
|
||||
@tool
async def topic_research(topic: str, max_urls: int = 15) -> str:
    """Deep research tool.

    Runs several targeted Tavily searches about *topic* (query templates
    embed the current year so results stay fresh), then returns a
    deduplicated, filtered list of high-quality URLs.

    Args:
        topic: Subject to research.
        max_urls: Maximum number of URLs to return (default 15).

    Returns:
        A JSON-encoded list of URLs (``ensure_ascii=False``) for the agent
        to feed into a batch crawler (e.g. Crawl4ai) in the next step, or
        an error message string when no API key is configured.
    """
    if not TAVILY_API_KEY:
        return "❌ 错误: 未配置 TAVILY_API_KEY。"

    client = TavilyClient(api_key=TAVILY_API_KEY)

    # 1. Generate multi-angle search queries (year-stamped for freshness).
    current_year = datetime.now().strftime("%Y")
    queries = [
        f"{topic} trends {current_year}",
        f"{topic} market analysis {current_year}",
        f"top selling {topic} styles {current_year}",
        f"best {topic} materials and colors {current_year}",
    ]

    # 2. Run searches concurrently. The Tavily SDK is synchronous, so each
    #    call is pushed onto a worker thread to keep the event loop free.
    async def perform_search(q: str) -> list:
        def sync_search() -> list:
            try:
                response = client.search(
                    query=q,
                    search_depth="advanced",
                    max_results=5,
                    include_answer=False
                )
                return response.get('results', [])
            except Exception as e:
                # Best-effort: one failed query must not abort the whole
                # research run — log it and contribute no results.
                logging.getLogger(__name__).warning(
                    "Tavily search failed for %r: %s", q, e
                )
                return []

        return await asyncio.to_thread(sync_search)

    search_results_list = await asyncio.gather(
        *(perform_search(q) for q in queries)
    )

    # 3. Deduplicate and drop obvious non-article resources. The extension
    #    check inspects only the URL *path*, so "report.pdf?dl=1" is also
    #    filtered out (a plain endswith() on the full URL would miss it).
    skip_extensions = ('.pdf', '.jpg', '.png', '.zip', '.exe')
    seen_urls: Set[str] = set()
    final_urls = []
    for results in search_results_list:
        for item in results:
            url = item.get('url')
            if not url or url in seen_urls:
                continue
            if urlsplit(url).path.lower().endswith(skip_extensions):
                continue
            seen_urls.add(url)
            final_urls.append(url)

    # 4. Truncate to the caller's budget and hand back JSON.
    return json.dumps(final_urls[:max_urls], ensure_ascii=False)
|
||||
Reference in New Issue
Block a user