feat 接入report
This commit is contained in:
74
src/server/agent/tools/research_tool.py
Normal file
74
src/server/agent/tools/research_tool.py
Normal file
@@ -0,0 +1,74 @@
|
||||
import asyncio
import json
import logging
from datetime import datetime
from typing import List, Set, Optional
from urllib.parse import urlsplit

from langchain_core.tools import tool
from tavily import TavilyClient

from src.core.config import settings
|
||||
|
||||
# Tavily API key pulled from the application's central settings object.
TAVILY_API_KEY = settings.TAVILY_API_KEY
|
||||
|
||||
|
||||
@tool
async def topic_research(topic: str, max_urls: int = 15) -> str:
    """Deep research tool.

    Runs several targeted Tavily searches about *topic* (query templates
    embed the current year so results stay fresh), then returns a
    deduplicated, filtered list of high-quality URLs.

    Args:
        topic: Subject to research.
        max_urls: Maximum number of URLs to return (default 15).

    Returns:
        A JSON-encoded list of URLs (``ensure_ascii=False``) for the agent
        to feed into a batch crawler (e.g. Crawl4ai) in the next step, or
        an error message string when no API key is configured.
    """
    if not TAVILY_API_KEY:
        return "❌ 错误: 未配置 TAVILY_API_KEY。"

    client = TavilyClient(api_key=TAVILY_API_KEY)

    # 1. Generate multi-angle search queries (year-stamped for freshness).
    current_year = datetime.now().strftime("%Y")
    queries = [
        f"{topic} trends {current_year}",
        f"{topic} market analysis {current_year}",
        f"top selling {topic} styles {current_year}",
        f"best {topic} materials and colors {current_year}",
    ]

    # 2. Run searches concurrently. The Tavily SDK is synchronous, so each
    #    call is pushed onto a worker thread to keep the event loop free.
    async def perform_search(q: str) -> list:
        def sync_search() -> list:
            try:
                response = client.search(
                    query=q,
                    search_depth="advanced",
                    max_results=5,
                    include_answer=False
                )
                return response.get('results', [])
            except Exception as e:
                # Best-effort: one failed query must not abort the whole
                # research run — log it and contribute no results.
                logging.getLogger(__name__).warning(
                    "Tavily search failed for %r: %s", q, e
                )
                return []

        return await asyncio.to_thread(sync_search)

    search_results_list = await asyncio.gather(
        *(perform_search(q) for q in queries)
    )

    # 3. Deduplicate and drop obvious non-article resources. The extension
    #    check inspects only the URL *path*, so "report.pdf?dl=1" is also
    #    filtered out (a plain endswith() on the full URL would miss it).
    skip_extensions = ('.pdf', '.jpg', '.png', '.zip', '.exe')
    seen_urls: Set[str] = set()
    final_urls = []
    for results in search_results_list:
        for item in results:
            url = item.get('url')
            if not url or url in seen_urls:
                continue
            if urlsplit(url).path.lower().endswith(skip_extensions):
                continue
            seen_urls.add(url)
            final_urls.append(url)

    # 4. Truncate to the caller's budget and hand back JSON.
    return json.dumps(final_urls[:max_urls], ensure_ascii=False)
|
||||
Reference in New Issue
Block a user