移除旧版本chat
This commit is contained in:
@@ -1,169 +0,0 @@
|
|||||||
from pathlib import Path
|
|
||||||
from typing import AsyncGenerator, Dict, Any
|
|
||||||
from deepagents import create_deep_agent
|
|
||||||
from deepagents.backends import FilesystemBackend
|
|
||||||
from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage, AIMessage, AIMessageChunk
|
|
||||||
from langchain_core.runnables import RunnableConfig
|
|
||||||
from langchain_qwq import ChatQwen
|
|
||||||
|
|
||||||
from src.core.config import settings
|
|
||||||
from src.server.agent.prompt import SYSTEM_PROMPT, visualizer_prompt, designer_prompt
|
|
||||||
from src.server.agent.state import AgentState
|
|
||||||
from src.server.agent.tools.generate_furniture_sketch import generate_furniture
|
|
||||||
from src.server.agent.tools.crawl_tool import crawl4ai_batch
|
|
||||||
from src.server.agent.tools.report_generator_tool import report_generator
|
|
||||||
from src.server.agent.tools.research_tool import topic_research
|
|
||||||
from src.server.agent.tools.structured_retrieval_tool import structured_retrieval
|
|
||||||
from src.server.agent.tools.terminate_tool import terminate
|
|
||||||
from src.server.agent.tools.user_persona_tool import manage_user_persona
|
|
||||||
from src.server.utils.generate_suggestion import generate_chat_suggestions
|
|
||||||
|
|
||||||
# Directory containing this main module file.
MAIN_DIR = Path(__file__).resolve().parent

# Project root (main.py sits at the same level as tools/, so parent is the root).
PROJECT_ROOT = MAIN_DIR

# Shared Qwen chat model used by the deep research agent.
model = ChatQwen(
    enable_thinking=False,
    model="qwen3.5-flash",
    max_tokens=3_000,
    timeout=None,
    max_retries=2,
    api_key=settings.QWEN_API_KEY)

# Tools exposed to the deep research agent.
tools = [manage_user_persona, topic_research, crawl4ai_batch, structured_retrieval, report_generator, terminate]
research_agent = create_deep_agent(
    model=model,
    tools=tools,
    system_prompt=SYSTEM_PROMPT,
    backend=FilesystemBackend(
        root_dir=str(PROJECT_ROOT / "agent_workspace"),
        virtual_mode=False,  # Important: virtual mode off -> files really hit the disk
    )
)
|
|
||||||
|
|
||||||
|
|
||||||
# Helper: build an LLM instance from the per-request runtime configuration.
def get_model(config: RunnableConfig, streaming=False):
    """Return a ChatQwen client whose temperature comes from the run config.

    Args:
        config: LangGraph runnable config; ``configurable.llm_temperature``
            is read, defaulting to 0.5 when absent.
        streaming: whether the returned client streams tokens.
    """
    temperature = config["configurable"].get("llm_temperature", 0.5)
    return ChatQwen(
        enable_thinking=False,
        model="qwen3.5-flash",
        max_tokens=3_000,
        timeout=None,
        max_retries=2,
        temperature=temperature,
        api_key=settings.QWEN_API_KEY,
        streaming=streaming,
    )
|
|
||||||
|
|
||||||
|
|
||||||
# --- 1. Designer Agent ---
async def designer_node(state: AgentState, config: RunnableConfig):
    """Refine the user's design requirements and provide professional parameters."""
    llm = get_model(config)  # model with dynamic temperature from the run config

    history = state["messages"]
    # Every 5th message, flag that follow-up suggestions should be generated.
    suggest = (len(history) % 5 == 0)
    reply = await llm.ainvoke([SystemMessage(content=designer_prompt), *history])
    return {"messages": [reply], "require_suggestion": suggest}
|
|
||||||
|
|
||||||
|
|
||||||
async def researcher_node(
    state: AgentState,
    config: RunnableConfig
) -> AsyncGenerator[Dict[str, Any], None]:
    """Deep-research node: streams a market/trend report via the deep agent.

    Yields LangGraph state updates instead of returning a single one so that
    progress (status messages and token chunks) can be surfaced incrementally.
    """
    # Feature gate: only run when the frontend explicitly enabled reports.
    use_report = config["configurable"].get("use_report", False)
    if not use_report:
        yield {
            "messages": [AIMessage(
                content="深度报告功能未启用,请通过前端按钮触发。",
                name="Researcher"
            )],
            "next": "Supervisor"
        }
        return

    messages = state["messages"]
    # Locate the most recent human turn; research needs a concrete question.
    last_human = next((m for m in reversed(messages) if isinstance(m, HumanMessage)), None)

    if not last_human:
        yield {
            "messages": [AIMessage(
                content="深度研究节点:未找到有效的用户问题",
                name="Researcher"
            )],
            "next": "Supervisor"
        }
        return
    # Announce start; additional_kwargs lets the frontend render progress.
    current_step = "正在启动深度报告生成..."
    yield {
        "messages": [AIMessage(
            content="正在启动深度报告生成...",
            name="Researcher",
            additional_kwargs={
                "current_step": current_step,
                "streaming": True
            }
        )]
    }
    # Stream the deep agent over the last 12 turns to bound context size.
    async for chunk in research_agent.astream(
        {"messages": messages[-12:]},
        config=config
    ):
        # NOTE(review): astream updates usually carry a *list* under
        # "messages", so this isinstance check against AIMessageChunk may
        # never match and everything falls into the else branch — confirm
        # the expected chunk shape.
        if "messages" in chunk and isinstance(chunk["messages"], AIMessageChunk):
            yield {
                "messages": chunk["messages"],  # append token by token
                # Extra metadata could also be yielded here, e.g.
                # "node": "Researcher",
                # "status": "thinking"
            }
        else:
            yield chunk
|
|
||||||
|
|
||||||
|
|
||||||
# --- 3. Visualizer Agent ---
async def visualizer_node(state: AgentState, config: RunnableConfig):
    """Turn natural language into a drawing prompt and invoke the sketch tool."""
    llm = get_model(config, streaming=False)
    llm_with_tools = llm.bind_tools([generate_furniture])
    history = state["messages"]
    reply = await llm_with_tools.ainvoke(
        [SystemMessage(content=visualizer_prompt)] + history
    )

    # No tool requested -> just relay the model's answer.
    if not reply.tool_calls:
        return {"messages": [reply]}

    call = reply.tool_calls[0]
    if call["name"] == "generate_furniture":
        img_url = await generate_furniture.ainvoke(call["args"])
        return {
            "messages": [
                reply,
                # The ToolMessage marks this as an image result for the frontend.
                ToolMessage(content=img_url, tool_call_id=call["id"]),
            ]
        }

    return {"messages": [reply]}
|
|
||||||
|
|
||||||
|
|
||||||
# --- 4. Suggester Agent ---
async def suggester_node(state: AgentState, config: RunnableConfig):
    """Terminal node that generates follow-up question suggestions."""
    llm = get_model(config)
    history = state["messages"]

    # Only the recent conversation is needed to derive suggestions.
    options = await generate_chat_suggestions(history, llm)

    # Emit a content-less message; the frontend reads `additional_kwargs`
    # to render the suggestion buttons.
    suggestion_msg = AIMessage(
        content="",
        additional_kwargs={"suggestions": options},
        name="Suggester",
    )
    return {"messages": [suggestion_msg]}
|
|
||||||
@@ -1,114 +0,0 @@
|
|||||||
import random
|
|
||||||
from typing import Literal
|
|
||||||
from langchain_core.messages import AIMessage
|
|
||||||
from langchain_core.runnables import RunnableConfig
|
|
||||||
from langchain_qwq import ChatQwen
|
|
||||||
from langgraph.checkpoint.serde.jsonplus import JsonPlusSerializer
|
|
||||||
from langgraph.graph import StateGraph, END, START
|
|
||||||
from pydantic import BaseModel
|
|
||||||
from pymongo import MongoClient
|
|
||||||
|
|
||||||
from src.core.config import MONGO_URI, settings
|
|
||||||
from src.server.agent.state import AgentState
|
|
||||||
from src.server.agent.agents import designer_node, researcher_node, visualizer_node, suggester_node
|
|
||||||
from langgraph.checkpoint.mongodb import MongoDBSaver
|
|
||||||
|
|
||||||
|
|
||||||
# --- Supervisor (routing logic) ---
# Structured output schema forcing the LLM to choose exactly one route.
class RouteResponse(BaseModel):
    # FINISH is kept alongside the added Suggester route.
    next: Literal["Designer", "Researcher", "Visualizer", "Suggester", "FINISH"]


# Dedicated LLM instance used only for routing decisions.
llm_supervisor = ChatQwen(
    model="qwen3.5-flash",
    max_tokens=3_000,
    timeout=None,
    max_retries=2,
    api_key=settings.QWEN_API_KEY)
|
|
||||||
|
|
||||||
|
|
||||||
def supervisor_node(state: AgentState, config: RunnableConfig):
    """Route the conversation to the next specialist node.

    Uses a structured-output LLM call to pick among Designer / Researcher /
    Visualizer / Suggester / FINISH, then applies guard rails on the choice.
    """
    configurable = config["configurable"]
    use_report = configurable.get("use_report", False)
    # Probability (0.0~1.0) of inserting a Suggester turn before finishing.
    # NOTE(review): the config key is "require_suggestion" but the value is
    # used as a frequency — confirm the key name matches what callers set.
    suggest_frequency = configurable.get("require_suggestion", 0.6)  # 0.0~1.0

    messages = state["messages"]
    if not messages:
        return {"next": "Suggester"}

    # ── system prompt kept unchanged ──
    system_prompt = f"""你是家具设计主管,负责分配任务。
当前设定:
- 是否需要市场研究报告:{'是' if use_report else '否'}

严格遵守以下规则:
- 如果 **不需要** 市场研究报告(use_report = False),**绝对不能** 选择 Researcher
- 只有在 **明确需要** 市场报告、竞争分析、材质趋势、价格区间等外部资讯时,才选择 Researcher,且 **必须** use_report = True
- 常见分配:
  - 纯设计、风格、尺寸、材质建议 → Designer
  - 需要生成图片、渲染 → Visualizer
  - 需要产生建议按钮 → Suggester
  - 需要市场报告 → Researcher(但只有 use_report=True 时才允许)
  - 对话已完整、无需继续 → FINISH

用户最后说了什么?请根据实际需求决定下一步。
"""

    chain = llm_supervisor.with_structured_output(RouteResponse)
    decision = chain.invoke([{"role": "system", "content": system_prompt}] + messages)
    # NOTE(review): the original comment claimed a FINISH fallback for an
    # empty decision, but no fallback is applied here — structured output is
    # relied on to always produce a valid value.
    next_node = decision.next

    # Safety valve: forbid an illegal Researcher selection.
    if next_node == "Researcher" and not use_report:
        print("警告:LLM 违规选择了 Researcher,已强制改为 Suggester 或 FINISH")
        next_node = "Suggester" if state.get("require_suggestion", False) else "FINISH"

    # Core behavior: only when the LLM decides FINISH do we roll the dice on
    # inserting a Suggester turn.
    if next_node == "FINISH":
        # Probability condition satisfied -> insert Suggester.
        if suggest_frequency > 0 and random.random() < suggest_frequency:
            next_node = "Suggester"

    return {"next": next_node}
|
|
||||||
|
|
||||||
|
|
||||||
# --- Build the graph ---
workflow = StateGraph(AgentState)

workflow.add_node("Supervisor", supervisor_node)
workflow.add_node("Designer", designer_node)
workflow.add_node("Researcher", researcher_node)
workflow.add_node("Visualizer", visualizer_node)
workflow.add_node("Suggester", suggester_node)
workflow.add_edge(START, "Supervisor")

# Conditional routing driven by the Supervisor's "next" decision.
workflow.add_conditional_edges(
    "Supervisor",
    lambda state: state["next"],
    {
        "Designer": "Designer",
        "Researcher": "Researcher",
        "Visualizer": "Visualizer",
        "Suggester": "Suggester",  # what used to be FINISH now goes to Suggester
        "FINISH": END  # end directly, without suggestions
    }
)

# Specialists always return to the Supervisor after running...
workflow.add_edge("Designer", "Supervisor")
workflow.add_edge("Researcher", "Supervisor")
workflow.add_edge("Visualizer", "Supervisor")
# ...except Suggester, which can terminate the whole flow.
workflow.add_edge("Suggester", END)

client = MongoClient(MONGO_URI)
# NOTE(review): `client["furniture_agent_db"]` is a pymongo Database object,
# yet it is passed as the `client=` argument while `db_name="langgraph"` is
# also supplied — verify against the langgraph MongoDBSaver API which of the
# two is actually honored.
checkpointer = MongoDBSaver(
    client=client["furniture_agent_db"],
    db_name="langgraph",
    collection_name="checkpoints",
    serde=JsonPlusSerializer(pickle_fallback=True),  # key line: pickle fallback for non-JSON types
)
app = workflow.compile(checkpointer=checkpointer)
|
|
||||||
@@ -1,119 +0,0 @@
|
|||||||
SYSTEM_PROMPT = """
|
|
||||||
You are "TrendAgent" - a focused, efficient design trend analysis agent.
|
|
||||||
Your ONLY goal: produce one high-quality Markdown trend report per user request.
|
|
||||||
|
|
||||||
TOOL ORDER & DISCIPLINE IS MANDATORY - DO NOT INVENT STEPS
|
|
||||||
|
|
||||||
┌───────────────────────────────────────────────────────┐
|
|
||||||
│ Phase 0 - Context & Persona (必须先完成) │
|
|
||||||
└───────────────────────────────────────────────────────┘
|
|
||||||
|
|
||||||
Rules for Phase 0:
|
|
||||||
1. ALWAYS start with manage_user_persona(command="get")
|
|
||||||
2. If STATUS == "INCOMPLETE" or persona missing critical fields (Design Type, Style, Target Audience, Color Preference, etc.):
|
|
||||||
→ MUST call manage_user_persona(command="ask") to collect missing info
|
|
||||||
→ After user answers → call manage_user_persona(command="set", ...)
|
|
||||||
→ Loop until STATUS == "READY"
|
|
||||||
3. Only when STATUS == "READY" → proceed to Phase 1
|
|
||||||
4. Never assume or fabricate persona details
|
|
||||||
|
|
||||||
┌───────────────────────────────────────────────────────┐
|
|
||||||
│ Phase 1 - Planning (必须执行一次且只能一次) │
|
|
||||||
└───────────────────────────────────────────────────────┘
|
|
||||||
|
|
||||||
When persona READY and user gave a clear trend request:
|
|
||||||
1. Call write_todos EXACTLY ONCE with a strict plan containing:
|
|
||||||
- 3–6 concrete steps (numbered)
|
|
||||||
- Which URLs/topics to research
|
|
||||||
- Expected output of each major tool
|
|
||||||
- Final deliverable: one Markdown report
|
|
||||||
2. After receiving todos, you MUST follow this exact sequence unless impossible
|
|
||||||
3. Do NOT call any other tool until write_todos is done
|
|
||||||
|
|
||||||
┌───────────────────────────────────────────────────────┐
|
|
||||||
│ Phase 2 - Research & Collection │
|
|
||||||
└───────────────────────────────────────────────────────┘
|
|
||||||
|
|
||||||
Follow todos order:
|
|
||||||
- Use topic_research → get 3–8 high-quality URLs (add persona [Style] [Type] in query)
|
|
||||||
- Select best 3–6 URLs → call crawl4ai_batch ONCE with list
|
|
||||||
- Get file paths → call structured_retrieval ONCE with file_paths list
|
|
||||||
|
|
||||||
┌───────────────────────────────────────────────────────┐
|
|
||||||
│ Phase 3 - Synthesis & Delivery │
|
|
||||||
└───────────────────────────────────────────────────────┘
|
|
||||||
|
|
||||||
After structured_retrieval summary received:
|
|
||||||
- If extracted item count ≥ 8–12 AND covers main aspects in todos → ready to report
|
|
||||||
- Call report_generator ONCE (it reads local JSON/DB)
|
|
||||||
- After report_generator success → call terminate
|
|
||||||
- If data obviously insufficient → call topic_research again (max 1 extra round)
|
|
||||||
|
|
||||||
┌───────────────────────────────────────────────────────┐
|
|
||||||
│ HARD RULES - MUST OBEY │
|
|
||||||
└───────────────────────────────────────────────────────┘
|
|
||||||
|
|
||||||
• Never load full JSON/markdown into context - trust local storage
|
|
||||||
• Batch everything possible (crawl4ai_batch + structured_retrieval)
|
|
||||||
• Call tools in PHASE ORDER - no jumping, no repetition
|
|
||||||
• After report_generator → next action MUST be terminate
|
|
||||||
• If stuck > 4 steps without progress → call terminate with note "Incomplete - insufficient data"
|
|
||||||
• Never hallucinate trend data - base everything on retrieved content
|
|
||||||
• Report must start each section with **Conclusion First** insight
|
|
||||||
• Include [IMAGE_REF_xx] placeholders where visuals were extracted
|
|
||||||
|
|
||||||
Current status: Phase 0
|
|
||||||
"""
|
|
||||||
|
|
||||||
designer_prompt = """
|
|
||||||
你是家具设计团队的主管(Supervisor)。
|
|
||||||
请根据用户的意图,选择最合适的专家:
|
|
||||||
- Designer: 设计建议、参数细化、闲聊、问候。
|
|
||||||
- Visualizer: 绘图、看草图。
|
|
||||||
- Researcher: 市场报告、趋势。
|
|
||||||
|
|
||||||
只需输出专家名称。
|
|
||||||
"""
|
|
||||||
|
|
||||||
visualizer_prompt = """
|
|
||||||
你是专业的家具工业设计视觉专家,专长将文字描述转化为**清晰、结构明确的家具线稿(line drawing / technical sketch)**。
|
|
||||||
|
|
||||||
你的任务流程(必须严格遵守):
|
|
||||||
1. 先基于全部对话上下文,在内心生成一个详细的英文 Stable Diffusion Prompt(不要提前输出)。
|
|
||||||
2. 最后只调用 generate_furniture 工具,把刚才生成的 prompt 作为参数传入。
|
|
||||||
3. 工具调用完成后,自然结束,不要额外说话或解释。
|
|
||||||
|
|
||||||
Prompt 生成要求(仅供内部参考,必须全部做到):
|
|
||||||
- 全程使用英文
|
|
||||||
- 核心目标:生成专业的**单品家具线稿**(technical line drawing / clean sketch / industrial design sketch)
|
|
||||||
- 必须包含以下关键词(尽量靠前放置):
|
|
||||||
line drawing, technical sketch, clean line art, black and white, wireframe sketch, contour lines only, no shading, no color, no rendering, no texture, no shadow, no gradient
|
|
||||||
- 必须包含:
|
|
||||||
highly detailed linework, sharp precise lines, professional industrial design sketch, clear construction lines, accurate proportions, furniture design blueprint style
|
|
||||||
- 视角描述:isometric view 或 three-quarter view 或 front + side + top combination view(根据家具特性选择最适合展示结构的视角)
|
|
||||||
- 背景与整体风格:
|
|
||||||
pure white background, studio style, no people, no text, no logo, no dimension lines unless requested, no color fills
|
|
||||||
- 负面提示强制包含(negative prompt 部分要写强):
|
|
||||||
blurry, low quality, sketchy messy lines, thick marker, pencil shading, colored pencil, watercolor, rendering, photorealistic, 3d render, realistic lighting, shadow, gradient, depth of field, bokeh, cartoon, anime, deformed, extra limbs, bad anatomy
|
|
||||||
- 品质修饰词(放在结尾):
|
|
||||||
masterpiece, best quality, ultra detailed linework, professional CAD sketch style, clean and precise
|
|
||||||
|
|
||||||
现在开始执行:
|
|
||||||
- 先在内心完整构建符合以上全部要求的英文 prompt
|
|
||||||
- 然后在回复中先单独输出这个完整的 prompt(让用户能检查)
|
|
||||||
- 最后立即调用工具:generate_furniture,参数 prompt = 你刚才输出的完整内容
|
|
||||||
- 不要做其他任何说明或聊天
|
|
||||||
"""
|
|
||||||
designer_prompt = """
|
|
||||||
你是一位资深的家具设计师,经验丰富、审美一流、沟通温暖且高效。
|
|
||||||
你的核心目标:快速理解用户想法,并用最合适的方式推进设计。
|
|
||||||
你可以:
|
|
||||||
1. 用户描述模糊时,可以自然地询问或给出建议,但**绝不强迫补充**尺寸、材质、人体工学等细节,除非用户自己关心或需要明确这些参数。
|
|
||||||
2. 如果用户提到想看图、想出效果图、想画草图、想渲染等,**直接同意并推动**:
|
|
||||||
- 用一句话确认或赞美用户的想法
|
|
||||||
- 主动说“我这就帮你把当前设计转给视觉专家生成效果图/草图”
|
|
||||||
- 然后让 Visualizer 节点去处理(不需要你先写一大段细节描述)
|
|
||||||
3. 回复时像和懂设计的客户聊天一样:专业、亲切、有创意,偶尔带点热情或幽默,但始终围绕家具设计。
|
|
||||||
|
|
||||||
永远不要用“必须”“请先描述清楚”“按照流程”等强硬的流程化语言。
|
|
||||||
"""
|
|
||||||
@@ -1,74 +0,0 @@
|
|||||||
import asyncio
|
|
||||||
from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
|
|
||||||
|
|
||||||
from src.server.agent.graph import app
|
|
||||||
|
|
||||||
|
|
||||||
async def async_main():
    """Interactive console loop for manually exercising the agent graph.

    Supports 'exit'/'quit' to leave, and 'history' to list checkpoints and
    optionally rewind the thread to one of them.
    """
    config = {"configurable": {"thread_id": "project_alpha"}}

    print("測試模式已啟動 (輸入 'exit' 離開,'history' 查看歷史並回溯)")
    # NOTE(review): `use_report` is collected here but never used below —
    # presumably it was meant to be placed into config["configurable"].
    use_report = input("是否启用深度报告?(y/n): ").lower() == 'y'
    while True:
        user_input = input("\n👤 輸入訊息: ").strip()

        if user_input.lower() in ["exit", "quit", "結束"]:
            print("測試結束")
            break

        if user_input.lower() == "history":
            # History inspection / rewind logic.
            print("\n=== 歷史檢查點 ===")
            states = [s async for s in app.aget_state_history(config)]
            for idx, state_tuple in enumerate(states):
                cp_id = state_tuple.config["configurable"].get("checkpoint_id", "N/A")
                messages = state_tuple.values.get("messages", [])
                if messages:
                    last_msg = messages[-1]
                    msg_type = type(last_msg).__name__
                    content_preview = str(last_msg.content)[:60].replace("\n", " ")
                    node = getattr(last_msg, "name", "無節點名")
                    print(f"[{idx}] {cp_id[:12]}... | {node} | {msg_type} | {content_preview}...")
            target = input("\n輸入要回溯的 checkpoint ID (或 Enter 取消): ").strip()
            if target:
                # Pinning checkpoint_id makes the next run branch from there.
                config["configurable"]["checkpoint_id"] = target
                print(f"已切換到 checkpoint {target}")
            continue

        if not user_input:
            continue

        print("\n🤖 開始處理...")

        try:
            last_output = ""
            # Stream per-node state updates as the graph executes.
            async for event in app.astream(
                {"messages": [HumanMessage(content=user_input)]},
                config,
                stream_mode="updates"
            ):
                for node_name, update in event.items():
                    if "messages" in update:
                        for msg in update["messages"]:
                            if isinstance(msg, AIMessage):
                                content = msg.content.strip()
                                # De-duplicate repeated identical outputs.
                                if content and content != last_output:
                                    print(f"\n[{node_name}] {msg.name or 'AI'}: {content}")
                                    last_output = content
                            elif isinstance(msg, ToolMessage):
                                print(f" → 工具 {msg.name}: {msg.content[:120]}{'...' if len(msg.content) > 120 else ''}")
                    else:
                        # NOTE(review): `msg` is only bound inside the loop
                        # above, so in this branch it may be unbound or stale
                        # from a previous update — confirm intended scope.
                        print(f" ({node_name}) {type(msg).__name__}")

            # Show the final message once the graph run completes.
            final_state = await app.aget_state(config)
            final_msg = final_state.values["messages"][-1]
            print(f"\n=== 完成 ===\n最終訊息: {final_msg.content[:300]}{'...' if len(final_msg.content) > 300 else ''}")

        except Exception as e:
            print(f"錯誤:{str(e)}")
            import traceback
            traceback.print_exc()


if __name__ == "__main__":
    asyncio.run(async_main())
|
|
||||||
@@ -1,11 +0,0 @@
|
|||||||
import operator
|
|
||||||
from typing import Annotated, Sequence, TypedDict, Union, Optional
|
|
||||||
from langchain_core.messages import BaseMessage
|
|
||||||
|
|
||||||
|
|
||||||
class AgentState(TypedDict):
    """Shared LangGraph state passed between all agent nodes."""
    # Full conversation history; operator.add means new messages are
    # appended rather than replacing the existing list.
    messages: Annotated[Sequence[BaseMessage], operator.add]
    # Name of the next node, as decided by the Supervisor.
    next: str
    # Whether follow-up suggestion buttons should be generated.
    require_suggestion: bool
|
|
||||||
@@ -1,118 +0,0 @@
|
|||||||
import time
|
|
||||||
import asyncio
|
|
||||||
from typing import List
|
|
||||||
from urllib.parse import urlparse
|
|
||||||
from pathlib import Path
|
|
||||||
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
|
|
||||||
from langchain_core.tools import tool
|
|
||||||
|
|
||||||
# ─────────────── Important: path computation ───────────────
# Directory containing this file (crawl4ai_batch.py).
TOOL_DIR = Path(__file__).resolve().parent

# Project root (assumes the tools folder sits next to the main program).
PROJECT_ROOT = TOOL_DIR.parent

# Directory where crawl results are stored (choose freely).
# Recommended option A: <project root>/workspace/raw_data
SAVE_DIR = PROJECT_ROOT / "workspace" / "raw_data"

# Recommended option B: put it under agent_workspace if the deep agent
# should read the files directly.
# SAVE_DIR = PROJECT_ROOT / "agent_workspace" / "raw_data"

# Ensure the directory exists.
SAVE_DIR.mkdir(parents=True, exist_ok=True)


# ────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
@tool
async def crawl4ai_batch(urls: List[str]) -> str:
    """
    高性能网页爬虫,支持并行处理多个 URL。
    爬取后的 Markdown 内容将保存到本地 workspace/raw_data 目录中。
    返回执行结果摘要和保存的文件路径列表。
    """
    # Guard: nothing to do without URLs.
    if not urls:
        return "❌ 错误: 未提供任何 URL。"

    # print(f"🕷️ 正在并行爬取 {len(urls)} 个 URL...")
    # print(f"儲存目錄: {SAVE_DIR}")

    # Headless browser settings shared by all crawls in this batch.
    browser_config = BrowserConfig(
        headless=True,
        verbose=False,
        java_script_enabled=True,
        user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                   "AppleWebKit/537.36 (KHTML, like Gecko) "
                   "Chrome/118.0.5993.118 Safari/537.36",
        proxy=None,  # optional; e.g. "http://user:pass@ip:port" if a proxy is needed
    )

    # Per-run extraction settings: bypass cache, strip boilerplate tags.
    run_config = CrawlerRunConfig(
        cache_mode=CacheMode.BYPASS,
        word_count_threshold=5,
        excluded_tags=["script", "style", "nav", "footer"],
        remove_overlay_elements=True,
        process_iframes=True,
    )

    results_summary = []
    saved_files = []

    try:
        async with AsyncWebCrawler(config=browser_config) as crawler:
            # Crawl all URLs concurrently; per-task exceptions are captured
            # by return_exceptions so one failure doesn't abort the batch.
            tasks = [crawler.arun(url=url, config=run_config) for url in urls]
            crawl_results = await asyncio.gather(*tasks, return_exceptions=True)

            for i, result in enumerate(crawl_results):
                url = urls[i]

                if isinstance(result, Exception):
                    results_summary.append(f"❌ 抓取失败 {url}: {str(result)}")
                    continue

                if result.success:
                    # NOTE(review): assumes result.markdown is a plain string
                    # (or falsy) — confirm against the installed crawl4ai
                    # version, where it may be a richer markdown object.
                    markdown_content = result.markdown or ""

                    # Skip pages with too little content to be useful.
                    if len(markdown_content) < 500:
                        results_summary.append(f"⏩ 跳过 {url} (内容过短)")
                        continue

                    # Build a filename from timestamp + domain + path.
                    parsed = urlparse(url)
                    domain = parsed.netloc.replace("www.", "").replace(".", "_")
                    path_part = parsed.path.strip("/").replace("/", "_")[:50] or "index"
                    filename = f"{int(time.time())}_{domain}_{path_part}.md"

                    # Full destination path.
                    filepath = SAVE_DIR / filename

                    # Write the markdown with a provenance header.
                    with open(filepath, "w", encoding="utf-8") as f:
                        header = f"<!-- Source: {url} -->\n<!-- Saved: {time.strftime('%Y-%m-%d %H:%M:%S')} -->\n\n"
                        f.write(header + markdown_content)

                    saved_files.append(str(filepath))  # store as str for the agent
                    results_summary.append(f"✅ 成功: {url} → {filepath}")

                else:
                    status = getattr(result, 'status_code', '未知错误')
                    results_summary.append(f"❌ 失败: {url} (状态码: {status})")

    except Exception as e:
        return f"🚨 爬虫系统崩溃: {str(e)}"

    # Result summary returned to the agent.
    final_output = (
        f"### 批量抓取完成 ###\n"
        f"已成功保存 {len(saved_files)} 个文件。\n"
        f"儲存目錄: {SAVE_DIR}\n"
        f"详情:\n" + "\n".join(results_summary)
    )

    if saved_files:
        final_output += "\n\n已保存的文件列表(可供後續讀取):\n" + "\n".join(saved_files)

    return final_output
|
|
||||||
@@ -1,69 +0,0 @@
|
|||||||
import uuid
|
|
||||||
from google.oauth2 import service_account
|
|
||||||
from langchain_core.tools import tool
|
|
||||||
from google import genai
|
|
||||||
from google.genai.types import GenerateContentConfig, Modality
|
|
||||||
|
|
||||||
from minio import Minio
|
|
||||||
|
|
||||||
from src.core.config import settings
|
|
||||||
from src.server.utils.new_oss_client import oss_upload_image
|
|
||||||
|
|
||||||
# Initialise global credentials and clients.
# NOTE(review): the value passed here as the service-account *file path* is
# named GOOGLE_GENAI_USE_VERTEXAI, which reads like a boolean flag — confirm
# this setting really holds a credentials file path.
creds = service_account.Credentials.from_service_account_file(
    settings.GOOGLE_GENAI_USE_VERTEXAI,
    scopes=["https://www.googleapis.com/auth/cloud-platform"],
)

# MinIO object-storage client used to host generated sketches.
minio_client = Minio(settings.MINIO_URL, access_key=settings.MINIO_ACCESS, secret_key=settings.MINIO_SECRET, secure=settings.MINIO_SECURE)
# Vertex AI GenAI client for image generation.
client = genai.Client(
    credentials=creds,
    project=settings.GOOGLE_CLOUD_PROJECT,
    location=settings.GOOGLE_CLOUD_LOCATION,
    vertexai=True
)
|
|
||||||
|
|
||||||
|
|
||||||
@tool
def generate_furniture(prompt: str) -> str:
    """
    使用 Gemini 图像生成模型根据详细的英文提示词生成家具设计草图。
    """
    print(f"\n[系统日志] 正在调用 Nano Banana (Gemini Image Gen) ...")

    try:
        # Ask Gemini for both text and image modalities in one call.
        response = client.models.generate_content(
            model="gemini-2.5-flash-image",
            contents=(f"Generate a professional furniture design sketch: {prompt}"),
            config=GenerateContentConfig(
                response_modalities=[Modality.TEXT, Modality.IMAGE],
            ),
        )

        # Pick the first inline image payload out of the response parts.
        image_bytes = None
        for part in response.candidates[0].content.parts:
            if part.inline_data:
                image_bytes = part.inline_data.data
                break

        if not image_bytes:
            return "未能生成图像数据。"
        # Unique object key for the generated PNG.
        object_name = f"furniture/sketches/{uuid.uuid4()}.png"
        bucket = "fida-test"  # replace with your bucket name
        # 3. Upload through the shared helper.
        upload_res = oss_upload_image(
            oss_client=minio_client,
            bucket=bucket,
            object_name=object_name,
            image_bytes=image_bytes
        )

        if upload_res:
            # 4. Build the access URL (for a private bucket, use
            # presigned_get_object instead; this is the direct-path form).
            image_url = f"{bucket}/{object_name}"
            return image_url
        else:
            return "图片生成成功,但上传至存储服务器失败。"
    except Exception as e:
        return f"绘图流程异常: {str(e)}"
|
|
||||||
@@ -1,36 +0,0 @@
|
|||||||
import os
|
|
||||||
from langchain_core.tools import tool
|
|
||||||
|
|
||||||
|
|
||||||
@tool
def read_file(file_path: str) -> str:
    """
    读取本地文件的万能工具。支持绝对路径和相对路径。
    """
    # Aggressively sanitise the path: drop stray quotes/whitespace the agent
    # may have added, and normalise backslashes to forward slashes.
    path = file_path.strip().strip("'").strip('"').replace("\\", "/")

    # Surface the runtime environment for debugging in the console.
    print(f"\n--- 🛠️ READ_FILE 调试信息 ---")
    print(f"待读路径: {path}")
    print(f"当前工作目录 (CWD): {os.getcwd()}")
    print(f"是否存在: {os.path.exists(path)}")

    try:
        # Resolve relative paths to absolute before reading.
        abs_path = os.path.abspath(path)
        if not os.path.exists(abs_path):
            # Give the agent a hint by listing sibling files when possible.
            parent = os.path.dirname(abs_path)
            if os.path.exists(parent):
                files = os.listdir(parent)
                return f"错误:文件不存在。该目录下现有的文件有: {files[:5]}..."
            return f"错误:路径不存在,且连父目录 {parent} 都找不到。"
        with open(abs_path, 'r', encoding='utf-8') as f:
            return f.read()
    except Exception as e:
        return f"读取失败,系统异常: {str(e)}"
|
|
||||||
@@ -1,144 +0,0 @@
|
|||||||
import os
|
|
||||||
import json
|
|
||||||
import re
|
|
||||||
from typing import Optional, List, Dict
|
|
||||||
from langchain_qwq import ChatQwen
|
|
||||||
from langgraph.config import get_stream_writer
|
|
||||||
from pydantic import BaseModel, Field
|
|
||||||
from langchain_core.tools import tool
|
|
||||||
from langchain_core.messages import SystemMessage, HumanMessage
|
|
||||||
|
|
||||||
from src.core.config import settings
|
|
||||||
|
|
||||||
# =========================
# LLM initialisation
# =========================


# Low-temperature Qwen model dedicated to report synthesis.
llm = ChatQwen(
    enable_thinking=False,
    model="qwen3.5-flash",
    temperature=0.2,
    max_tokens=3_000,
    timeout=None,
    max_retries=2,
    api_key=settings.QWEN_API_KEY)
|
|
||||||
|
|
||||||
|
|
||||||
# =========================
# Tool input schema
# =========================

class ReportInput(BaseModel):
    """Arguments accepted by the report_generator tool."""
    report_topic: str = Field(
        ...,
        description="Main topic of the report, e.g. '2026 Sofa Design Trends'"
    )
    structured_data: List[Dict] = Field(
        ...,
        description="Structured retrieval result items"
    )
    language: Optional[str] = Field(
        default="English",
        description="Output language"
    )
|
|
||||||
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# LangGraph Tool
|
|
||||||
# =========================
|
|
||||||
|
|
||||||
@tool("report_generator", args_schema=ReportInput)
def report_generator(
    report_topic: str,
    structured_data: List[Dict],
    language: str = "English"
) -> dict:
    """
    Generate a professional design/market report
    directly from structured retrieval results.
    """

    # Stream writer pushes progress events to the frontend in real time.
    writer = get_stream_writer()
    if not structured_data:
        error_msg = "Error: No structured data provided."
        writer({"type": "report_error", "message": error_msg})
        # NOTE(review): a plain str is returned although the signature
        # declares ``-> dict``; the success path also returns a str --
        # confirm what callers actually expect.
        return error_msg

    # Serialize the retrieval items so the LLM receives them verbatim.
    collected_data_str = json.dumps(
        structured_data,
        ensure_ascii=False,
        indent=2
    )

    # =========================
    # Prompt
    # =========================

    system_prompt = f"""
You are a professional design trend analyst.

Generate a long, structured Markdown report.

REQUIREMENTS:

1. Follow MECE principle.
2. Embed images ONLY if they start with https://
   using: ![]()
3. Insert images inline.
4. Every key insight must cite source:
   [Website Name](url)
5. Use Markdown headings.
6. Start directly with title.
7. Be detailed and analytical.

Output Language: {language}
"""

    user_prompt = f"""
Topic: {report_topic}

Input Data:
{collected_data_str}
"""

    # =========================
    # Invoke the LLM
    # =========================
    writer({"type": "report_start", "topic": report_topic, "language": language})

    full_report = ""
    try:
        for chunk in llm.stream([
            SystemMessage(content=system_prompt),
            HumanMessage(content=user_prompt)
        ]):
            # Streamed text delta. (Original comment said "Gemini" but the
            # configured model is Qwen -- the attribute access is the same.)
            if chunk.content:
                delta = chunk.content
                full_report += delta
                writer({"type": "report_delta", "delta": delta})  # push to frontend in real time
    except Exception as e:
        error_msg = f"LLM generation failed: {str(e)}"
        writer({"type": "report_error", "message": error_msg})
        return error_msg

    report_content = full_report.strip()

    # =========================
    # Save the report
    # =========================
    output_dir = "workspace/reports"
    os.makedirs(output_dir, exist_ok=True)

    # Strip characters that are illegal in file names on common platforms.
    safe_topic = re.sub(r'[\\/*?:"<>|]', "", report_topic.replace(" ", "_"))
    filename = f"{output_dir}/{safe_topic}.md"

    try:
        with open(filename, "w", encoding="utf-8") as f:
            f.write(report_content)
        writer({"type": "report_complete", "file_path": filename})
    except Exception as e:
        # Saving is best-effort: the report text is still returned below.
        writer({"type": "report_save_warning", "message": str(e)})

    # Return the full content as the tool result; the body has already been
    # streamed to the client via report_delta events.
    # NOTE(review): "(unknown)" looks like a lost interpolation -- this
    # almost certainly intended f"...saved to: {filename}"; confirm and fix.
    return report_content + f"\n\n✅ Report saved to: (unknown)"
|
|
||||||
@@ -1,74 +0,0 @@
|
|||||||
import asyncio
|
|
||||||
import json
|
|
||||||
from datetime import datetime
|
|
||||||
from typing import List, Set, Optional
|
|
||||||
from langchain_core.tools import tool
|
|
||||||
from tavily import TavilyClient
|
|
||||||
|
|
||||||
from src.core.config import settings
|
|
||||||
|
|
||||||
# Tavily search API key, loaded from application settings.
TAVILY_API_KEY = settings.TAVILY_API_KEY
|
|
||||||
|
|
||||||
|
|
||||||
@tool
async def topic_research(topic: str, max_urls: int = 15) -> str:
    """
    Deep-research tool. Runs multi-angle searches for a topic via the
    Tavily search engine, auto-generating targeted queries (with the
    current year and trend keywords), and returns a de-duplicated list
    of high-quality URLs as a JSON string.
    """
    if not TAVILY_API_KEY:
        return "❌ 错误: 未配置 TAVILY_API_KEY。"

    client = TavilyClient(api_key=TAVILY_API_KEY)

    # 1. Generate multi-angle search queries (cheap, done in-tool).
    current_year = datetime.now().strftime("%Y")
    queries = [
        f"{topic} trends {current_year}",
        f"{topic} market analysis {current_year}",
        f"top selling {topic} styles {current_year}",
        f"best {topic} materials and colors {current_year}"
    ]

    # 2. Run the searches in parallel.
    async def perform_search(q: str):
        # The Tavily SDK is synchronous; run it in a worker thread so the
        # event loop is not blocked.
        def sync_search():
            try:
                response = client.search(
                    query=q,
                    search_depth="advanced",
                    max_results=5,
                    include_answer=False
                )
                return response.get('results', [])
            except Exception as e:
                # Best-effort: a failed query contributes no results.
                print(f"Search error: {e}")
                return []

        return await asyncio.to_thread(sync_search)

    search_tasks = [perform_search(q) for q in queries]
    search_results_list = await asyncio.gather(*search_tasks)

    # 3. De-duplicate and filter the results.
    seen_urls: Set[str] = set()
    final_urls = []

    # Skip obvious non-article resources.
    skip_extensions = ('.pdf', '.jpg', '.png', '.zip', '.exe')

    for results in search_results_list:
        for item in results:
            url = item.get('url')
            if url and url not in seen_urls:
                if not url.lower().endswith(skip_extensions):
                    seen_urls.add(url)
                    final_urls.append(url)

    # 4. Truncate to the requested maximum.
    selected_urls = final_urls[:max_urls]

    # Return a JSON string so the agent can feed it straight into the
    # batch crawler (crawl4ai) as its next step.
    return json.dumps(selected_urls, ensure_ascii=False)
|
|
||||||
@@ -1,27 +0,0 @@
|
|||||||
import os
|
|
||||||
from langchain_core.tools import tool
|
|
||||||
|
|
||||||
# Local directory where research reports are written.
OUTPUT_DIR = "./research_reports"
# Created eagerly at import time so the tool can assume it exists.
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)
|
|
||||||
|
|
||||||
|
|
||||||
@tool
def save_to_local_disk(filename: str, content: str) -> str:
    """
    Save content to the local physical disk under ``OUTPUT_DIR``.

    filename: file name (e.g. 'sofa_report.md')
    content:  text content of the research report or data

    Returns a human-readable success message containing the absolute
    path, or an error message on failure.
    """
    import re  # local import: only needed for filename sanitising

    try:
        # basename() alone only strips directory components; characters
        # like *?:"<>| (illegal on Windows) survived in the original.
        # Remove them explicitly so the write cannot fail on them.
        safe_filename = re.sub(r'[\\/*?:"<>|]', "", os.path.basename(filename))
        if not safe_filename:
            # e.g. filename was "dir/" or consisted only of illegal chars
            return "❌ 保存失败,错误原因: invalid or empty filename"
        file_path = os.path.join(OUTPUT_DIR, safe_filename)

        with open(file_path, "w", encoding="utf-8") as f:
            f.write(content)

        return f"✅ 成功!文件已保存至本地物理路径: {os.path.abspath(file_path)}"
    except Exception as e:
        return f"❌ 保存失败,错误原因: {str(e)}"
|
|
||||||
@@ -1,225 +0,0 @@
|
|||||||
import os
|
|
||||||
import re
|
|
||||||
import json
|
|
||||||
from datetime import datetime
|
|
||||||
from typing import List, Dict, Optional
|
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
|
||||||
from langchain_core.tools import tool
|
|
||||||
from langchain_core.documents import Document
|
|
||||||
|
|
||||||
# RAG
|
|
||||||
from langchain_community.vectorstores import FAISS
|
|
||||||
from langchain_huggingface import HuggingFaceEmbeddings
|
|
||||||
from sentence_transformers import CrossEncoder
|
|
||||||
|
|
||||||
# =========================
# Global models (singletons)
# =========================

# Bi-encoder that embeds markdown sections for the FAISS vector search.
_EMBEDDING_MODEL = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# Cross-encoder that re-ranks the vector-search candidates.
_RERANK_MODEL = CrossEncoder(
    "cross-encoder/ms-marco-MiniLM-L-6-v2"
)
|
|
||||||
|
|
||||||
|
|
||||||
class StructuredRetrievalInput(BaseModel):
    """Input schema for the ``structured_retrieval`` tool."""

    # Markdown files produced by the crawl step.
    file_paths: List[str] = Field(..., description="List of local markdown file paths.")
    # Natural-language description of what to extract.
    query: str = Field(..., description="Extraction query")
    # Overrides per-file source detection when provided.
    source_url: Optional[str] = Field(None, description="Optional global source URL")
|
|
||||||
|
|
||||||
|
|
||||||
@tool("structured_retrieval", args_schema=StructuredRetrievalInput)
def structured_retrieval(
    file_paths: List[str],
    query: str,
    source_url: Optional[str] = None
) -> Dict:
    """
    Batch structured extraction from markdown files.
    - Performs vector search + re-ranking
    - Saves extracted structured data as JSON file to disk
    - Returns ONLY summary (status, count, file path)
    """

    # -- 1. Collect the content of every readable markdown file ----------
    all_docs_pool: List[Document] = []

    for path in file_paths:
        if not os.path.exists(path) or not path.endswith((".md", ".markdown")):
            continue

        file_name = os.path.basename(path)

        with open(path, "r", encoding="utf-8") as f:
            content = f.read()

        # Per-file source: explicit override, else the <!-- Source: --> tag.
        current_source = source_url or _extract_source_from_md(content) or "unknown"

        sections = _split_markdown_by_headers(content)

        for sec in sections:
            all_docs_pool.append(
                Document(
                    page_content=sec,
                    metadata={"source_url": current_source, "file_name": file_name}
                )
            )

    if not all_docs_pool:
        return {"status": "no_documents_found", "items_count": 0, "json_path": None}

    # -- 2. Vector search ------------------------------------------------
    vector_store = FAISS.from_documents(all_docs_pool, _EMBEDDING_MODEL)
    retrieved = vector_store.similarity_search(query, k=200)

    # -- 3. Turn hits into structured snippets ---------------------------
    structured_items = []

    for doc in retrieved:
        text = doc.page_content.strip()
        # Drop fragments too short to carry an insight.
        if len(text) < 30:
            continue

        # Image URLs referenced by markdown image syntax inside the hit.
        images = list(set(re.findall(r"!\[.*?\]\((.*?)\)", text)))

        structured_items.append(
            {
                "text": text,
                "images": images,
                "source_url": doc.metadata.get("source_url"),
                "file_name": doc.metadata.get("file_name")
            }
        )

    # -- 4. Re-rank ------------------------------------------------------
    if structured_items:
        # De-duplicate by text, score each (query, text) pair with the
        # cross-encoder, then keep the 50 best.
        unique_items = {item["text"]: item for item in structured_items}.values()
        pairs = [[query, item["text"]] for item in unique_items]
        scores = _RERANK_MODEL.predict(pairs)

        sorted_items = sorted(
            zip(scores, unique_items),
            key=lambda x: x[0],
            reverse=True
        )
        top_items = [item for _, item in sorted_items[:50]]
    else:
        top_items = []

    # -- 5. Write the JSON file ------------------------------------------
    if not top_items:
        return {"status": "no_relevant_content", "items_count": 0, "json_path": None}

    # Build a meaningful file name from the query and a timestamp.
    safe_query = re.sub(r'[^a-zA-Z0-9\u4e00-\u9fa5]', '_', query)[:40]
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    json_filename = f"extracted_{safe_query}_{timestamp}.json"

    # Output directory aligned with crawl4ai_batch.
    # NOTE(review): assumes file_paths[0] is set and lives one level below
    # the crawl output root -- confirm against the crawler's layout.
    output_dir = os.path.join(os.path.dirname(file_paths[0]), "..", "extracted")
    os.makedirs(output_dir, exist_ok=True)

    json_path = os.path.join(output_dir, json_filename)

    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(
            {
                "query": query,
                "extracted_at": timestamp,
                "item_count": len(top_items),
                "items": top_items
            },
            f,
            ensure_ascii=False,
            indent=2
        )

    # -- 6. Return only a summary ----------------------------------------
    return {
        "status": "success",
        "items_count": len(top_items),
        "json_path": json_path,
        "summary": f"已提取 {len(top_items)} 個高相關片段,儲存於 {json_path}"
    }
|
|
||||||
|
|
||||||
|
|
||||||
def _extract_source_from_md(content: str) -> Optional[str]:
|
|
||||||
match = re.search(r"<!--\s*Source:\s*(.*?)\s*-->", content)
|
|
||||||
return match.group(1).strip() if match else None
|
|
||||||
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# Markdown Header Split
|
|
||||||
# =========================
|
|
||||||
|
|
||||||
def _split_markdown_by_headers(
|
|
||||||
content: str,
|
|
||||||
max_chars: int = 2000,
|
|
||||||
overlap: int = 150,
|
|
||||||
):
|
|
||||||
header_re = re.compile(
|
|
||||||
r'^(#{1,6})\s+(.+?)\s*$',
|
|
||||||
re.MULTILINE
|
|
||||||
)
|
|
||||||
|
|
||||||
matches = list(header_re.finditer(content))
|
|
||||||
|
|
||||||
if not matches:
|
|
||||||
return _chunk_text(content, max_chars, overlap)
|
|
||||||
|
|
||||||
sections = []
|
|
||||||
|
|
||||||
for i, m in enumerate(matches):
|
|
||||||
start = m.start()
|
|
||||||
end = (
|
|
||||||
matches[i + 1].start()
|
|
||||||
if i + 1 < len(matches)
|
|
||||||
else len(content)
|
|
||||||
)
|
|
||||||
|
|
||||||
block = content[start:end].strip()
|
|
||||||
if block:
|
|
||||||
sections.append(block)
|
|
||||||
|
|
||||||
final_sections = []
|
|
||||||
|
|
||||||
for s in sections:
|
|
||||||
if len(s) > max_chars:
|
|
||||||
final_sections.extend(
|
|
||||||
_chunk_text(s, max_chars, overlap)
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
final_sections.append(s)
|
|
||||||
|
|
||||||
return final_sections
|
|
||||||
|
|
||||||
|
|
||||||
def _chunk_text(
|
|
||||||
text: str,
|
|
||||||
max_chars: int = 2000,
|
|
||||||
overlap: int = 150
|
|
||||||
):
|
|
||||||
text = text.strip()
|
|
||||||
if len(text) <= max_chars:
|
|
||||||
return [text]
|
|
||||||
|
|
||||||
chunks = []
|
|
||||||
start = 0
|
|
||||||
|
|
||||||
while start < len(text):
|
|
||||||
end = min(len(text), start + max_chars)
|
|
||||||
chunk = text[start:end].strip()
|
|
||||||
|
|
||||||
if chunk:
|
|
||||||
chunks.append(chunk)
|
|
||||||
|
|
||||||
if end == len(text):
|
|
||||||
break
|
|
||||||
|
|
||||||
start = max(0, end - overlap)
|
|
||||||
|
|
||||||
return chunks
|
|
||||||
@@ -1,38 +0,0 @@
|
|||||||
from typing import Literal
|
|
||||||
from langchain_core.tools import tool
|
|
||||||
from pydantic import BaseModel, Field
|
|
||||||
|
|
||||||
|
|
||||||
class TerminateInput(BaseModel):
    """Input parameters for terminating the interaction."""

    # Outcome of the run: 'success' when the task finished,
    # 'failure' when it could not continue.
    status: Literal["success", "failure"] = Field(
        description="互動結束的狀態:'success' 表示任務完成,'failure' 表示無法繼續",
        examples=["success", "failure"]
    )
    # Optional short explanation of why the run ended.
    reason: str = Field(
        default="",
        description="可選:簡單說明為什麼結束(例如 '報告已生成' 或 '缺少關鍵資訊')",
        examples=["報告已成功生成", "無法取得足夠資料"]
    )
|
|
||||||
|
|
||||||
|
|
||||||
@tool(args_schema=TerminateInput)
def terminate(status: str, reason: str = "") -> str:
    """End the current interaction.

    Call this once the task is complete (e.g. the final report has been
    generated by report_generator), when an unrecoverable error occurs or
    key information is missing, or when the user's request is fully
    satisfied.  Make sure all required steps are done first, and give a
    short explanation in ``reason``.
    """
    # Guard against values outside the accepted literals.
    if status not in ("success", "failure"):
        status = "failure"

    parts = [f"互動已終止,狀態:{status.upper()}"]
    if reason:
        parts.append(f"原因:{reason}")
    return "\n".join(parts)
|
|
||||||
@@ -1,96 +0,0 @@
|
|||||||
import json
|
|
||||||
import os
|
|
||||||
from typing import List, Literal, Optional, Dict, Any
|
|
||||||
from langchain_core.tools import tool
|
|
||||||
|
|
||||||
# Path of the local JSON file that persists the user persona.
DB_PATH = os.path.join("workspace", "user_persona.json")
|
|
||||||
|
|
||||||
|
|
||||||
def _load_store() -> Dict[str, Any]:
    """Load persona data from the local file; empty dict on any failure."""
    if not os.path.exists(DB_PATH):
        return {}
    try:
        with open(DB_PATH, "r", encoding="utf-8") as f:
            return json.load(f)
    except Exception:
        # A corrupt or unreadable store degrades to "no persona yet".
        return {}
|
|
||||||
|
|
||||||
|
|
||||||
def _save_store(data: Dict[str, Any]):
    """Persist persona data to the local JSON file (UTF-8, pretty-printed)."""
    parent_dir = os.path.dirname(DB_PATH)
    os.makedirs(parent_dir, exist_ok=True)
    with open(DB_PATH, "w", encoding="utf-8") as fh:
        json.dump(data, fh, ensure_ascii=False, indent=2)
|
|
||||||
|
|
||||||
|
|
||||||
@tool
def manage_user_persona(
    command: Literal["set", "update", "get", "clear"],
    design_type: Optional[str] = None,
    style_preference: Optional[str] = None,
    budget_range: Optional[str] = None,
    color_palette: Optional[List[str]] = None,
    target_audience: Optional[str] = None,
    extra_requirements: Optional[str] = None
) -> str:
    """
    User persona and design-preference management tool.

    Sets, updates, fetches or resets the user's design context (style,
    budget, colours, ...).  The agent must call ``get`` before starting
    research and prompt the user for any missing key information.
    """
    # Re-read the store on every call so data stays consistent across
    # processes and restarts.
    store = _load_store()

    if command == "clear":
        # Remove the backing file entirely; a later get sees an empty store.
        if os.path.exists(DB_PATH):
            os.remove(DB_PATH)
        return "✅ 用户个性化模板已从本地文件清空。"

    if command == "get":
        if not store:
            return "⚠️ [缺失信息] 当前尚未配置画像。请询问用户:设计类型(如沙发)、风格偏好(如极简)等。"

        # Render the persona in a readable form for the agent.
        res = [
            "--- 👤 实时用户画像 (本地存储) ---",
            f"🎯 类型: {store.get('design_type', '未设定')}",
            f"🎨 风格: {store.get('style_preference', '未设定')}",
            f"💰 预算: {store.get('budget_range', '未设定')}",
            f"🌈 色系: {', '.join(store.get('color_palette', [])) or '未设定'}",
            f"👥 受众: {store.get('target_audience', '未设定')}",
            f"📝 需求: {store.get('extra_requirements', '未设定')}",
            "-----------------------"
        ]

        # Sanity check: flag missing key fields for the agent to follow up.
        if not store.get('design_type') or not store.get('style_preference'):
            res.append("\n⚠️ 关键信息缺失,建议补充 '设计类型' 和 '风格偏好'。")
        return "\n".join(res)

    if command in ["set", "update"]:
        if command == "set":
            store = {}  # 'set' replaces the whole persona; 'update' merges

        # Collect the arguments actually passed by the caller.
        update_data = {
            "design_type": design_type,
            "style_preference": style_preference,
            "budget_range": budget_range,
            "color_palette": color_palette,
            "target_audience": target_audience,
            "extra_requirements": extra_requirements
        }

        # Apply only the fields that were provided (non-None).
        for k, v in update_data.items():
            if v is not None:
                store[k] = v

        # Persist to disk.
        _save_store(store)

        return f"✅ 本地画像已同步。当前配置:\n{json.dumps(store, ensure_ascii=False, indent=2)}"

    return "❌ 错误:未知命令。"
|
|
||||||
Reference in New Issue
Block a user