"""LLM clients for the fashion agent, served by a local Ollama instance."""

import os

from dotenv import load_dotenv
from langchain_ollama import ChatOllama
from langchain_qwq import ChatQwen  # noqa: F401  (kept for the hosted fallback below)

load_dotenv()

QWEN_API_KEY_INTL = os.environ.get("QWEN_API_KEY_INTL", "")

# Ollama settings shared by every client created in this module.  The model
# and URL defaults are the values that used to be hard-coded at each call
# site; set the environment variables to point at another server or model.
OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "http://127.0.0.1:11434")
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "qwen3.6:27b")
OLLAMA_TEMPERATURE = 0.7
OLLAMA_NUM_CTX = 32768

# NOTE: before the switch to Ollama these objects were ChatQwen clients
# authenticated with api_key=QWEN_API_KEY_INTL:
#   build_llm()   -> model="qwen3.6-plus", timeout=None, max_retries=2,
#                    enable_thinking=enable_thinking, streaming=True
#   qwen_plus_llm -> model="qwen-plus", timeout=None, max_retries=2,
#                    streaming=False, temperature=0.25, top_p=0.8


def _new_ollama_chat(*, reasoning: bool) -> ChatOllama:
    """Create a ChatOllama client with the module-wide Ollama settings.

    Args:
        reasoning: Forwarded unchanged to ChatOllama's ``reasoning`` option.

    Returns:
        A newly constructed ``ChatOllama`` instance.
    """
    return ChatOllama(
        model=OLLAMA_MODEL,
        base_url=OLLAMA_BASE_URL,
        reasoning=reasoning,
        temperature=OLLAMA_TEMPERATURE,
        num_ctx=OLLAMA_NUM_CTX,
    )


def build_llm(enable_thinking: bool = False) -> ChatOllama:
    """Build a fresh chat model for the agent.

    Args:
        enable_thinking: Passed as ChatOllama's ``reasoning`` flag.  Per the
            original author's note, enabling it makes the model's thinking
            available separately as ``reasoning_content``.  It is off by
            default.

    Returns:
        A new ``ChatOllama`` client; each call creates its own instance.
    """
    return _new_ollama_chat(reasoning=enable_thinking)


# Shared, non-reasoning client created once at import time.  The name is kept
# from the hosted "qwen-plus" era so existing importers keep working.
#
# The former ``extra_kwargs={"stream": False}`` argument was dropped: it is
# not a documented ChatOllama option, and whether a request streams is decided
# by the method the caller uses (``invoke`` vs ``stream``).
qwen_plus_llm = _new_ollama_chat(reasoning=False)

# Manual smoke test:
# for chunk in qwen_plus_llm.stream("hello"):
#     print(chunk)