From 456298cee717f6f8c3ca2b723cbdbd9a7799a5ef Mon Sep 17 00:00:00 2001
From: zcr
Date: Wed, 17 Jun 2026 12:01:04 +0800
Subject: [PATCH] =?UTF-8?q?=E9=87=8D=E6=9E=84=E5=88=9D=E5=A7=8B=E5=8C=96?=
 =?UTF-8?q?=E9=80=BB=E8=BE=91=E4=BB=A5=E6=94=AF=E6=8C=81=E6=A0=B9=E6=8D=AE?=
 =?UTF-8?q?=E7=8E=AF=E5=A2=83=E9=80=89=E6=8B=A9=E4=B8=8D=E5=90=8C=E7=9A=84?=
 =?UTF-8?q?LLM=E6=A8=A1=E5=9E=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
NOTE(review): this patch was rebuilt from a copy whose line breaks and
indentation had been collapsed. Line counts match the hunk headers and the
diffstat, but the indentation of context/removed lines, the blank context
lines, and the exact bytes of the removed `model=... num_ctx=...` line (the
damaged copy had a stray break after `temperature=0.7, `) are assumptions.
Verify against the target file, or apply with `git apply --ignore-whitespace`.
The Subject decodes to: "Refactor initialization logic to support selecting
different LLM models depending on the environment".

 app/service/fashion_agent/init_llm.py | 51 +++++++++++++++++----------
 1 file changed, 32 insertions(+), 19 deletions(-)

diff --git a/app/service/fashion_agent/init_llm.py b/app/service/fashion_agent/init_llm.py
index 04085db..d95a377 100644
--- a/app/service/fashion_agent/init_llm.py
+++ b/app/service/fashion_agent/init_llm.py
@@ -7,25 +7,27 @@ from langchain_qwq import ChatQwen
 load_dotenv()
 
 QWEN_API_KEY_INTL = os.environ.get("QWEN_API_KEY_INTL", "")
+SERVE_ENV = os.environ.get("SERVE_ENV", "dev")  # "prod" uses the Qwen API; any other value uses the local Ollama server
 
 
 def build_llm(enable_thinking: bool = False):
-    # llm = ChatQwen(
-    #     model="qwen3.6-plus",
-    #     timeout=None,
-    #     max_retries=2,
-    #     enable_thinking=enable_thinking,
-    #     streaming=True,
-    #     api_key=QWEN_API_KEY_INTL,
-    # )
-    # return llm
-    llm = ChatOllama(
-        model="qwen3.6:27b",
-        base_url="http://127.0.0.1:11434",
-        reasoning=enable_thinking,  # 核心:开启思考,自动拆分reasoning_content
-        temperature=0.7,
-        num_ctx=32768,
-    )
+    if SERVE_ENV == "prod":
+        llm = ChatQwen(
+            model="qwen3.6-plus",
+            timeout=None,
+            max_retries=2,
+            enable_thinking=enable_thinking,
+            streaming=True,
+            api_key=QWEN_API_KEY_INTL,
+        )
+    else:
+        llm = ChatOllama(
+            model="qwen3.6:27b",
+            base_url="http://127.0.0.1:11434",
+            reasoning=enable_thinking,
+            temperature=0.7,
+            num_ctx=32768,
+        )
     return llm
 
 
@@ -42,8 +44,19 @@ def build_llm(enable_thinking: bool = False):
 # print(response)
 
 
-qwen_plus_llm = ChatOllama(
-    model="qwen3.6:27b", base_url="http://127.0.0.1:11434", reasoning=False, temperature=0.7, num_ctx=32768, extra_kwargs={"stream": False}
-)
+if SERVE_ENV == "prod":
+    qwen_plus_llm = ChatQwen(
+        model="qwen-plus",
+        timeout=None,
+        max_retries=2,
+        streaming=False,
+        temperature=0.25,
+        top_p=0.8,
+        api_key=QWEN_API_KEY_INTL,
+    )
+else:
+    qwen_plus_llm = ChatOllama(
+        model="qwen3.6:27b", base_url="http://127.0.0.1:11434", reasoning=False, temperature=0.7, num_ctx=32768, extra_kwargs={"stream": False}
+    )
 # for chunk in qwen_plus_llm.stream("你好"):
 #     print(chunk)