From f22d8d601318a6aea560b234890d80b228527c0e Mon Sep 17 00:00:00 2001 From: zcr Date: Wed, 17 Jun 2026 12:14:14 +0800 Subject: [PATCH] =?UTF-8?q?=E9=87=8D=E6=9E=84LLM=E6=9E=84=E5=BB=BA?= =?UTF-8?q?=E9=80=BB=E8=BE=91=EF=BC=8C=E8=B0=83=E6=95=B4=E7=8E=AF=E5=A2=83?= =?UTF-8?q?=E5=88=A4=E6=96=AD=E4=BB=A5=E6=94=AF=E6=8C=81=E6=9C=AC=E5=9C=B0?= =?UTF-8?q?=E5=92=8C=E7=94=9F=E4=BA=A7=E6=A8=A1=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/service/fashion_agent/init_llm.py | 45 +++++++++++---------------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/app/service/fashion_agent/init_llm.py b/app/service/fashion_agent/init_llm.py index d95a377..3f49638 100644 --- a/app/service/fashion_agent/init_llm.py +++ b/app/service/fashion_agent/init_llm.py @@ -11,7 +11,15 @@ SERVE_ENV = os.environ.get("SERVE_ENV", "dev") # prod 使用 Qwen API,dev 使 def build_llm(enable_thinking: bool = False): - if SERVE_ENV == "prod": + if SERVE_ENV == "local": + llm = ChatOllama( + model="qwen3.6:27b", + base_url="http://127.0.0.1:11434", + reasoning=enable_thinking, + temperature=0.7, + num_ctx=32768, + ) + else: llm = ChatQwen( model="qwen3.6-plus", timeout=None, @@ -20,31 +28,20 @@ def build_llm(enable_thinking: bool = False): streaming=True, api_key=QWEN_API_KEY_INTL, ) - else: - llm = ChatOllama( - model="qwen3.6:27b", - base_url="http://127.0.0.1:11434", - reasoning=enable_thinking, - temperature=0.7, - num_ctx=32768, - ) return llm -# qwen_plus_llm = ChatQwen( -# model="qwen-plus", -# timeout=None, -# max_retries=2, -# streaming=False, -# temperature=0.25, -# top_p=0.8, -# api_key=QWEN_API_KEY_INTL, -# ) -# response = qwen_plus_llm.invoke("你好") -# print(response) +if SERVE_ENV == "local": + qwen_plus_llm = ChatOllama( + model="qwen3.6:27b", + base_url="http://127.0.0.1:11434", + reasoning=False, + temperature=0.7, + num_ctx=32768, + extra_kwargs={"stream": False}, + ) - -if SERVE_ENV == "prod": +else: qwen_plus_llm = ChatQwen( model="qwen-plus", timeout=None, @@ -54,9 +51,5 @@ if SERVE_ENV == "prod": top_p=0.8, api_key=QWEN_API_KEY_INTL, ) -else: - qwen_plus_llm = ChatOllama( - model="qwen3.6:27b", base_url="http://127.0.0.1:11434", reasoning=False, temperature=0.7, num_ctx=32768, extra_kwargs={"stream": False} - ) # for chunk in qwen_plus_llm.stream("你好"): # print(chunk)