From f22d8d601318a6aea560b234890d80b228527c0e Mon Sep 17 00:00:00 2001
From: zcr <cgzhou@aidlab.hk>
Date: Wed, 17 Jun 2026 12:14:14 +0800
Subject: [PATCH] =?UTF-8?q?=E9=87=8D=E6=9E=84LLM=E6=9E=84=E5=BB=BA?=
 =?UTF-8?q?=E9=80=BB=E8=BE=91=EF=BC=8C=E8=B0=83=E6=95=B4=E7=8E=AF=E5=A2=83?=
 =?UTF-8?q?=E5=88=A4=E6=96=AD=E4=BB=A5=E6=94=AF=E6=8C=81=E6=9C=AC=E5=9C=B0?=
 =?UTF-8?q?=E5=92=8C=E7=94=9F=E4=BA=A7=E6=A8=A1=E5=BC=8F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/service/fashion_agent/init_llm.py | 45 +++++++++++----------------
 1 file changed, 19 insertions(+), 26 deletions(-)

diff --git a/app/service/fashion_agent/init_llm.py b/app/service/fashion_agent/init_llm.py
index d95a377..3f49638 100644
--- a/app/service/fashion_agent/init_llm.py
+++ b/app/service/fashion_agent/init_llm.py
@@ -11,7 +11,15 @@ SERVE_ENV = os.environ.get("SERVE_ENV", "dev")  # prod 使用 Qwen API，dev 使
 
 
 def build_llm(enable_thinking: bool = False):
-    if SERVE_ENV == "prod":
+    if SERVE_ENV == "local":
+        llm = ChatOllama(
+            model="qwen3.6:27b",
+            base_url="http://127.0.0.1:11434",
+            reasoning=enable_thinking,
+            temperature=0.7,
+            num_ctx=32768,
+        )
+    else:
         llm = ChatQwen(
             model="qwen3.6-plus",
             timeout=None,
@@ -20,31 +28,20 @@ def build_llm(enable_thinking: bool = False):
             streaming=True,
             api_key=QWEN_API_KEY_INTL,
         )
-    else:
-        llm = ChatOllama(
-            model="qwen3.6:27b",
-            base_url="http://127.0.0.1:11434",
-            reasoning=enable_thinking,
-            temperature=0.7,
-            num_ctx=32768,
-        )
     return llm
 
 
-# qwen_plus_llm = ChatQwen(
-#     model="qwen-plus",
-#     timeout=None,
-#     max_retries=2,
-#     streaming=False,
-#     temperature=0.25,
-#     top_p=0.8,
-#     api_key=QWEN_API_KEY_INTL,
-# )
-# response = qwen_plus_llm.invoke("你好")
-# print(response)
+if SERVE_ENV == "local":
+    qwen_plus_llm = ChatOllama(
+        model="qwen3.6:27b",
+        base_url="http://127.0.0.1:11434",
+        reasoning=False,
+        temperature=0.7,
+        num_ctx=32768,
+        extra_kwargs={"stream": False},
+    )
 
-
-if SERVE_ENV == "prod":
+else:
     qwen_plus_llm = ChatQwen(
         model="qwen-plus",
         timeout=None,
@@ -54,9 +51,5 @@ if SERVE_ENV == "prod":
         top_p=0.8,
         api_key=QWEN_API_KEY_INTL,
     )
-else:
-    qwen_plus_llm = ChatOllama(
-        model="qwen3.6:27b", base_url="http://127.0.0.1:11434", reasoning=False, temperature=0.7, num_ctx=32768, extra_kwargs={"stream": False}
-    )
 # for chunk in qwen_plus_llm.stream("你好"):
 #     print(chunk)