From 456298cee717f6f8c3ca2b723cbdbd9a7799a5ef Mon Sep 17 00:00:00 2001
From: zcr <cgzhou@aidlab.hk>
Date: Wed, 17 Jun 2026 12:01:04 +0800
Subject: [PATCH] =?UTF-8?q?=E9=87=8D=E6=9E=84=E5=88=9D=E5=A7=8B=E5=8C=96?=
 =?UTF-8?q?=E9=80=BB=E8=BE=91=E4=BB=A5=E6=94=AF=E6=8C=81=E6=A0=B9=E6=8D=AE?=
 =?UTF-8?q?=E7=8E=AF=E5=A2=83=E9=80=89=E6=8B=A9=E4=B8=8D=E5=90=8C=E7=9A=84?=
 =?UTF-8?q?LLM=E6=A8=A1=E5=9E=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/service/fashion_agent/init_llm.py | 51 +++++++++++++++++----------
 1 file changed, 32 insertions(+), 19 deletions(-)

diff --git a/app/service/fashion_agent/init_llm.py b/app/service/fashion_agent/init_llm.py
index 04085db..d95a377 100644
--- a/app/service/fashion_agent/init_llm.py
+++ b/app/service/fashion_agent/init_llm.py
@@ -7,25 +7,27 @@ from langchain_qwq import ChatQwen
 load_dotenv()
 
 QWEN_API_KEY_INTL = os.environ.get("QWEN_API_KEY_INTL", "")
+SERVE_ENV = os.environ.get("SERVE_ENV", "dev")  # "prod" selects the Qwen API; any other value (default "dev") selects local Ollama
 
 
 def build_llm(enable_thinking: bool = False):
-    # llm = ChatQwen(
-    #     model="qwen3.6-plus",
-    #     timeout=None,
-    #     max_retries=2,
-    #     enable_thinking=enable_thinking,
-    #     streaming=True,
-    #     api_key=QWEN_API_KEY_INTL,
-    # )
-    # return llm
-    llm = ChatOllama(
-        model="qwen3.6:27b",
-        base_url="http://127.0.0.1:11434",
-        reasoning=enable_thinking,  # 核心：开启思考，自动拆分reasoning_content
-        temperature=0.7,
-        num_ctx=32768,
-    )
+    if SERVE_ENV == "prod":  # exact, case-sensitive match; prod builds a ChatQwen client
+        llm = ChatQwen(
+            model="qwen3.6-plus",
+            timeout=None,
+            max_retries=2,
+            enable_thinking=enable_thinking,  # forwarded from the build_llm argument
+            streaming=True,
+            api_key=QWEN_API_KEY_INTL,  # read from the environment; "" when the variable is unset
+        )
+    else:  # every other SERVE_ENV value (default "dev") builds a ChatOllama client
+        llm = ChatOllama(
+            model="qwen3.6:27b",
+            base_url="http://127.0.0.1:11434",  # Ollama endpoint on the local machine
+            reasoning=enable_thinking,  # same build_llm flag, passed under ChatOllama's parameter name
+            temperature=0.7,
+            num_ctx=32768,
+        )
     return llm
 
 
@@ -42,8 +44,19 @@ def build_llm(enable_thinking: bool = False):
 # print(response)
 
 
-qwen_plus_llm = ChatOllama(
-    model="qwen3.6:27b", base_url="http://127.0.0.1:11434", reasoning=False, temperature=0.7, num_ctx=32768, extra_kwargs={"stream": False}
-)
+if SERVE_ENV == "prod":  # evaluated once at import time; prod binds a ChatQwen client
+    qwen_plus_llm = ChatQwen(
+        model="qwen-plus",
+        timeout=None,
+        max_retries=2,
+        streaming=False,  # unlike build_llm's prod branch, which sets streaming=True
+        temperature=0.25,  # differs from the 0.7 used in the non-prod branch below
+        top_p=0.8,
+        api_key=QWEN_API_KEY_INTL,  # read from the environment; "" when the variable is unset
+    )
+else:  # every other SERVE_ENV value binds the same name to a local ChatOllama client
+    qwen_plus_llm = ChatOllama(
+        model="qwen3.6:27b", base_url="http://127.0.0.1:11434", reasoning=False, temperature=0.7, num_ctx=32768, extra_kwargs={"stream": False}
+    )  # NOTE(review): confirm ChatOllama accepts extra_kwargs={"stream": False} — not verifiable from this patch
 # for chunk in qwen_plus_llm.stream("你好"):
 #     print(chunk)