diff --git a/app/service/fashion_agent/init_llm.py b/app/service/fashion_agent/init_llm.py
index 01c2a9e..04085db 100644
--- a/app/service/fashion_agent/init_llm.py
+++ b/app/service/fashion_agent/init_llm.py
@@ -1,6 +1,7 @@
 import os
 
 from dotenv import load_dotenv
+from langchain_ollama import ChatOllama
 from langchain_qwq import ChatQwen
 
 load_dotenv()
@@ -9,25 +10,40 @@ QWEN_API_KEY_INTL = os.environ.get("QWEN_API_KEY_INTL", "")
 
 
 def build_llm(enable_thinking: bool = False):
-    llm = ChatQwen(
-        model="qwen3.6-plus",
-        timeout=None,
-        max_retries=2,
-        enable_thinking=enable_thinking,
-        streaming=True,
-        api_key=QWEN_API_KEY_INTL,
+    # llm = ChatQwen(
+    #     model="qwen3.6-plus",
+    #     timeout=None,
+    #     max_retries=2,
+    #     enable_thinking=enable_thinking,
+    #     streaming=True,
+    #     api_key=QWEN_API_KEY_INTL,
+    # )
+    # return llm
+    llm = ChatOllama(
+        model="qwen3.6:27b",
+        base_url="http://127.0.0.1:11434",
+        reasoning=enable_thinking,  # Key setting: when True, enables thinking mode so reasoning_content is split out automatically
+        temperature=0.7,
+        num_ctx=32768,
     )
     return llm
 
 
-qwen_plus_llm = ChatQwen(
-    model="qwen-plus",
-    timeout=None,
-    max_retries=2,
-    streaming=False,
-    temperature=0.25,
-    top_p=0.8,
-    api_key=QWEN_API_KEY_INTL,
-)
+# qwen_plus_llm = ChatQwen(
+#     model="qwen-plus",
+#     timeout=None,
+#     max_retries=2,
+#     streaming=False,
+#     temperature=0.25,
+#     top_p=0.8,
+#     api_key=QWEN_API_KEY_INTL,
+# )
 # response = qwen_plus_llm.invoke("你好")
 # print(response)
+
+
+qwen_plus_llm = ChatOllama(  # Shared non-thinking client; disable_streaming=True makes stream() fall back to invoke()
+    model="qwen3.6:27b", base_url="http://127.0.0.1:11434", reasoning=False, temperature=0.7, num_ctx=32768, disable_streaming=True
+)
+# for chunk in qwen_plus_llm.stream("你好"):
+#     print(chunk)