diff --git a/app/service/fashion_agent/init_llm.py b/app/service/fashion_agent/init_llm.py index d95a377..3f49638 100644 --- a/app/service/fashion_agent/init_llm.py +++ b/app/service/fashion_agent/init_llm.py @@ -11,7 +11,15 @@ SERVE_ENV = os.environ.get("SERVE_ENV", "dev") # prod 使用 Qwen API,dev 使 def build_llm(enable_thinking: bool = False): - if SERVE_ENV == "prod": + if SERVE_ENV == "local": + llm = ChatOllama( + model="qwen3.6:27b", + base_url="http://127.0.0.1:11434", + reasoning=enable_thinking, + temperature=0.7, + num_ctx=32768, + ) + else: llm = ChatQwen( model="qwen3.6-plus", timeout=None, @@ -20,31 +28,20 @@ def build_llm(enable_thinking: bool = False): streaming=True, api_key=QWEN_API_KEY_INTL, ) - else: - llm = ChatOllama( - model="qwen3.6:27b", - base_url="http://127.0.0.1:11434", - reasoning=enable_thinking, - temperature=0.7, - num_ctx=32768, - ) return llm -# qwen_plus_llm = ChatQwen( -# model="qwen-plus", -# timeout=None, -# max_retries=2, -# streaming=False, -# temperature=0.25, -# top_p=0.8, -# api_key=QWEN_API_KEY_INTL, -# ) -# response = qwen_plus_llm.invoke("你好") -# print(response) +if SERVE_ENV == "local": + qwen_plus_llm = ChatOllama( + model="qwen3.6:27b", + base_url="http://127.0.0.1:11434", + reasoning=False, + temperature=0.7, + num_ctx=32768, + extra_kwargs={"stream": False}, + ) - -if SERVE_ENV == "prod": +else: qwen_plus_llm = ChatQwen( model="qwen-plus", timeout=None, @@ -54,9 +51,5 @@ if SERVE_ENV == "prod": top_p=0.8, api_key=QWEN_API_KEY_INTL, ) -else: - qwen_plus_llm = ChatOllama( - model="qwen3.6:27b", base_url="http://127.0.0.1:11434", reasoning=False, temperature=0.7, num_ctx=32768, extra_kwargs={"stream": False} - ) # for chunk in qwen_plus_llm.stream("你好"): # print(chunk)