# Patch summary: app/service/fashion_agent/init_llm.py
#
# Adds a module-level SERVE_ENV setting, read from the environment with a
# default of "dev", and uses it to pick the LLM backend in two places:
#
#   * build_llm(enable_thinking): when SERVE_ENV == "prod" it builds a ChatQwen
#     client (model "qwen3.6-plus", streaming=True, enable_thinking passed
#     through); otherwise it builds a ChatOllama client against
#     http://127.0.0.1:11434 (model "qwen3.6:27b", reasoning=enable_thinking).
#     This replaces the previously commented-out ChatQwen block and the
#     unconditional ChatOllama construction.
#   * qwen_plus_llm (module level): when SERVE_ENV == "prod" it is a ChatQwen
#     client (model "qwen-plus", streaming=False, temperature=0.25, top_p=0.8);
#     otherwise it keeps the same ChatOllama arguments as before the patch.
#
# The removed ChatOllama `reasoning=` line carries a Chinese comment, roughly:
# "key point: enable thinking; reasoning_content is split out automatically".
# It is left verbatim below because removed lines have to match the target file.
#
# NOTE(review): QWEN_API_KEY_INTL defaults to "" when unset, so the prod branch
#   can construct ChatQwen with an empty key - confirm prod always sets it.
# NOTE(review): any SERVE_ENV value other than the exact string "prod" selects
#   the local Ollama branch - confirm that is the intended fallback.
# NOTE(review): the two prod clients use different model names ("qwen3.6-plus"
#   vs "qwen-plus") - presumably deliberate; verify.
# NOTE(review): in this copy the hunks are collapsed onto a single line; the
#   original line breaks are not recoverable from here with certainty.
diff --git a/app/service/fashion_agent/init_llm.py b/app/service/fashion_agent/init_llm.py index 04085db..d95a377 100644 --- a/app/service/fashion_agent/init_llm.py +++ b/app/service/fashion_agent/init_llm.py @@ -7,25 +7,27 @@ from langchain_qwq import ChatQwen load_dotenv() QWEN_API_KEY_INTL = os.environ.get("QWEN_API_KEY_INTL", "") +SERVE_ENV = os.environ.get("SERVE_ENV", "dev") # prod uses the Qwen API, dev uses a local Ollama def build_llm(enable_thinking: bool = False): - # llm = ChatQwen( - # model="qwen3.6-plus", - # timeout=None, - # max_retries=2, - # enable_thinking=enable_thinking, - # streaming=True, - # api_key=QWEN_API_KEY_INTL, - # ) - # return llm - llm = ChatOllama( - model="qwen3.6:27b", - base_url="http://127.0.0.1:11434", - reasoning=enable_thinking, # 核心:开启思考,自动拆分reasoning_content - temperature=0.7, - num_ctx=32768, - ) + if SERVE_ENV == "prod": + llm = ChatQwen( + model="qwen3.6-plus", + timeout=None, + max_retries=2, + enable_thinking=enable_thinking, + streaming=True, + api_key=QWEN_API_KEY_INTL, + ) + else: + llm = ChatOllama( + model="qwen3.6:27b", + base_url="http://127.0.0.1:11434", + reasoning=enable_thinking, + temperature=0.7, + num_ctx=32768, + ) return llm @@ -42,8 +44,19 @@ def build_llm(enable_thinking: bool = False): # print(response) -qwen_plus_llm = ChatOllama( - model="qwen3.6:27b", base_url="http://127.0.0.1:11434", reasoning=False, temperature=0.7, num_ctx=32768, extra_kwargs={"stream": False} -) +if SERVE_ENV == "prod": + qwen_plus_llm = ChatQwen( + model="qwen-plus", + timeout=None, + max_retries=2, + streaming=False, + temperature=0.25, + top_p=0.8, + api_key=QWEN_API_KEY_INTL, + ) +else: + qwen_plus_llm = ChatOllama( + model="qwen3.6:27b", base_url="http://127.0.0.1:11434", reasoning=False, temperature=0.7, num_ctx=32768, extra_kwargs={"stream": False} + ) # for chunk in qwen_plus_llm.stream("你好"): # print(chunk)