"""LLM client construction for the service.

The backend is chosen by the ``SERVE_ENV`` environment variable: ``"prod"``
uses the Qwen API through ``ChatQwen``; any other value (default ``"dev"``)
uses an Ollama server at ``http://127.0.0.1:11434`` through ``ChatOllama``.
"""

import os

from dotenv import load_dotenv
from langchain_ollama import ChatOllama
from langchain_qwq import ChatQwen

# Run python-dotenv before the settings below are read from the environment.
load_dotenv()

QWEN_API_KEY_INTL = os.environ.get("QWEN_API_KEY_INTL", "")
SERVE_ENV = os.environ.get("SERVE_ENV", "dev")


# prod uses the Qwen API; dev uses the local Ollama server.
def build_llm(enable_thinking: bool = False):
    """Create a chat model for the environment named by ``SERVE_ENV``.

    Args:
        enable_thinking: Forwarded as ``enable_thinking`` to ``ChatQwen`` in
            prod, or as ``reasoning`` to ``ChatOllama`` otherwise.

    Returns:
        A ``ChatQwen`` instance when ``SERVE_ENV == "prod"``, else a
        ``ChatOllama`` instance. A new object is built on every call.
    """
    if SERVE_ENV != "prod":
        # Anything other than "prod" falls back to the local Ollama model.
        return ChatOllama(
            model="qwen3.6:27b",
            base_url="http://127.0.0.1:11434",
            reasoning=enable_thinking,
            temperature=0.7,
            num_ctx=32768,
        )

    return ChatQwen(
        model="qwen3.6-plus",
        timeout=None,
        max_retries=2,
        enable_thinking=enable_thinking,
        streaming=True,
        api_key=QWEN_API_KEY_INTL,
    )


# Module-level client built once at import time. The prod branch sets
# ``streaming=False``.
# NOTE(review): ``extra_kwargs`` is not a ChatOllama parameter we could confirm
# from this file -- verify that ``{"stream": False}`` actually has an effect.
qwen_plus_llm = (
    ChatQwen(
        model="qwen-plus",
        timeout=None,
        max_retries=2,
        streaming=False,
        temperature=0.25,
        top_p=0.8,
        api_key=QWEN_API_KEY_INTL,
    )
    if SERVE_ENV == "prod"
    else ChatOllama(
        model="qwen3.6:27b",
        base_url="http://127.0.0.1:11434",
        reasoning=False,
        temperature=0.7,
        num_ctx=32768,
        extra_kwargs={"stream": False},
    )
)

# Manual smoke test (kept commented out so importing has no extra effects):
#   print(qwen_plus_llm.invoke("你好"))
#   for chunk in qwen_plus_llm.stream("你好"):
#       print(chunk)