重构初始化逻辑以支持根据环境选择不同的LLM模型

This commit is contained in:
zcr
2026-06-17 12:01:04 +08:00
parent 13d93088e7
commit 456298cee7

View File

@@ -7,22 +7,24 @@ from langchain_qwq import ChatQwen
load_dotenv() load_dotenv()
QWEN_API_KEY_INTL = os.environ.get("QWEN_API_KEY_INTL", "") QWEN_API_KEY_INTL = os.environ.get("QWEN_API_KEY_INTL", "")
SERVE_ENV = os.environ.get("SERVE_ENV", "dev") # prod 使用 Qwen API,dev 使用本地 Ollama
def build_llm(enable_thinking: bool = False): def build_llm(enable_thinking: bool = False):
# llm = ChatQwen( if SERVE_ENV == "prod":
# model="qwen3.6-plus", llm = ChatQwen(
# timeout=None, model="qwen3.6-plus",
# max_retries=2, timeout=None,
# enable_thinking=enable_thinking, max_retries=2,
# streaming=True, enable_thinking=enable_thinking,
# api_key=QWEN_API_KEY_INTL, streaming=True,
# ) api_key=QWEN_API_KEY_INTL,
# return llm )
else:
llm = ChatOllama( llm = ChatOllama(
model="qwen3.6:27b", model="qwen3.6:27b",
base_url="http://127.0.0.1:11434", base_url="http://127.0.0.1:11434",
reasoning=enable_thinking, # 核心:开启思考,自动拆分 reasoning_content reasoning=enable_thinking,
temperature=0.7, temperature=0.7,
num_ctx=32768, num_ctx=32768,
) )
@@ -42,6 +44,17 @@ def build_llm(enable_thinking: bool = False):
# print(response) # print(response)
if SERVE_ENV == "prod":
qwen_plus_llm = ChatQwen(
model="qwen-plus",
timeout=None,
max_retries=2,
streaming=False,
temperature=0.25,
top_p=0.8,
api_key=QWEN_API_KEY_INTL,
)
else:
qwen_plus_llm = ChatOllama( qwen_plus_llm = ChatOllama(
model="qwen3.6:27b", base_url="http://127.0.0.1:11434", reasoning=False, temperature=0.7, num_ctx=32768, extra_kwargs={"stream": False} model="qwen3.6:27b", base_url="http://127.0.0.1:11434", reasoning=False, temperature=0.7, num_ctx=32768, extra_kwargs={"stream": False}
) )