50 lines
1.2 KiB
Python
50 lines
1.2 KiB
Python
import os
|
||
|
||
from dotenv import load_dotenv
|
||
from langchain_ollama import ChatOllama
|
||
from langchain_qwq import ChatQwen
|
||
|
||
load_dotenv()
|
||
|
||
QWEN_API_KEY_INTL = os.environ.get("QWEN_API_KEY_INTL", "")
|
||
|
||
|
||
def build_llm(
    enable_thinking: bool = False,
    *,
    model: str = "qwen3.6:27b",
    base_url: str = "http://127.0.0.1:11434",
    temperature: float = 0.7,
    num_ctx: int = 32768,
) -> "ChatOllama":
    """Build a ChatOllama chat model.

    Called with no keyword overrides this produces exactly the configuration
    that used to be hard-coded here; the keyword-only parameters exist so that
    callers can target another model tag or Ollama endpoint without editing
    this module.

    Args:
        enable_thinking: Forwarded to ChatOllama as ``reasoning``. Per the
            original author's note, turning it on enables the model's
            thinking mode and has the reasoning split out automatically as
            ``reasoning_content``.
        model: Ollama model tag to load.
        base_url: Base URL of the Ollama server.
        temperature: Sampling temperature.
        num_ctx: Context window size passed to Ollama.

    Returns:
        The configured ChatOllama instance.
    """
    # Disabled alternative: Qwen backend through langchain_qwq. Kept for
    # reference so the switch back is a copy/paste away.
    # llm = ChatQwen(
    #     model="qwen3.6-plus",
    #     timeout=None,
    #     max_retries=2,
    #     enable_thinking=enable_thinking,
    #     streaming=True,
    #     api_key=QWEN_API_KEY_INTL,
    # )
    # return llm
    return ChatOllama(
        model=model,
        base_url=base_url,
        reasoning=enable_thinking,
        temperature=temperature,
        num_ctx=num_ctx,
    )
|
||
|
||
|
||
# qwen_plus_llm = ChatQwen(
|
||
# model="qwen-plus",
|
||
# timeout=None,
|
||
# max_retries=2,
|
||
# streaming=False,
|
||
# temperature=0.25,
|
||
# top_p=0.8,
|
||
# api_key=QWEN_API_KEY_INTL,
|
||
# )
|
||
# response = qwen_plus_llm.invoke("你好")
|
||
# print(response)
|
||
|
||
|
||
# Shared module-level chat model with reasoning turned off.
# NOTE: despite the "qwen_plus" name (kept so existing references keep
# working), this instance talks to the Ollama "qwen3.6:27b" model, not to the
# "qwen-plus" ChatQwen model shown in the commented-out code.
qwen_plus_llm = ChatOllama(
    model="qwen3.6:27b",
    base_url="http://127.0.0.1:11434",
    reasoning=False,
    temperature=0.7,
    num_ctx=32768,
    # The previous `extra_kwargs={"stream": False}` is not a ChatOllama field,
    # so it had no effect on streaming. `disable_streaming` is the chat-model
    # option for forcing non-streaming calls.
    # NOTE(review): confirm no caller relies on token-by-token streaming from
    # this instance.
    disable_streaming=True,
)
|
||
# for chunk in qwen_plus_llm.stream("你好"):
|
||
# print(chunk)
|