"""Gunicorn-style server settings for an ASGI app served by Uvicorn workers.

Every module-level name below is a plain setting assignment; the values are
tuned for a machine-learning inference service (slow start-up, slow requests,
large per-process memory footprint).
"""

import multiprocessing
import os

# --- Basic configuration ---------------------------------------------------
bind = "0.0.0.0:80"
worker_class = "uvicorn.workers.UvicornWorker"
loglevel = "info"
accesslog = "-"  # "-" sends the access log to stdout
errorlog = "-"  # "-" sends the error log to stderr

# --- Key production parameters ---------------------------------------------
# Start with 2 workers (1-4 is the suggested range for ML workloads; tune to
# the number of CPU cores and the available memory).
workers = 2
# 5 minutes: meant to leave enough room for model loading and slow inference.
timeout = 300
graceful_timeout = 300
# Required: load the application before the workers are forked so the model
# is loaded once instead of once per worker, which saves a lot of memory.
preload_app = True

# --- Guard against memory leaks (a common problem in ML services) ----------
# Recycle each worker after roughly this many requests; the jitter staggers
# the restarts so the workers do not all recycle at the same moment.
max_requests = 1000
max_requests_jitter = 100

# --- Other tuning ----------------------------------------------------------
keepalive = 5
# NOTE(review): Gunicorn documents worker_connections for the gthread,
# eventlet and gevent worker types - confirm it has any effect with
# UvicornWorker.
worker_connections = 1000