"""Gunicorn settings for running the application under Uvicorn workers."""

import multiprocessing
import os

# Basic configuration
bind = "0.0.0.0:80"
worker_class = "uvicorn.workers.UvicornWorker"
loglevel = "info"
accesslog = "-"  # "-" sends the access log to stdout
errorlog = "-"  # "-" sends the error log to stderr

# Key production parameters
workers = 2  # Start with 2 (1-4 recommended for ML workloads; tune to CPU cores and memory)
timeout = 300  # 5 minutes: enough for model loading and slow inference
graceful_timeout = 300
preload_app = True  # Required: the model is loaded only once, which saves a lot of memory

# Guard against memory leaks (a common problem in ML services)
max_requests = 1000
max_requests_jitter = 100

# Other tuning
keepalive = 5
# NOTE(review): per the Gunicorn docs this setting only affects the gthread,
# eventlet and gevent worker types; it probably has no effect with
# UvicornWorker - confirm before relying on it.
worker_connections = 1000