"""Gunicorn settings for serving the application with Uvicorn workers.

Gunicorn reads the module-level names below as its settings. The worker
count may be overridden per deployment through the ``WEB_CONCURRENCY``
environment variable; every other value is fixed in this file.
"""

import multiprocessing  # currently unused; kept from the original module
import os

# --- Basic configuration ---
bind = "0.0.0.0:80"
worker_class = "uvicorn.workers.UvicornWorker"
loglevel = "info"
accesslog = "-"  # "-" sends the access log to stdout
errorlog = "-"  # "-" sends the error log to stderr

# --- Key production parameters ---
# Start with 2 workers (1-4 is the suggested range for ML serving; size it to
# the CPU cores and memory actually available). A value set in this file takes
# precedence over Gunicorn's own WEB_CONCURRENCY default, so the variable is
# honoured explicitly here to allow tuning without editing the file. An unset
# or empty variable falls back to 2.
workers = int(os.environ.get("WEB_CONCURRENCY") or "2")
timeout = 300  # 5 minutes: headroom for model loading and slow inference
graceful_timeout = 300
# Required: load the application once in the master before forking, so the
# model is loaded a single time instead of once per worker (large memory
# saving).
preload_app = True

# --- Guard against memory leaks (a common problem in ML services) ---
# Each worker is recycled after roughly this many requests; the jitter spreads
# the restarts so the workers do not all recycle at the same moment.
max_requests = 1000
max_requests_jitter = 100

# --- Other tuning ---
keepalive = 5
# NOTE(review): Gunicorn documents worker_connections as applying to the
# gthread/eventlet/gevent worker types - confirm it has any effect with
# UvicornWorker before relying on it.
worker_connections = 1000