diff --git a/gunicorn.conf.py b/gunicorn.conf.py
new file mode 100644
index 0000000..dbf2b32
--- /dev/null
+++ b/gunicorn.conf.py
@@ -0,0 +1,23 @@
+import multiprocessing
+import os
+
+# Basic configuration
+bind = "0.0.0.0:80"                  # listen on all interfaces, port 80
+worker_class = "uvicorn.workers.UvicornWorker"  # worker class supplied by uvicorn
+loglevel = "info"
+accesslog = "-"                      # "-" sends the access log to stdout
+errorlog = "-"                       # "-" sends the error log to stderr
+
+# Key production parameters
+workers = 2                          # Start with 2 (1-4 suggested for ML workloads; tune to CPU cores and memory)
+timeout = 300                        # 5 minutes; intended to cover model loading and slow inference - NOTE(review): confirm how UvicornWorker applies this
+graceful_timeout = 300               # seconds workers are given to finish in-flight requests on restart
+preload_app = True                   # Required: load the app before forking so the model is loaded once - NOTE(review): memory savings assume the model is fork-safe; confirm
+
+# Guard against memory leaks (a common problem in ML services)
+max_requests = 1000                  # restart a worker after it has handled this many requests
+max_requests_jitter = 100            # add a random 0-100 to the limit so workers do not all restart together
+
+# Other tuning
+keepalive = 5                        # seconds to wait for further requests on a Keep-Alive connection
+worker_connections = 1000            # NOTE(review): gunicorn documents this for gthread/eventlet/gevent workers only; confirm it has any effect with UvicornWorker
\ No newline at end of file