TASK:冷启动热度推荐；

2025-06-10 10:54:20 +08:00
parent a14e6051b1
commit d39dee851f
4 changed files with 400 additions and 16 deletions
--- a/app/api/api_recommendation.py
+++ b/app/api/api_recommendation.py
@@ -3,7 +3,10 @@ import logging
 import sys
 import time
 from typing import List
-
+import os
+import json
+import math
+import random
 import numpy as np
 from apscheduler.schedulers.background import BackgroundScheduler
 from apscheduler.triggers.cron import CronTrigger
@@ -31,18 +34,44 @@ async def startup_event():
    scheduler.start()
    logger.info("定时任务已启动")

-def get_random_recommendations(category: str, num: int) -> List[str]:
-    """全品类随机推荐"""
-    all_iids = list(matrix_data["iid_to_sketch"].keys())
-    # 优先从当前品类选择
-    category_iids = matrix_data["category_to_iids"].get(category, all_iids)
-    # 确保不超出实际数量
-    sample_size = min(num, len(category_iids))
-    sampled = np.random.choice(category_iids, size=sample_size, replace=False)
-    return [matrix_data["iid_to_sketch"][iid] for iid in sampled]
+def softmax(scores):  # map a non-empty sequence of numeric scores to a list of probabilities
+    max_score = max(scores)  # raises ValueError when scores is empty
+    exp_scores = [math.exp(s - max_score) for s in scores]  # shift by the max so math.exp never sees a positive argument
+    sum_exp = sum(exp_scores)  # the max element contributes exp(0) == 1, so the divisor is not zero
+    return [s / sum_exp for s in exp_scores]  # normalise each weight by the total

-@router.get("/recommend/{user_id}/{category}/{num_recommendations}", response_model=List[str])
-async def get_recommendations(user_id: int, category: str, num_recommendations: int = 10):
+def get_random_recommendations(category: str, num: int) -> List[str]:
+    """Cold-start recommendations sampled by preloaded popularity ("heat").
+
+    Draws up to ``num`` distinct URLs of ``category`` without replacement,
+    weighting each by the softmax of its heat score. Any failure (category
+    missing or empty, unusable weights) is logged, then sketches are picked
+    uniformly from the category (or from all items if it is unknown).
+    """
+    try:
+        heat_data = matrix_data.get("heat_data", {})
+        if category not in heat_data:
+            raise ValueError(f"热度数据缺少类别 {category}，使用随机推荐")
+        heat_dict = heat_data[category]  # {url: score}
+        urls = list(heat_dict.keys())
+        if not urls:
+            raise ValueError("该类别下无热度记录，使用随机推荐")
+        probs = softmax(list(heat_dict.values()))
+        sample_size = max(0, min(num, len(urls)))  # clamp: num may be <= 0
+        # replace=False: random.choices() could return the same URL twice.
+        picked = np.random.choice(urls, size=sample_size, replace=False, p=probs)
+        return picked.tolist()
+    except Exception as e:  # best-effort: any failure falls back to uniform sampling
+        logger.warning("热度推荐失败，回退随机推荐: %s", e)
+        all_iids = list(matrix_data["iid_to_sketch"].keys())
+        category_iids = matrix_data["category_to_iids"].get(category, all_iids)
+        sample_size = min(num, len(category_iids))
+        sampled = np.random.choice(category_iids, size=sample_size, replace=False)
+        return [matrix_data["iid_to_sketch"][iid] for iid in sampled]
+
+
+@router.get("/recommend/{user_id}/{category}/{num_recommendations}/{brand_id}/{brand_scale}", response_model=List[str])
+async def get_recommendations(user_id: int, category: str, brand_id: int, brand_scale: float, num_recommendations: int = 10):
    """
    :param user_id: 4
    :param category: female_skirt
@@ -95,7 +124,7 @@ async def get_recommendations(user_id: int, category: str, num_recommendations:
                raw_feat_scores = matrix_data["feature_matrix"][user_idx_feature, valid_sketch_idxs_feature]
                raw_feat_scores = (raw_feat_scores - np.min(raw_feat_scores)) / (
                        np.max(raw_feat_scores) - np.min(raw_feat_scores) + 1e-8)
-                processed_feat = raw_feat_scores * 0.3
+                processed_feat = raw_feat_scores
            else:
                processed_feat = np.array([])

@@ -104,7 +133,22 @@ async def get_recommendations(user_id: int, category: str, num_recommendations:
            matrix_data["cached_valid_idxs"][cache_key] = valid_sketch_idxs_inter

        # 合并分数
-        final_scores = processed_inter + processed_feat
+        if brand_id is not None:
+            if brand_id is not None:
+                brand_idx_feature = matrix_data["brand_index_map"].get(brand_id)
+                if brand_idx_feature is not None and valid_sketch_idxs_feature:
+                    raw_brand_feat_scores = matrix_data["brand_feature_matrix"][
+                        brand_idx_feature, valid_sketch_idxs_feature]
+                    raw_brand_feat_scores = (raw_brand_feat_scores - np.min(raw_brand_feat_scores)) / (
+                            np.max(raw_brand_feat_scores) - np.min(raw_brand_feat_scores) + 1e-8)
+                    processed_brand_feat = raw_brand_feat_scores
+                    final_scores = processed_inter + 0.3 * ((1 - brand_scale) * processed_feat + brand_scale * processed_brand_feat)
+                else:
+                    final_scores = processed_inter + 0.3 * processed_feat
+            else:
+                final_scores = processed_inter + 0.3 * processed_feat
+        else:
+            final_scores = processed_inter + 0.3 * processed_feat
        valid_sketch_idxs = matrix_data["cached_valid_idxs"][cache_key]

        # 概率采样