TASK:冷启动热度推荐;

This commit is contained in:
shahaibo
2025-06-10 10:54:20 +08:00
parent a14e6051b1
commit d39dee851f
4 changed files with 400 additions and 16 deletions

View File

@@ -3,7 +3,10 @@ import logging
import sys
import time
from typing import List
import os
import json
import math
import random
import numpy as np
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger
@@ -31,18 +34,44 @@ async def startup_event():
scheduler.start()
logger.info("定时任务已启动")
def get_random_recommendations(category: str, num: int) -> List[str]:
    """Return up to ``num`` entries picked uniformly at random, without repeats.

    Candidates are the item ids registered for ``category`` in
    ``matrix_data["category_to_iids"]``; when the category is unknown, every
    id in ``matrix_data["iid_to_sketch"]`` is a candidate. Each sampled id is
    mapped through ``iid_to_sketch`` before being returned.

    NOTE(review): another function with this same name is defined further
    down in this file; confirm which definition is meant to be live.
    """
    sketch_by_iid = matrix_data["iid_to_sketch"]
    every_iid = list(sketch_by_iid.keys())
    # Prefer the requested category; fall back to the full catalogue.
    pool = matrix_data["category_to_iids"].get(category, every_iid)
    # Never ask for more items than the pool actually holds.
    count = min(num, len(pool))
    picked = np.random.choice(pool, size=count, replace=False)
    return [sketch_by_iid[iid] for iid in picked]
def softmax(scores):
    """Convert raw scores into a probability distribution.

    The maximum score is subtracted before exponentiation so that large
    scores do not overflow ``math.exp``; this does not change the result.

    Args:
        scores: Iterable of numeric scores. It is materialized once, so
            one-shot iterators are accepted as well as lists.

    Returns:
        A list of floats, one per input score, that sums to 1. An empty
        input yields an empty list.
    """
    scores = list(scores)
    if not scores:
        # max() would raise on an empty sequence; there is nothing to normalize.
        return []
    max_score = max(scores)
    exp_scores = [math.exp(s - max_score) for s in scores]
    sum_exp = sum(exp_scores)
    return [e / sum_exp for e in exp_scores]
@router.get("/recommend/{user_id}/{category}/{num_recommendations}", response_model=List[str])
async def get_recommendations(user_id: int, category: str, num_recommendations: int = 10):
def get_random_recommendations(category: str, num: int) -> List[str]:
    """Recommend items for a cold-start request from preloaded heat scores.

    Reads ``matrix_data["heat_data"][category]`` (a ``{url: score}`` mapping),
    converts the scores to probabilities with ``softmax`` and draws up to
    ``num`` *distinct* URLs, weighted by those probabilities.

    If the heat-based path fails for any reason (category missing from the
    heat data, no records for the category, sampling error), the reason is
    logged and the function falls back to a uniform random sample of the
    category's item ids (or of all ids when the category is unknown), mapped
    through ``matrix_data["iid_to_sketch"]``.

    Args:
        category: Category key used to look up heat data and item ids.
        num: Maximum number of recommendations to return; values below
            zero are treated as zero.

    Returns:
        A list of at most ``num`` entries with no duplicates.
    """
    try:
        heat_data = matrix_data.get("heat_data", {})
        if category not in heat_data:
            raise ValueError(f"热度数据缺少类别 {category},使用随机推荐")
        heat_dict = heat_data[category]  # {url: score}
        if not heat_dict:
            raise ValueError("该类别下无热度记录,使用随机推荐")
        urls = list(heat_dict.keys())
        probs = softmax(list(heat_dict.values()))
        sample_size = max(0, min(num, len(urls)))
        # Sample indices without replacement so a URL is never recommended
        # twice (random.choices would sample with replacement).
        picked = np.random.choice(len(urls), size=sample_size, replace=False, p=probs)
        return [urls[i] for i in picked]
    except Exception as e:
        # Best-effort fallback: record why the heat path failed, then
        # recommend uniformly at random.
        logger.warning("热度推荐失败,回退到随机推荐: %s", e)
        all_iids = list(matrix_data["iid_to_sketch"].keys())
        category_iids = matrix_data["category_to_iids"].get(category, all_iids)
        sample_size = max(0, min(num, len(category_iids)))
        sampled = np.random.choice(category_iids, size=sample_size, replace=False)
        return [matrix_data["iid_to_sketch"][iid] for iid in sampled]
@router.get("/recommend/{user_id}/{category}/{num_recommendations}/{brand_id}/{brand_scale}", response_model=List[str])
async def get_recommendations(user_id: int, category: str, brand_id: int, brand_scale: float, num_recommendations: int = 10):
"""
:param user_id: 4
:param category: female_skirt
@@ -95,7 +124,7 @@ async def get_recommendations(user_id: int, category: str, num_recommendations:
raw_feat_scores = matrix_data["feature_matrix"][user_idx_feature, valid_sketch_idxs_feature]
raw_feat_scores = (raw_feat_scores - np.min(raw_feat_scores)) / (
np.max(raw_feat_scores) - np.min(raw_feat_scores) + 1e-8)
processed_feat = raw_feat_scores * 0.3
processed_feat = raw_feat_scores
else:
processed_feat = np.array([])
@@ -104,7 +133,22 @@ async def get_recommendations(user_id: int, category: str, num_recommendations:
matrix_data["cached_valid_idxs"][cache_key] = valid_sketch_idxs_inter
# 合并分数
final_scores = processed_inter + processed_feat
if brand_id is not None:
if brand_id is not None:
brand_idx_feature = matrix_data["brand_index_map"].get(brand_id)
if brand_idx_feature is not None and valid_sketch_idxs_feature:
raw_brand_feat_scores = matrix_data["brand_feature_matrix"][
brand_idx_feature, valid_sketch_idxs_feature]
raw_brand_feat_scores = (raw_brand_feat_scores - np.min(raw_brand_feat_scores)) / (
np.max(raw_brand_feat_scores) - np.min(raw_brand_feat_scores) + 1e-8)
processed_brand_feat = raw_brand_feat_scores
final_scores = processed_inter + 0.3 * ((1 - brand_scale) * processed_feat + brand_scale * processed_brand_feat)
else:
final_scores = processed_inter + 0.3 * processed_feat
else:
final_scores = processed_inter + 0.3 * processed_feat
else:
final_scores = processed_inter + 0.3 * processed_feat
valid_sketch_idxs = matrix_data["cached_valid_idxs"][cache_key]
# 概率采样