feat : 代码梳理 移除所有敏感密钥 通过环境变量方式配置
All checks were successful
git commit AiDA python develop 分支构建部署 / scheduled_deploy (push) Has been skipped
All checks were successful
git commit AiDA python develop 分支构建部署 / scheduled_deploy (push) Has been skipped
This commit is contained in:
@@ -18,7 +18,8 @@ import pandas as pd
|
||||
from datetime import datetime, timedelta
|
||||
import json
|
||||
|
||||
from app.core.config import DB_CONFIG, TABLE_CATEGORIES, RECOMMEND_PATH_PREFIX
|
||||
from app.core.config import TABLE_CATEGORIES, settings
|
||||
from app.core.mysql_config import DB_CONFIG
|
||||
|
||||
# 自动选择可用字体
|
||||
try:
|
||||
@@ -51,7 +52,7 @@ minio_client = Minio(
|
||||
)
|
||||
|
||||
# 预加载系统sketch特征向量
|
||||
SYSTEM_FEATURES = np.load(f'{RECOMMEND_PATH_PREFIX}sketch_feature_dict.npy', allow_pickle=True).item()
|
||||
SYSTEM_FEATURES = np.load(f'{settings.RECOMMEND_PATH_PREFIX}sketch_feature_dict.npy', allow_pickle=True).item()
|
||||
|
||||
# 行为权重和衰减系数
|
||||
BEHAVIOR_CONFIG = {
|
||||
@@ -61,6 +62,7 @@ BEHAVIOR_CONFIG = {
|
||||
'sketchLike': {'weight': 4, 'decay': 0} # 不衰减
|
||||
}
|
||||
|
||||
|
||||
# 保存sketch_to_iid到文件
|
||||
def save_sketch_to_iid():
|
||||
"""保存sketch到iid的映射"""
|
||||
@@ -147,11 +149,11 @@ def update_user_matrices():
|
||||
cursor = conn.cursor()
|
||||
|
||||
# 修改后的查询语句(移除category过滤)
|
||||
cursor.execute("""
|
||||
SELECT account_id, path, COUNT(*) as like_count
|
||||
FROM user_preference_log_test
|
||||
GROUP BY account_id, path
|
||||
""")
|
||||
cursor.execute("""
|
||||
SELECT account_id, path, COUNT(*) as like_count
|
||||
FROM user_preference_log_test
|
||||
GROUP BY account_id, path
|
||||
""")
|
||||
user_data = cursor.fetchall()
|
||||
logging.info(f"成功读取{len(user_data)}条用户偏好记录")
|
||||
|
||||
@@ -164,17 +166,17 @@ def update_user_matrices():
|
||||
feature_matrix, user_index_feature_matrix, sketch_index_feature_matrix, iid_to_category_feature_matrix = calculate_feature_matrix(user_data)
|
||||
# visualize_sparse_matrix(feature_matrix, '系统sketch与用户category平均特征向量关联度矩阵', 'correlation_matrix.png')
|
||||
# 存储矩阵
|
||||
np.save(f"{RECOMMEND_PATH_PREFIX}interaction_matrix.npy", interaction_matrix)
|
||||
np.save(f"{RECOMMEND_PATH_PREFIX}feature_matrix.npy", feature_matrix)
|
||||
np.save(f"{settings.RECOMMEND_PATH_PREFIX}interaction_matrix.npy", interaction_matrix)
|
||||
np.save(f"{settings.RECOMMEND_PATH_PREFIX}feature_matrix.npy", feature_matrix)
|
||||
#
|
||||
np.save(f"{RECOMMEND_PATH_PREFIX}iid_to_category_interaction_matrix.npy", iid_to_category_interaction_matrix)
|
||||
np.save(f"{RECOMMEND_PATH_PREFIX}user_index_interaction_matrix.npy", user_index_interaction_matrix)
|
||||
np.save(f"{settings.RECOMMEND_PATH_PREFIX}iid_to_category_interaction_matrix.npy", iid_to_category_interaction_matrix)
|
||||
np.save(f"{settings.RECOMMEND_PATH_PREFIX}user_index_interaction_matrix.npy", user_index_interaction_matrix)
|
||||
#
|
||||
np.save(f"{RECOMMEND_PATH_PREFIX}iid_to_category_feature_matrix.npy", iid_to_category_feature_matrix)
|
||||
np.save(f"{RECOMMEND_PATH_PREFIX}user_index_feature_matrix.npy", user_index_feature_matrix)
|
||||
np.save(f"{settings.RECOMMEND_PATH_PREFIX}iid_to_category_feature_matrix.npy", iid_to_category_feature_matrix)
|
||||
np.save(f"{settings.RECOMMEND_PATH_PREFIX}user_index_feature_matrix.npy", user_index_feature_matrix)
|
||||
#
|
||||
np.save(f"{RECOMMEND_PATH_PREFIX}sketch_index_interaction_matrix.npy", sketch_index_interaction_matrix)
|
||||
np.save(f"{RECOMMEND_PATH_PREFIX}sketch_index_feature_matrix.npy", sketch_index_feature_matrix)
|
||||
np.save(f"{settings.RECOMMEND_PATH_PREFIX}sketch_index_interaction_matrix.npy", sketch_index_interaction_matrix)
|
||||
np.save(f"{settings.RECOMMEND_PATH_PREFIX}sketch_index_feature_matrix.npy", sketch_index_feature_matrix)
|
||||
# logging.info("矩阵更新完成")
|
||||
|
||||
except Exception as e:
|
||||
@@ -235,6 +237,7 @@ def plot_interaction_count_matrix(interaction_count_matrix):
|
||||
except Exception as e:
|
||||
logging.error(f"绘图失败: {str(e)}", exc_info=True)
|
||||
|
||||
|
||||
def visualize_sparse_matrix(matrix, title='Non-zero Interactions (Scatter Plot)', filename="scatter_figure_interaction.png"):
|
||||
if not sparse.issparse(matrix):
|
||||
# 转换为稀疏矩阵
|
||||
@@ -253,6 +256,7 @@ def visualize_sparse_matrix(matrix, title='Non-zero Interactions (Scatter Plot)'
|
||||
plt.ylabel('Item Index')
|
||||
plt.savefig(filename)
|
||||
|
||||
|
||||
def calculate_interaction_matrix(user_data):
|
||||
"""基于新表结构的交互次数矩阵计算(仅系统sketch)"""
|
||||
# 获取所有用户ID
|
||||
@@ -475,6 +479,7 @@ def calculate_heat(row, current_date):
|
||||
# 计算热度值 = 权重 * e^(-衰减系数 * 天数)
|
||||
return config['weight'] * np.exp(-config['decay'] * days_passed)
|
||||
|
||||
|
||||
def load_heat_matrix_as_array(file_path):
|
||||
"""
|
||||
直接加载为二维numpy数组
|
||||
@@ -484,10 +489,11 @@ def load_heat_matrix_as_array(file_path):
|
||||
saved = json.load(f)
|
||||
return (
|
||||
np.array(saved['data']), # 二维矩阵
|
||||
saved['row_labels'], # 行标签列表
|
||||
saved['col_labels'] # 列标签列表
|
||||
saved['row_labels'], # 行标签列表
|
||||
saved['col_labels'] # 列标签列表
|
||||
)
|
||||
|
||||
|
||||
def update_heat_matrices():
|
||||
"""每日计算并存储热度矩阵(gender_category × path)"""
|
||||
current_date = datetime.now()
|
||||
|
||||
@@ -1,240 +1,241 @@
|
||||
# # 预加载资源
|
||||
# import logging
|
||||
# import time
|
||||
# from collections import defaultdict
|
||||
# import os
|
||||
# import json
|
||||
# import numpy as np
|
||||
#
|
||||
# from app.core.config import DB_CONFIG, RECOMMEND_PATH_PREFIX
|
||||
#
|
||||
# logger = logging.getLogger()
|
||||
# import pymysql
|
||||
# from concurrent.futures import ThreadPoolExecutor
|
||||
#
|
||||
# HEAT_VECTOR_FILE = 'heat_vectors_data/heat_vectors.json' # 可动态加载或配置
|
||||
#
|
||||
# matrix_data = {
|
||||
# "interaction_matrix": None,
|
||||
# "feature_matrix": None,
|
||||
# "user_index_interaction": None,
|
||||
# "sketch_index_interaction": None,
|
||||
# "user_index_feature": None,
|
||||
# "sketch_index_feature": None,
|
||||
# "iid_to_sketch": None,
|
||||
# "category_to_iids": None,
|
||||
# "cached_scores": {},
|
||||
# "cached_valid_idxs": {},
|
||||
# "category_sketch_idxs_inter": None,
|
||||
# "category_sketch_idxs_feature": None,
|
||||
# "user_inter_full": dict(),
|
||||
# "user_feat_full": dict(),
|
||||
# "brand_feature_matrix": None,
|
||||
# "brand_index_map": None,
|
||||
# "heat_data": {},
|
||||
# }
|
||||
#
|
||||
#
|
||||
# def load_resources():
|
||||
# """加载所有矩阵和映射关系,并触发预缓存"""
|
||||
# try:
|
||||
# start_time = time.time()
|
||||
#
|
||||
# # 清空缓存
|
||||
# matrix_data["cached_scores"].clear()
|
||||
# matrix_data["cached_valid_idxs"].clear()
|
||||
#
|
||||
# # 加载数据
|
||||
# sketch_to_iid = np.load(f'{RECOMMEND_PATH_PREFIX}sketch_to_iid.npy', allow_pickle=True).item()
|
||||
# matrix_data["iid_to_sketch"] = {v: k for k, v in sketch_to_iid.items()}
|
||||
#
|
||||
# matrix_data["interaction_matrix"] = np.load(f"{RECOMMEND_PATH_PREFIX}interaction_matrix.npy", allow_pickle=True)
|
||||
# matrix_data["user_index_interaction"] = np.load(f"{RECOMMEND_PATH_PREFIX}user_index_interaction_matrix.npy", allow_pickle=True).item()
|
||||
# matrix_data["sketch_index_interaction"] = np.load(f"{RECOMMEND_PATH_PREFIX}sketch_index_interaction_matrix.npy",
|
||||
# allow_pickle=True).item()
|
||||
#
|
||||
# matrix_data["feature_matrix"] = np.load(f"{RECOMMEND_PATH_PREFIX}feature_matrix.npy", allow_pickle=True)
|
||||
#
|
||||
# brand_feature_path = f"{RECOMMEND_PATH_PREFIX}brand_feature_matrix.npy"
|
||||
# if os.path.exists(brand_feature_path):
|
||||
# matrix_data["brand_feature_matrix"] = np.load(brand_feature_path, allow_pickle=True)
|
||||
# else:
|
||||
# logger.warning("brand_feature_matrix 文件不存在,使用空数组")
|
||||
# matrix_data["brand_feature_matrix"] = np.array([])
|
||||
#
|
||||
# # brand_index_map
|
||||
# brand_index_path = f"{RECOMMEND_PATH_PREFIX}brand_index_map.npy"
|
||||
# if os.path.exists(brand_index_path):
|
||||
# matrix_data["brand_index_map"] = np.load(brand_index_path, allow_pickle=True).item()
|
||||
# else:
|
||||
# logger.warning("brand_index_map 文件不存在,使用空字典")
|
||||
# matrix_data["brand_index_map"] = {}
|
||||
#
|
||||
# matrix_data["user_index_feature"] = np.load(f"{RECOMMEND_PATH_PREFIX}user_index_feature_matrix.npy", allow_pickle=True).item()
|
||||
#
|
||||
# matrix_data["sketch_index_feature"] = np.load(f"{RECOMMEND_PATH_PREFIX}sketch_index_feature_matrix.npy", allow_pickle=True).item()
|
||||
#
|
||||
# category_to_iid_map = np.load(f"{RECOMMEND_PATH_PREFIX}iid_to_category_interaction_matrix.npy", allow_pickle=True).item()
|
||||
# matrix_data["category_to_iids"] = defaultdict(list)
|
||||
# for iid, cat in category_to_iid_map.items():
|
||||
# matrix_data["category_to_iids"][cat].append(iid)
|
||||
#
|
||||
# logger.info(f"资源加载完成,耗时: {time.time() - start_time:.2f}秒")
|
||||
#
|
||||
# # 触发预缓存
|
||||
# precache_user_category()
|
||||
#
|
||||
# if os.path.exists(HEAT_VECTOR_FILE):
|
||||
# with open(HEAT_VECTOR_FILE, 'r', encoding='utf-8') as f:
|
||||
# heat_json = json.load(f)
|
||||
# matrix_data["heat_data"] = heat_json.get("data", {})
|
||||
# logger.info(f"热度向量数据加载完成,共加载 {len(matrix_data['heat_data'])} 个类别")
|
||||
# else:
|
||||
# matrix_data["heat_data"] = {}
|
||||
#
|
||||
# except Exception as e:
|
||||
# logger.error(f"资源加载失败: {str(e)}")
|
||||
# raise RuntimeError("初始化失败")
|
||||
#
|
||||
#
|
||||
# def precache_user_category():
|
||||
# """优化后的用户分类预缓存(添加耗时统计)"""
|
||||
# if not all([
|
||||
# matrix_data["interaction_matrix"] is not None,
|
||||
# matrix_data["feature_matrix"] is not None,
|
||||
# matrix_data["user_index_interaction"] is not None
|
||||
# ]):
|
||||
# logger.warning("资源未加载完成,跳过预缓存")
|
||||
# return
|
||||
#
|
||||
# start_time = time.perf_counter()
|
||||
# time_stats = {
|
||||
# "get_all_user_categories": 0,
|
||||
# "process_user_category": 0,
|
||||
# "thread_execution": 0,
|
||||
# "cache_update": 0,
|
||||
# "total": 0,
|
||||
# }
|
||||
#
|
||||
# # 统计用户类别获取时间
|
||||
# t1 = time.perf_counter()
|
||||
# user_categories = get_all_user_categories()
|
||||
# time_stats["get_all_user_categories"] = time.perf_counter() - t1
|
||||
#
|
||||
# precached_count = 0
|
||||
#
|
||||
# def process_user_category(user_id, categories):
|
||||
# """单用户类别缓存计算(统计耗时)"""
|
||||
# local_cache = {}
|
||||
# local_valid_idxs = {}
|
||||
# t_start = time.perf_counter()
|
||||
#
|
||||
# for category in categories:
|
||||
# cache_key = (user_id, category)
|
||||
# if cache_key in matrix_data["cached_scores"]:
|
||||
# continue
|
||||
#
|
||||
# try:
|
||||
# user_idx_inter = matrix_data["user_index_interaction"].get(user_id)
|
||||
# user_idx_feature = matrix_data["user_index_feature"].get(user_id)
|
||||
#
|
||||
# # 统计获取类别 IID 耗时
|
||||
# t_iid = time.perf_counter()
|
||||
# category_iids = matrix_data["category_to_iids"].get(category, [])
|
||||
# valid_sketch_idxs_inter = [matrix_data["sketch_index_interaction"][iid]
|
||||
# for iid in category_iids if iid in matrix_data["sketch_index_interaction"]]
|
||||
# valid_sketch_idxs_feature = [matrix_data["sketch_index_feature"][iid]
|
||||
# for iid in category_iids if iid in matrix_data["sketch_index_feature"]]
|
||||
# time_stats["process_user_category"] += time.perf_counter() - t_iid
|
||||
#
|
||||
# # 统计矩阵计算耗时
|
||||
# t_matrix = time.perf_counter()
|
||||
# processed_inter = np.zeros(len(valid_sketch_idxs_inter))
|
||||
# if user_idx_inter is not None and valid_sketch_idxs_inter:
|
||||
# raw_inter_scores = matrix_data["interaction_matrix"][user_idx_inter, valid_sketch_idxs_inter]
|
||||
# processed_inter = raw_inter_scores * 0.7
|
||||
#
|
||||
# processed_feat = np.zeros(len(valid_sketch_idxs_feature))
|
||||
# if user_idx_feature is not None and valid_sketch_idxs_feature:
|
||||
# raw_feat_scores = matrix_data["feature_matrix"][user_idx_feature, valid_sketch_idxs_feature]
|
||||
# raw_feat_scores = (raw_feat_scores - np.min(raw_feat_scores)) / (
|
||||
# np.max(raw_feat_scores) - np.min(raw_feat_scores) + 1e-8)
|
||||
# processed_feat = raw_feat_scores * 0.3
|
||||
# time_stats["process_user_category"] += time.perf_counter() - t_matrix
|
||||
#
|
||||
# if len(processed_inter) == len(processed_feat):
|
||||
# local_cache[cache_key] = (processed_inter, processed_feat)
|
||||
# local_valid_idxs[cache_key] = valid_sketch_idxs_inter
|
||||
#
|
||||
# except Exception as e:
|
||||
# logger.error(f"预缓存失败 (user={user_id}, category={category}): {str(e)}")
|
||||
#
|
||||
# return local_cache, local_valid_idxs
|
||||
#
|
||||
# # 统计线程执行时间
|
||||
# t2 = time.perf_counter()
|
||||
# with ThreadPoolExecutor(max_workers=8) as executor:
|
||||
# futures = {executor.submit(process_user_category, user_id, categories): user_id for user_id, categories in user_categories.items()}
|
||||
# for future in futures:
|
||||
# try:
|
||||
# t_cache = time.perf_counter()
|
||||
# cache_part, valid_idxs_part = future.result()
|
||||
# matrix_data["cached_scores"].update(cache_part)
|
||||
# matrix_data["cached_valid_idxs"].update(valid_idxs_part)
|
||||
# time_stats["cache_update"] += time.perf_counter() - t_cache
|
||||
# precached_count += len(cache_part)
|
||||
# except Exception as e:
|
||||
# logger.error(f"线程执行错误: {str(e)}")
|
||||
# time_stats["thread_execution"] = time.perf_counter() - t2
|
||||
#
|
||||
# time_stats["total"] = time.perf_counter() - start_time
|
||||
#
|
||||
# # 输出统计信息
|
||||
# logger.info(f"""
|
||||
# 预缓存完成,共缓存 {precached_count} 组数据,耗时统计如下:
|
||||
# - 获取用户类别数据: {time_stats["get_all_user_categories"]:.2f}s
|
||||
# - 计算用户类别缓存: {time_stats["process_user_category"]:.2f}s
|
||||
# - 线程任务执行: {time_stats["thread_execution"]:.2f}s
|
||||
# - 更新缓存数据: {time_stats["cache_update"]:.2f}s
|
||||
# - 总耗时: {time_stats["total"]:.2f}s
|
||||
# """)
|
||||
#
|
||||
#
|
||||
# def get_all_user_categories():
|
||||
# """获取所有用户及其对应的分类"""
|
||||
# conn = None
|
||||
# try:
|
||||
# conn = pymysql.connect(**DB_CONFIG)
|
||||
# cursor = conn.cursor()
|
||||
#
|
||||
# query = """
|
||||
# SELECT DISTINCT account_id, path
|
||||
# FROM user_preference_log_prediction
|
||||
# """
|
||||
# cursor.execute(query)
|
||||
# results = cursor.fetchall()
|
||||
#
|
||||
# user_categories = defaultdict(set)
|
||||
# for account_id, path in results:
|
||||
# category = get_category_from_path(path)
|
||||
# user_categories[account_id].add(category)
|
||||
#
|
||||
# return dict(user_categories)
|
||||
#
|
||||
# except Exception as e:
|
||||
# logger.error(f"数据库查询失败: {str(e)}")
|
||||
# return {}
|
||||
# finally:
|
||||
# if conn:
|
||||
# conn.close()
|
||||
#
|
||||
#
|
||||
# def get_category_from_path(path: str) -> str:
|
||||
# """从路径解析类别"""
|
||||
# try:
|
||||
# parts = path.split('/')
|
||||
# if len(parts) >= 4:
|
||||
# return f"{parts[2]}_{parts[3]}"
|
||||
# return "unknown"
|
||||
# except:
|
||||
# return "unknown"
|
||||
# 预加载资源
|
||||
import logging
|
||||
import time
|
||||
from collections import defaultdict
|
||||
import os
|
||||
import json
|
||||
import numpy as np
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.mysql_config import DB_CONFIG
|
||||
|
||||
logger = logging.getLogger()
|
||||
import pymysql
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
# Location of the heat-vector JSON file; could be loaded dynamically or made configurable.
HEAT_VECTOR_FILE = 'heat_vectors_data/heat_vectors.json'

# Module-level store for everything load_resources() reads from disk plus the
# caches filled by precache_user_category(). Entries start out as None or
# empty containers until load_resources() populates them.
matrix_data = {
    # Matrices and their index lookups.
    "interaction_matrix": None,
    "feature_matrix": None,
    "user_index_interaction": None,
    "sketch_index_interaction": None,
    "user_index_feature": None,
    "sketch_index_feature": None,
    # Identifier mappings.
    "iid_to_sketch": None,
    "category_to_iids": None,
    # Caches keyed by (user_id, category).
    "cached_scores": {},
    "cached_valid_idxs": {},
    "category_sketch_idxs_inter": None,
    "category_sketch_idxs_feature": None,
    "user_inter_full": {},
    "user_feat_full": {},
    # Brand-level data.
    "brand_feature_matrix": None,
    "brand_index_map": None,
    # Contents of the "data" field of HEAT_VECTOR_FILE.
    "heat_data": {},
}
|
||||
|
||||
|
||||
def load_resources():
    """Load all matrices and mappings into ``matrix_data`` and trigger pre-caching.

    Steps, in order:

    1. Clear the ``cached_scores`` and ``cached_valid_idxs`` caches.
    2. Load the ``.npy`` artifacts found under ``settings.RECOMMEND_PATH_PREFIX``.
       The two brand artifacts are optional and fall back to an empty
       array / empty dict when the file does not exist.
    3. Build ``category_to_iids`` by inverting the iid -> category mapping.
    4. Call ``precache_user_category()``.
    5. Load the ``"data"`` field of ``HEAT_VECTOR_FILE`` when that file exists,
       otherwise use an empty dict.

    Raises:
        RuntimeError: if any step fails. The underlying exception is logged
            with its traceback and attached as ``__cause__``.
    """
    try:
        start_time = time.time()
        prefix = settings.RECOMMEND_PATH_PREFIX

        def _load(file_name):
            # NOTE(review): allow_pickle=True unpickles arbitrary objects, so
            # these files must come from a trusted location.
            return np.load(f"{prefix}{file_name}", allow_pickle=True)

        # Drop stale cache entries before reloading.
        matrix_data["cached_scores"].clear()
        matrix_data["cached_valid_idxs"].clear()

        # sketch -> iid mapping, stored inverted as iid -> sketch.
        sketch_to_iid = _load("sketch_to_iid.npy").item()
        matrix_data["iid_to_sketch"] = {v: k for k, v in sketch_to_iid.items()}

        matrix_data["interaction_matrix"] = _load("interaction_matrix.npy")
        matrix_data["user_index_interaction"] = _load("user_index_interaction_matrix.npy").item()
        matrix_data["sketch_index_interaction"] = _load("sketch_index_interaction_matrix.npy").item()

        matrix_data["feature_matrix"] = _load("feature_matrix.npy")

        # Optional brand feature matrix.
        brand_feature_path = f"{prefix}brand_feature_matrix.npy"
        if os.path.exists(brand_feature_path):
            matrix_data["brand_feature_matrix"] = np.load(brand_feature_path, allow_pickle=True)
        else:
            logger.warning("brand_feature_matrix 文件不存在,使用空数组")
            matrix_data["brand_feature_matrix"] = np.array([])

        # Optional brand index map.
        brand_index_path = f"{prefix}brand_index_map.npy"
        if os.path.exists(brand_index_path):
            matrix_data["brand_index_map"] = np.load(brand_index_path, allow_pickle=True).item()
        else:
            logger.warning("brand_index_map 文件不存在,使用空字典")
            matrix_data["brand_index_map"] = {}

        matrix_data["user_index_feature"] = _load("user_index_feature_matrix.npy").item()
        matrix_data["sketch_index_feature"] = _load("sketch_index_feature_matrix.npy").item()

        # The file maps iid -> category; group the iids by category.
        category_to_iid_map = _load("iid_to_category_interaction_matrix.npy").item()
        matrix_data["category_to_iids"] = defaultdict(list)
        for iid, cat in category_to_iid_map.items():
            matrix_data["category_to_iids"][cat].append(iid)

        logger.info(f"资源加载完成,耗时: {time.time() - start_time:.2f}秒")

        # Trigger pre-caching of per-user, per-category scores.
        precache_user_category()

        if os.path.exists(HEAT_VECTOR_FILE):
            with open(HEAT_VECTOR_FILE, 'r', encoding='utf-8') as f:
                heat_json = json.load(f)
            matrix_data["heat_data"] = heat_json.get("data", {})
            logger.info(f"热度向量数据加载完成,共加载 {len(matrix_data['heat_data'])} 个类别")
        else:
            matrix_data["heat_data"] = {}

    except Exception as e:
        # Keep the traceback in the log and chain the cause so the real
        # failure is not hidden behind the generic RuntimeError.
        logger.error(f"资源加载失败: {str(e)}", exc_info=True)
        raise RuntimeError("初始化失败") from e
|
||||
|
||||
|
||||
def precache_user_category():
    """Pre-compute and cache weighted scores for every (user, category) pair.

    The user -> categories mapping comes from ``get_all_user_categories()``.
    For each pair not already in ``matrix_data["cached_scores"]`` a worker
    thread computes:

    * interaction scores: the user's row of ``interaction_matrix`` restricted
      to the category's sketch indices, multiplied by 0.7;
    * feature scores: the user's row of ``feature_matrix`` restricted to the
      category's sketch indices, min-max normalised, multiplied by 0.3.

    A pair is cached only when both score vectors have the same length.
    Results are merged into ``matrix_data["cached_scores"]`` and
    ``matrix_data["cached_valid_idxs"]`` on the calling thread, and a timing
    summary is logged at the end.

    Returns early, with a warning, if the interaction matrix, the feature
    matrix or the user index for the interaction matrix is not loaded.
    """
    required_keys = ("interaction_matrix", "feature_matrix", "user_index_interaction")
    if any(matrix_data[key] is None for key in required_keys):
        logger.warning("资源未加载完成,跳过预缓存")
        return

    start_time = time.perf_counter()
    time_stats = {
        "get_all_user_categories": 0,
        "process_user_category": 0,
        "thread_execution": 0,
        "cache_update": 0,
        "total": 0,
    }

    # Time spent fetching the user -> categories mapping.
    t1 = time.perf_counter()
    user_categories = get_all_user_categories()
    time_stats["get_all_user_categories"] = time.perf_counter() - t1

    precached_count = 0

    def process_user_category(user_id, categories):
        """Compute cache entries for one user.

        Returns ``(local_cache, local_valid_idxs, elapsed)``. ``elapsed`` is the
        time this call spent computing; it is returned instead of being added
        to the shared ``time_stats`` dict because ``+=`` on a shared dict
        entry is not atomic across threads.
        """
        local_cache = {}
        local_valid_idxs = {}
        elapsed = 0.0

        for category in categories:
            cache_key = (user_id, category)
            if cache_key in matrix_data["cached_scores"]:
                continue

            try:
                user_idx_inter = matrix_data["user_index_interaction"].get(user_id)
                user_idx_feature = matrix_data["user_index_feature"].get(user_id)

                # Resolve the category's iids to column indices of each matrix.
                t_iid = time.perf_counter()
                category_iids = matrix_data["category_to_iids"].get(category, [])
                valid_sketch_idxs_inter = [matrix_data["sketch_index_interaction"][iid]
                                           for iid in category_iids if iid in matrix_data["sketch_index_interaction"]]
                valid_sketch_idxs_feature = [matrix_data["sketch_index_feature"][iid]
                                             for iid in category_iids if iid in matrix_data["sketch_index_feature"]]
                elapsed += time.perf_counter() - t_iid

                # Matrix lookups and weighting.
                t_matrix = time.perf_counter()
                processed_inter = np.zeros(len(valid_sketch_idxs_inter))
                if user_idx_inter is not None and valid_sketch_idxs_inter:
                    raw_inter_scores = matrix_data["interaction_matrix"][user_idx_inter, valid_sketch_idxs_inter]
                    processed_inter = raw_inter_scores * 0.7

                processed_feat = np.zeros(len(valid_sketch_idxs_feature))
                if user_idx_feature is not None and valid_sketch_idxs_feature:
                    raw_feat_scores = matrix_data["feature_matrix"][user_idx_feature, valid_sketch_idxs_feature]
                    # Min-max normalise; the epsilon avoids division by zero
                    # when all scores are equal.
                    raw_feat_scores = (raw_feat_scores - np.min(raw_feat_scores)) / (
                        np.max(raw_feat_scores) - np.min(raw_feat_scores) + 1e-8)
                    processed_feat = raw_feat_scores * 0.3
                elapsed += time.perf_counter() - t_matrix

                # Only cache when both vectors have the same length.
                if len(processed_inter) == len(processed_feat):
                    local_cache[cache_key] = (processed_inter, processed_feat)
                    local_valid_idxs[cache_key] = valid_sketch_idxs_inter

            except Exception as e:
                logger.error(f"预缓存失败 (user={user_id}, category={category}): {str(e)}")

        return local_cache, local_valid_idxs, elapsed

    # Fan out one task per user and merge results on this thread.
    t2 = time.perf_counter()
    with ThreadPoolExecutor(max_workers=8) as executor:
        futures = {
            executor.submit(process_user_category, user_id, categories): user_id
            for user_id, categories in user_categories.items()
        }
        for future in futures:
            try:
                t_cache = time.perf_counter()
                cache_part, valid_idxs_part, worker_elapsed = future.result()
                matrix_data["cached_scores"].update(cache_part)
                matrix_data["cached_valid_idxs"].update(valid_idxs_part)
                time_stats["cache_update"] += time.perf_counter() - t_cache
                time_stats["process_user_category"] += worker_elapsed
                precached_count += len(cache_part)
            except Exception as e:
                logger.error(f"线程执行错误: {str(e)}")
    time_stats["thread_execution"] = time.perf_counter() - t2

    time_stats["total"] = time.perf_counter() - start_time

    # Timing summary.
    logger.info(f"""
    预缓存完成,共缓存 {precached_count} 组数据,耗时统计如下:
    - 获取用户类别数据: {time_stats["get_all_user_categories"]:.2f}s
    - 计算用户类别缓存: {time_stats["process_user_category"]:.2f}s
    - 线程任务执行: {time_stats["thread_execution"]:.2f}s
    - 更新缓存数据: {time_stats["cache_update"]:.2f}s
    - 总耗时: {time_stats["total"]:.2f}s
    """)
|
||||
|
||||
|
||||
def get_all_user_categories():
    """Return every user together with the categories found in their log rows.

    Reads the distinct ``(account_id, path)`` pairs from
    ``user_preference_log_prediction`` and converts each path to a category
    with ``get_category_from_path``.

    Returns:
        dict: ``account_id -> set of category strings``. An empty dict is
        returned when the connection or the query fails; the error is logged
        with its traceback.
    """
    query = """
        SELECT DISTINCT account_id, path
        FROM user_preference_log_prediction
    """
    conn = None
    try:
        conn = pymysql.connect(**DB_CONFIG)
        # The context manager closes the cursor even if the query raises.
        with conn.cursor() as cursor:
            cursor.execute(query)
            results = cursor.fetchall()

        # NOTE(review): the unpacking below assumes rows are 2-tuples, i.e.
        # DB_CONFIG does not select a dict cursor — confirm.
        user_categories = defaultdict(set)
        for account_id, path in results:
            user_categories[account_id].add(get_category_from_path(path))

        return dict(user_categories)

    except Exception as e:
        logger.error(f"数据库查询失败: {str(e)}", exc_info=True)
        return {}
    finally:
        if conn:
            conn.close()
|
||||
|
||||
|
||||
def get_category_from_path(path: str) -> str:
    """Derive a category string from a ``/``-separated path.

    The path is split on ``/``. When there are at least four segments, the
    third and fourth (indices 2 and 3) are joined with an underscore.

    Args:
        path: The path to parse.

    Returns:
        ``"<segment2>_<segment3>"``, or ``"unknown"`` when the path has fewer
        than four segments or is not a string (for example ``None``).
    """
    try:
        parts = path.split('/')
    except (AttributeError, TypeError):
        # Non-string input such as None or bytes. Narrower than a bare
        # ``except`` so KeyboardInterrupt / SystemExit are not swallowed.
        return "unknown"

    if len(parts) >= 4:
        return f"{parts[2]}_{parts[3]}"
    return "unknown"
|
||||
|
||||
Reference in New Issue
Block a user