Refactor the whole recommendation pipeline and add a new recommendation mode: one-ask-for-all

This commit is contained in:
pangkaicheng
2025-12-12 17:37:07 +08:00
parent 0e9546aa1a
commit 85390d5e6d
12 changed files with 684 additions and 565 deletions

View File

@@ -5,7 +5,7 @@ from PIL import Image
import json
from tqdm import tqdm
from app.taxonomy import OCCASION, CATEGORY, ALL_CATEGORY
from app.taxonomy import OCCASION, FASHION_TAXONOMY, ALL_SUBCATEGORY_LIST
# data config
@@ -42,7 +42,7 @@ Description: Cut from cardinal-red virgin wool, Armarium's Loren skirt wields ta
Tags: armarium, clothing, in-stock, new, loren, wool, blend, tube
"""
EXAMPLE_1_JSON = json.dumps({
"category": "skirts",
"subcategory": "skirts",
"gender": "female",
"applicable_occasions": [
"Business/workwear", "Evening", "Cocktail / Semi-Formal", "Party / Clubbing", "Formal"
@@ -61,7 +61,7 @@ Description: Crafted from 18k yellow gold and rhodium-plated sterling silver, th
Tags: tateossian, accessories, in-stock, new, mayfair, yellow, gold, rhodium
"""
EXAMPLE_2_JSON = json.dumps({
"category": "jewelry",
"subcategory": "jewelry",
"gender": "female",
"applicable_occasions": [
"Formal", "Black Tie / White Tie", "Bridal / Wedding", "Business/workwear", "Cocktail / Semi-Formal"
@@ -94,20 +94,24 @@ def format_product_info(product):
return info
def generate_full_prompt(product_info, raw_category):
def raw_category_mapping(raw_category: str) -> str:
if raw_category == 'Fine Jewellery And Watches':
category = 'accessories'
return 'accessories'
else:
category = raw_category.lower()
subcategory_list = CATEGORY.get(category)
return raw_category.lower()
def generate_full_prompt(product_info, raw_category):
category = raw_category_mapping(raw_category)
subcategory_list = FASHION_TAXONOMY.get(category)
SYSTEM_PROMPT = f"""You are an expert fashion AI assistant. Your task is to analyze the provided product image and product details to:
1. determine the suitable occasions for wearing or using the item. You must choose occasions ONLY from the following strict list: {json.dumps(OCCASION, indent=4)}. Only relevant suitable or inappropriate occasions should be selected.
2. categorize it into suitable category in strict list: {json.dumps(subcategory_list)}.
2. categorize it into suitable subcategory in strict list: {json.dumps(subcategory_list)}.
3. categorize it into appropriate gender in ["female", "male", "unisex"]
Output Format:
Return ONLY a valid JSON object with four keys: "category", "gender", "applicable_occasions" and "inappropriate_occasions". Do not include any analysis or extra text outside of the final JSON object.
Return ONLY a valid JSON object with four keys: "subcategory", "gender", "applicable_occasions" and "inappropriate_occasions". Do not include any analysis or extra text outside of the final JSON object.
"""
# Assemble the conversation sequence
@@ -140,37 +144,36 @@ product_list = [
]
def validate_results():
if os.path.exists(OUTPUT_FILE):
with open(OUTPUT_FILE, 'r') as f:
final_results = json.load(f)
else:
final_results = {}
def validate_result(result_dict):
subcategory = result_dict.get("subcategory")
gender = result_dict.get("gender")
unfinished_ids = []
for product in product_list:
item_id = product.get('id')
if item_id not in final_results.keys():
unfinished_ids.append(product)
else:
processed_item = final_results[item_id]
category = processed_item.get("category")
gender = processed_item.get("gender")
if not subcategory or not gender:
return False
if subcategory not in ALL_SUBCATEGORY_LIST:
return False
if category not in ALL_CATEGORY:
unfinished_ids.append(product)
if gender not in ['female', 'male', 'unisex']:
return False
return True
if os.path.exists(OUTPUT_FILE):
with open(OUTPUT_FILE, 'r') as f:
final_results = json.load(f)
else:
final_results = {}
if gender not in ['female', 'male', 'unisex']:
unfinished_ids.append(product)
return unfinished_ids, final_results
attemps = 0
while attemps < 3:
unfinished_products = [product for product in product_list if product.get('id') not in final_results.keys()]
attemps += 1
unfinished_products, final_results = validate_results()
completion_ratio = len(unfinished_products) / len(product_list)
if (completion_ratio > 0.95):
print("valid results surpass 95%. Finish Now.")
completion_ratio = len(final_results) / len(product_list)
if (completion_ratio > 0.85):
print("valid results surpass 85%. Finish Now.")
break
else:
print(f"Start {attemps} categorization process. Current ratio: {completion_ratio * 100}%")
@@ -252,11 +255,11 @@ while attemps < 3:
json_str = generated_text[start_idx:end_idx]
result_dict = json.loads(json_str)
final_results[product_id] = result_dict
if validate_result(result_dict):
final_results[product_id] = result_dict
except Exception as e:
print(f"ID {product_id}: FAILED to parse JSON. Raw Output: {generated_text.strip()}")
final_results[product_id] = {"error": str(e), "raw_output": generated_text.strip()}
# Free GPU memory (optional, but recommended for long-running jobs)
del inputs, outputs