From 85390d5e6dd63697a2bb45f8be0682b4d87d3402 Mon Sep 17 00:00:00 2001 From: pangkaicheng <924366729@qq.com> Date: Fri, 12 Dec 2025 17:37:07 +0800 Subject: [PATCH] reconstruct whole recommendation pipeline and add new rec mode one-ask-for-all --- app/config.py | 1 - app/server/ChatbotAgent/agent_server.py | 65 ++- app/server/ChatbotAgent/chatbot_server.py | 4 +- app/server/ChatbotAgent/core/prompt.py | 219 ++++++++ .../ChatbotAgent/core/stylist_agent_server.py | 468 +++++++++--------- app/server/ChatbotAgent/core/system_prompt.py | 32 -- .../ChatbotAgent/core/vector_database.py | 23 +- app/server/utils/img_operation.py | 142 +----- app/taxonomy.py | 91 ++-- data_ingestion/README.md | 59 +-- data_ingestion/process_item.py | 73 +-- data_ingestion/run_ingestion.py | 72 +-- 12 files changed, 684 insertions(+), 565 deletions(-) create mode 100644 app/server/ChatbotAgent/core/prompt.py delete mode 100644 app/server/ChatbotAgent/core/system_prompt.py diff --git a/app/config.py b/app/config.py index af3f070..8ae278c 100644 --- a/app/config.py +++ b/app/config.py @@ -31,7 +31,6 @@ class Settings(BaseSettings): # 路径配置参数 DATA_ROOT: str = Field(default="/workspace/lc_stylist_agent/data", description="数据根目录") - LOCAL_IMAGE_DIR: str = Field(default="/workspace/lc_stylist_agent/Data/image_data", description="图片数据目录") OUTFIT_OUTPUT_DIR: str = Field(default="/workspace/lc_stylist_agent/data/outfit_output", description="生成的搭配图片输出目录") STYLIST_GUIDE_DIR: str = Field(default="/workspace/lc_stylist_agent/data/stylist_guide", description="风格指南文本目录") diff --git a/app/server/ChatbotAgent/agent_server.py b/app/server/ChatbotAgent/agent_server.py index cf8224d..8d357a2 100644 --- a/app/server/ChatbotAgent/agent_server.py +++ b/app/server/ChatbotAgent/agent_server.py @@ -4,6 +4,7 @@ import uuid from enum import Enum from typing import List from pydantic import Field +import time import litserve as ls from pydantic import BaseModel @@ -12,7 +13,7 @@ from app.server.ChatbotAgent.core.data_structure import Message, Role from app.server.ChatbotAgent.core.llm_interface import AsyncGeminiLLM from app.server.ChatbotAgent.core.redis_manager import RedisManager from app.server.ChatbotAgent.core.stylist_agent_server import AsyncStylistAgent -from app.server.ChatbotAgent.core.system_prompt import SUMMARY_PROMPT +from app.server.ChatbotAgent.core.prompt import SUMMARY_PROMPT from app.server.ChatbotAgent.core.vector_database import VectorDatabase logger = logging.getLogger(__name__) @@ -54,7 +55,6 @@ class AgentRequestModel(BaseModel): batch_sources: List[str] callback_url: str gender: str - max_len: int = 9 class LCAgent(ls.LitAPI): @@ -118,7 +118,6 @@ class LCAgent(ls.LitAPI): user_id=request.user_id, gender=request.gender, callback_url=request.callback_url, - max_len=request.max_len, outfit_ids=outfit_ids ) logger.info("--- Final Recommendation Results ---") @@ -162,9 +161,17 @@ class LCAgent(ls.LitAPI): return str(parsed_result.summary), [occ.value for occ in parsed_result.occasions] async def recommend_outfit( - self, request_summary: str, occasions: List[str], stylist_name: str, start_outfit: List = [], batch_sources: List[str] = [], - num_outfits: int = 1, user_id: str = "test", gender: str = "male", - callback_url: str = None, max_len: int = 9, outfit_ids=None + self, + request_summary: str, + occasions: List[str], + stylist_name: str, + start_outfit: List = [], + batch_sources: List[str] = [], + num_outfits: int = 1, + user_id: str = "test", + gender: str = "male", + callback_url: str = None, + outfit_ids=None ): """ 基于用户的对话历史和需求,推荐一套搭配。 @@ -181,17 +188,16 @@ class LCAgent(ls.LitAPI): stylist_agent_kwages = self.stylist_agent_kwages.copy() for i in range(num_outfits): stylist_agent_kwages['outfit_id'] = outfit_ids[i] - stylist_agent_kwages['max_len'] = max_len + stylist_agent_kwages['stylist_name'] = stylist_name + stylist_agent_kwages['gender'] = gender agent = AsyncStylistAgent(**stylist_agent_kwages) - task = agent.run_styling_process( + task = agent.run_iterative_styling( request_summary=request_summary, occasions=occasions, - stylist_name=stylist_name, start_outfit=start_outfit, batch_sources=batch_sources, user_id=user_id, callback_url=callback_url, - gender=gender, ) tasks.append(task) task_map[task] = {"outfit_id": outfit_ids[i], "retries": 0} @@ -223,11 +229,12 @@ class LCAgent(ls.LitAPI): # 重新创建任务 (可能需要短暂延迟,例如 time.sleep(1),但在此异步环境中,我们会通过重新创建 agent/task 来实现) stylist_agent_kwages['outfit_id'] = outfit_id + stylist_agent_kwages['stylist_name'] = stylist_name + stylist_agent_kwages['gender'] = gender agent = AsyncStylistAgent(**stylist_agent_kwages) - new_task = agent.run_styling_process( + new_task = agent.run_iterative_styling( request_summary=request_summary, occasions=occasions, - stylist_name=stylist_name, start_outfit=start_outfit, batch_sources=batch_sources, user_id=user_id, @@ -284,31 +291,45 @@ if __name__ == "__main__": with open("./data/2025_q4/request_test.json", "r") as f: request_data = json.load(f) - tasks = [] - for test_content in request_data[:30]: + tasks_with_metadata = [] + for test_content in request_data[20:25]: occasions = test_content['occasions'] request_summary = test_content['request_summary'] - stylist_agent_kwages['max_len'] = 5 for stylist_name in ["edi", "vera"]: stylist_agent_kwages['outfit_id'] = test_content['test_case_id'] + "_" + "_".join(occasions) + f"_{stylist_name}" + stylist_agent_kwages['stylist_name'] = stylist_name + stylist_agent_kwages['gender'] = "female" agent = AsyncStylistAgent(**stylist_agent_kwages) - task = agent.run_styling_process( + coro = agent.run_iterative_styling( + # coro = agent.run_quick_batch_styling( request_summary=request_summary, occasions=occasions, - stylist_name=stylist_name, start_outfit=[], batch_sources=["2025_q4"], user_id=test_content['test_case_id'], callback_url="http://mock-callback.com/result", - gender="female", ) - tasks.append(task) - - results = await asyncio.gather(*tasks, return_exceptions=True) - for result in results: + # 记录任务开始前的单调时间,并将元数据添加到列表中 + description = f"Batch mode - Case {test_content['test_case_id']} - Stylist {stylist_name}" + + tasks_with_metadata.append((coro, description)) + + tasks_only = [coro for coro, _ in tasks_with_metadata] + print(f"--- Launching {len(tasks_only)} concurrent styling tasks... ---") + results = await asyncio.gather(*tasks_only, return_exceptions=True) + + time_samples = [] + for i, result in enumerate(results): + coro, description = tasks_with_metadata[i] if isinstance(result, Exception): print(f"❌ 任务失败: {type(result).__name__} - {str(result)}") continue + else: + response, duration = result + time_samples.append(duration) + print(f"✅ 任务成功 ({description}) [Time: {duration:.2f}s].") + + print(f"Average time consumption is {sum(time_samples) / len(time_samples)}") try: # 使用 asyncio.run() 来执行顶层异步函数 diff --git a/app/server/ChatbotAgent/chatbot_server.py b/app/server/ChatbotAgent/chatbot_server.py index 87766b1..5cd5005 100644 --- a/app/server/ChatbotAgent/chatbot_server.py +++ b/app/server/ChatbotAgent/chatbot_server.py @@ -8,10 +8,8 @@ from app.config import settings from google.genai import types from app.server.ChatbotAgent.core.data_structure import Message, Role -from app.server.ChatbotAgent.core.llm_interface import AsyncGeminiLLM from app.server.ChatbotAgent.core.redis_manager import RedisManager -from app.server.ChatbotAgent.core.system_prompt import BASIC_PROMPT -from app.server.ChatbotAgent.core.vector_database import VectorDatabase +from app.server.ChatbotAgent.core.prompt import BASIC_PROMPT logger = logging.getLogger(__name__) diff --git a/app/server/ChatbotAgent/core/prompt.py b/app/server/ChatbotAgent/core/prompt.py new file mode 100644 index 0000000..0090b7c --- /dev/null +++ b/app/server/ChatbotAgent/core/prompt.py @@ -0,0 +1,219 @@ +BASIC_PROMPT = """You are a professional, friendly, and insightful AI {gender}'s styling assistant. + +Your primary mission is to engage in a multi-turn conversation with the user to fully understand their dressing intent. You must adopt a professional yet approachable tone. + +CONVERSATION GOALS: +1. **Occasion:** Determine the specific event (e.g., romantic dinner, summer wedding, business meeting). +2. **Style:** Pinpoint the desired aesthetic (e.g., classic elegance, edgy, minimalist, bohemian). +3. **Vibe/Details:** Gather any mood or specific constraints (e.g., needs to be comfortable, requires light colors, no bare shoulders). +4. **Item Preference:** Ask the user if they have any specific preferences for an item type or silhouette (e.g., preference for a dress, skirt, tailored pants, or a particular neckline/length). + +GUIDANCE FOR RESPONSE GENERATION: +- After the user's initial request (e.g., "I want a chic outfit for dinner."), immediately reply with a friendly, targeted follow-up question to elicit the most crucial missing information (usually a combination of **Occasion** and **Style**). +- Be concise. Ask only 1 to 2 essential questions per turn. +- You must gather sufficient, clear intent before proceeding to actual clothing recommendations. + +OUTPUT FORMAT INSTRUCTION: +- **DO NOT** use any Markdown formatting whatsoever (e.g., do not use asterisks (*), bold text (**), lists, or code blocks). +- **ONLY** output the plain text response spoken by the AI Assistant. + +Example Follow-up (mimicking a conversational flow): +User: I want a chic outfit for dinner. +Your Response: Hey there! A chic dinner outfit, I love that! To give you the perfect recommendations, tell me: is this a romantic date, business dinner, or celebration with friends? And what's your go-to style vibe: classic elegance or something with more edge?""" + +SUMMARY_PROMPT = """ +You are an expert fashion request analyzer. Analyze the conversation history provided by the user. +Your task is to: + +1. Identify the most appropriate occasions from the allowed list based on the user's intent. +2. Write a detailed summary string that captures the user's style preferences, specific item requests, disliked items, body concerns, and color preferences. This summary will be used by a stylist to recommend outfits. + +Extract this information accurately from the chat history. +""" + + +from app.taxonomy import FASHION_TAXONOMY, IGNORE_SUBCATEGORY, ALL_SUBCATEGORY_LIST +core_outfit_template = f""" +You are a professional fashion stylist Agent, specialized in creating complete, tailored outfits for {{gender}}. Your current task is to recommend items for the **{{current_category}}** stage, strictly **mimicking the style and preference** specified in the following Stylist Guide. + +Your task is to **create a cohesive and complete outfit**, strictly adhering to **BOTH** the user's explicit **Request Summary** and the **Outfit Style Guide**. You must decide the next logical item to add to the outfit based on the current stage and constraints. Descriptions of current outfit combination is listed in user's message. + +--- +## Request from the User: +{{request_summary}} + +## Core Guidance Document: Outfit Style Guide +{{stylist_guide}} +--- + +## Your Workflow and Constraints + +1. **Style Adherence**: You must strictly observe all rules in the Style Guide concerning **color palette, fit, layering principles, pattern restrictions , shoe coordination**. +2. **Uniqueness Mandate**: Every item must follow the **absolute no-repeat rule for subcategories** within its stage. Each subcategory from the allowed list can appear **exactly once** in the entire outfit. Furthermore, the categories 'dresses' and 'pants' and 'skirts' are mutually exclusive; they NORMALLY cannot be included in the same outfit. +3. **Step Planning**: The styling sequence must follow a logical approach (e.g., top-down, inside-out for clothing). Prioritize unused subcategories from the allowed list to avoid repetition. +4. **Structured Output**: Your output MUST be a valid JSON object. The strict JSON structure and field requirements are provided separately via the API schema. + +You must only output one of two actions: "recommend_item" or "stop". +4.1. **recommend_item**: Use this action to suggest the next single item. + * **subcategory**: Must be strictly no repeats, and drawn from the allowed list. + * **description**: This must be an **extremely detailed and precise** description for the vector search. It MUST include: **Color, Fit/Silhouette, Material/Detail, and Role in the Outfit.** +You must strictly use the **JSON format** for your output, as follows: +```json +{{{{ +"action": "recommend_item", +"subcategory": "YOUR_ITEM_SUBCATEGORY", +"description": "YOUR_DETAILED_DESCRIPTION", +"reason": "YOUR_RECOMMENDATION_REASON" +}}}} + +4.2. **stop**: Use this action when the Termination Condition is met. + * **reason**: This field is mandatory when stopping, and must clearly state why the outfit is complete. +You must strictly use the **JSON format** for your output, as follows: +{{{{ +"action": "stop", +"subcategory": "", +"description": "", +"reason": "CORE_OUTFIT_COMPLETE" +}}}} + +5. **Termination Condition**: Terminate when the below condition is fully met: + 5.1. **CLOTHING Stage**: The core clothing part of the outfit is complete, meaning the combination of items effectively achieves **full body coverage** (e.g., includes both a top/upper garment and a bottom/lower garment, or a single full-body piece like a dress/jumpsuit). Additionally, **all mandatory elements** stipulated in the Style Guide are satisfied. *(Note: Typically, {{max_len}} items are sufficient for this stage.)* + 5.2. **SHOES Stage**: **Exactly one (1) item** has been successfully recommended, as shoes are a **mandatory component** for any complete outfit. + 5.3. **BAGS Stage**: **Exactly one (1) item** has been successfully recommended, **OR** the recommendation is skipped if the Style Guide or the User Request **does not mandate** a bag for the specific occasion (i.e., the bag is considered optional). + +6. **Context Dependency**: The user's next input (if not Start) will contain the **image and description of the selected item**. When recommending the next item: + a) First verify the subcategories of all already selected items to ensure no duplicates; + b) Select an unused subcategory from the allowed list as the priority; + c) Ensure the recommended item coordinates with the already selected items and complies with all rules in the Style Guide. +Now, please start building an outfit (with strictly unique categories for all items) and output the JSON for the first item. +""" + + +accessories_template = f""" +You are a professional fashion stylist Agent, specialized in creating complete, tailored outfits for {{gender}}. Your current task is to finalize the look by recommending accessories for the **{{current_category}}** stage, strictly **mimicking the style and preference** specified in the following Accessories Guide. + +Your final task is to **select the perfect set of accessories** to complete the given outfit. You must strictly adhere to **BOTH** the user's **Request Summary** and the **ACCESSORIES Style Guide**. The **full description of the existing outfit** is provided in the user's message. + +--- +## CONTEXT +[User Request]: {{request_summary}} + +[Accessories Style Guide]: +{{stylist_guide}} + +--- +## ACCESSORIES GENERATION RULES + +1. **Batch Recommendation**: You must output the **COMPLETE LIST of accessories** in a single response using the 'recommended_accessories' list defined in the schema. Do not recommend items one by one. +2. **Quantity Constraint**: The total number of accessories recommended in the list must not exceed **{{max_len}}** items. Typically, 1 to {{max_len}} distinct items are required to complete a look. +3. **Harmony & Guide Compliance**: + - Assess the existing outfit (provided in the user's message) and ensure all accessories complement its style, color palette, and occasion. + - **Strictly follow the [Accessories Style Guide]** regarding material types (e.g., metals like gold/silver), total numbers allowed, and specific layering requirements (e.g., mandated watch or jewelry layering). +4. **Exclusion List**: Subcategories in the following list are strictly excluded from recommendation: ({IGNORE_SUBCATEGORY}). +5. **Description Quality**: The 'description' field for each accessory must be **extremely detailed and precise** for high-accuracy vector search, including: **Color, Material/Detail, and the specific Role in the Outfit.** + +Generate the final, complete accessories list now. +""" + +all_items_template = f""" +You are a professional fashion stylist Agent, specialized in creating complete, tailored outfits for {{gender}}. Your task is to **generate a Complete, Head-to-Toe Outfit** in a **Single Batch**, strictly **mimicking the style and preference** specified in the Stylist Guide. + +You must create a cohesive look that includes **Clothing, Shoes, Bags, and Accessories**. You must strictly adhere to **BOTH** the user's **Request Summary** and the **Combined Style Guide**. + +--- +## Request from the User: +{{request_summary}} + +## Core Guidance Document: Combined Style Guide +{{stylist_guide}} +--- + +## GENERATION WORKFLOW & RULES + +1. **Holistic Styling**: You are NOT recommending items step-by-step. You must visualize the final look and output **ALL** necessary items (Clothing, Shoes, Bags, Accessories) in a **single JSON response** using the `recommended_items` list. + +2. **Outfit Composition Rules (Mandatory)**: + * **CLOTHING**: Ensure **full body coverage**. You must include either [Top + Bottom] OR [One-piece (e.g., Dress/Jumpsuit)]. 'Dresses' and 'Skirts/Pants' are mutually exclusive. + * **SHOES**: **Exactly one (1) pair** of shoes is MANDATORY. + * **BAGS**: Recommend **0 or 1 bag**. Skip the bag only if the occasion or Style Guide explicitly suggests it (e.g., home wear, yoga). + * **ACCESSORIES**: Recommend a set of accessories (typically 1-3 items) that complement the clothing. Follow metal/material constraints in the guide. + Number of items in outfit must not exceed {{max_len}}. + +3. **Uniqueness Mandate**: + * Each **subcategory** belonging to CLOTHING (e.g., 't-shirts', 'sweaters', 'jacket') can appear **EXACTLY ONCE** in the entire list. + * But **subcategory** belonging to ACCESSORIES can repeat. + +4. **Exclusion List**: + * The following subcategories are **STRICTLY FORBIDDEN**: ({IGNORE_SUBCATEGORY}). Do not include them in your recommendation. + +5. **Style Adherence**: + * Ensure all items coordinate in **color, fit, and material**. + * Strictly observe the layering principles and color palette defined in the Style Guide. + +6. **Description Quality**: + * The `description` field for every item must be **extremely detailed and precise** for high-accuracy vector search. + * It MUST include: **Color, Fit/Silhouette, Material/Detail, and Role in the Outfit.** + +## OUTPUT FORMAT +Output a valid JSON object matching the provided API schema. The `recommended_items` array must contain all the items for this outfit. + +Generate the complete outfit list now. +""" + + +def build_iterative_schema(current_category): + schema = { + "type": "object", + "properties": { + "action": {"type": "string", "enum": ["recommend_item", "stop"]}, + "subcategory": { + "type": "string", + "description": "The subcategory this single item. Only present if action is 'recommend_item'", + "enum": FASHION_TAXONOMY[current_category] + }, + "description": { + "type": "string", + "description": "an **extremely detailed and precise** description of the item. This description is used for **high-accuracy vector search** in the database. It should include Color, Fit/Silhouette, Material/Detail, Role in the Outfit." + }, + "reason": {"type": "string", "description": "The reason for the current action. Required if action is 'stop' (to summarize the final outfit)."} + }, + "required": ["action", "subcategory", "description", "reason"] + } + return schema + + +def build_batch_schema(specified_category: str=""): + assert(specified_category in FASHION_TAXONOMY.keys() or specified_category == "") + if not specified_category: + category_range_desc = "the complete final outfit (including all categories)" + subcategory_list = ALL_SUBCATEGORY_LIST + else: + category_range_desc = specified_category + subcategory_list = FASHION_TAXONOMY[specified_category] + schema = { + "type": "object", + "properties": { + "reason": { + "type": "string", + "description": f"The justification for the selection of {category_range_desc}. This summary must explain how the recommended items meet the user's request and style requirements." + }, + "recommended_items": { + "type": "array", + "description": "A list of descriptions of recommended items.", + "items": { + "type": "object", + "properties": { + "description": {"type": "string", "description": f"The detailed description for this {specified_category} item."}, + "subcategory": { + "type": "string", + "description": "The subcategory of the recommended item.", + "enum": subcategory_list + }, + }, + "required": ["subcategory", "description"] + } + } + }, + "required": ["recommended_items", "reason"] + } + return schema diff --git a/app/server/ChatbotAgent/core/stylist_agent_server.py b/app/server/ChatbotAgent/core/stylist_agent_server.py index f546f61..144b321 100644 --- a/app/server/ChatbotAgent/core/stylist_agent_server.py +++ b/app/server/ChatbotAgent/core/stylist_agent_server.py @@ -1,12 +1,10 @@ -import asyncio import io import json import logging import os -import random -import uuid from typing import List, Dict, Any, Optional from copy import deepcopy +import time from google import genai from google.cloud import storage @@ -16,25 +14,28 @@ from app.server.utils.img_operation import merge_images_to_square from app.server.utils.minio_client import minio_client, oss_upload_image from app.server.utils.request_post import post_request from app.config import settings -from app.taxonomy import CATEGORY, ALL_CATEGORY, IGNORE_CATEGORY +from app.server.ChatbotAgent.core.prompt import ( + core_outfit_template, + accessories_template, + all_items_template, + build_iterative_schema, + build_batch_schema +) +from app.taxonomy import FASHION_TAXONOMY, ALL_SUBCATEGORY_LIST logger = logging.getLogger(__name__) -IGNORE_CATEGORY = set(IGNORE_CATEGORY) -CLOTHING_CATEGORY = set(CATEGORY['clothing'] + CATEGORY['shoes'] + CATEGORY['bags']) - IGNORE_CATEGORY -ACCESSORY_CATEGORY = set(CATEGORY['accessories']) - IGNORE_CATEGORY - - class AsyncStylistAgent: - def __init__(self, local_db, max_len: int, gemini_model_name: str, outfit_id=str): + def __init__(self, local_db: str, gemini_model_name: str, outfit_id: str, stylist_name: str, gender: str): # self.outfit_items: List[Dict[str, str]] = [] self.outfit_id = outfit_id + self.stylist_name = stylist_name + self.gender = gender self.gemini_client = genai.Client( vertexai=True, project='aida-461108', location='us-central1' ) self.local_db = local_db - self.max_len = max_len self.gemini_model_name = gemini_model_name self.stop_reason = "" self.headers = { @@ -44,49 +45,6 @@ class AsyncStylistAgent: 'Connection': "keep-alive", 'Content-Type': "application/json" } - self.main_clothing_schema = { - "type": "object", - "properties": { - "action": {"type": "string", "enum": ["recommend_item", "stop"]}, - "category": { - "type": "string", - "description": "The category of the single clothing item being recommended in this step (e.g., 'outerwear', 'bottoms'). Only present if action is 'recommend_item'.", - "enum": CLOTHING_CATEGORY - }, - "description": { - "type": "string", - "description": "an **extremely detailed and precise** description of the item. This description is used for **high-accuracy vector search** in the database. It should include Color, Fit/Silhouette, Material/Detail, Role in the Outfit." - }, - "reason": {"type": "string", "description": "The reason for the current action. Required if action is 'stop' (to summarize the final outfit)."} - }, - "required": ["action"] - } - self.accessory_schema = { - "type": "object", - "properties": { - "reason": { - "type": "string", - "description": "The justification for completing the recommendation and the summary of the final outfit." - }, - "recommended_accessories": { - "type": "array", - "description": "A list of accessories recommended to complete the outfit.", - "items": { - "type": "object", - "properties": { - "category": { - "type": "string", - "description": "The category of the accessory (e.g., jewelry, watches, bags).", - "enum": ACCESSORY_CATEGORY - }, - "description": {"type": "string", "description": "The detailed description for this accessory item."} - }, - "required": ["category", "description"] - } - } - }, - "required": ["recommended_accessories", "reason"] - } # 存储桶配置 try: @@ -115,100 +73,6 @@ class AsyncStylistAgent: except Exception as e: raise Exception(f"Failed to load style guide from {guide_path}, {acc_guide_path}: {e}") - def _build_main_clothing_prompt(self, request_summary: str = "", gender: str = "male", stylist_guide: str = "") -> str: - """Constructs the complete System Prompt.""" - - clothing_gender = "men's clothing" if gender == "male" else "women's clothing" - - # Insert the style_guide content into the template - template = template = f""" - You are a professional fashion stylist Agent, specialized in creating complete, tailored outfits for {clothing_gender}. Only main clothing including 'bags' is needed, excluding accessories like 'jewelry', 'hats', 'belts', etc. - - Your task is to **create a cohesive and complete outfit**, strictly adhering to **BOTH** the user's explicit **Request Summary** and the **Outfit Style Guide**. You must decide the next logical item to add to the outfit based on the currently selected items (if any). - - --- - ## Request from the User: - {request_summary} - - ## Core Guidance Document: Outfit Style Guide - {stylist_guide} - --- - - ## Your Workflow and Constraints - - 1. **Style Adherence**: You must strictly observe all rules in the Style Guide concerning **color palette, fit, layering principles, pattern restrictions , shoe coordination**. - 2. **Category Uniqueness Mandate**: Every outfit must follow the **absolute no-repeat rule for clothing categories** — each category from the allowed list can appear **exactly once** in the entire outfit. This rule is non-negotiable, even if the user explicitly requests repeating a category. Furthermore, the categories 'dresses' and 'pants' and 'skirts' are mutually exclusive; they NORMALLY cannot be included in the same outfit. - 3. **Step Planning**: The styling sequence must follow a **top-down, inside-out** approach: First major garments (tops/outerwear/bottoms/dresses) then shoes. When selecting the next item, prioritize unused categories from the allowed list to avoid repetition. - 4. **Structured Output**: Every response must recommend the **next single item** (from an unused category). You must strictly use the **JSON format** for your output, as follows: - - ```json - {{ - "action": "recommend_item", - "category": "YOUR_ITEM_CATEGORY", - "description": "YOUR_DETAILED_DESCRIPTION" - }} - ``` - - * `action`: Must always be `"recommend_item"` until the outfit is complete. - * `category`: Must be an unused category from the following list: {list(CLOTHING_CATEGORY)} (strictly no repeats, per the Category Uniqueness Mandate). - * `description`: This must be an **extremely detailed and precise** description of the item. This description is used for **high-accuracy vector search** in the database and must include: - * **Color** (e.g., milk tea, pure white, dark gray) - * **Fit/Silhouette** (e.g., Oversize, loose, slim-fit) - * **Material/Detail** (e.g., 100% cotton, linen, gold clasp, thin stripe, checkered pattern) - * **Role in the Outfit** (e.g., serves as the innermost base layer for layering; acts as the crucial tie accent for the smart casual look) - - 5. **Termination Condition**: Terminate when **both** conditions are fully met: - a) The entire outfit is complete and all mandatory elements stipulated in the Style Guide are satisfied; - b) No duplicate categories are present (strict compliance with the Category Uniqueness Mandate). - When terminating, output the following JSON format: - ```json - {{ - "action": "stop", - "reason": "OUTFIT_COMPLETE_AND_MEETS_ALL_MINI_GUIDELINES" - }} - ``` - Normally, {self.max_len} items are totally enough for an outfit. - - 6. **Context Dependency**: The user's next input (if not Start) will contain the **image and description of the selected item**. When recommending the next item: - a) First verify the categories of all already selected items to ensure no duplicates; - b) Select an unused category from the allowed list as the priority; - c) Ensure the recommended item coordinates with the already selected items and complies with all rules in the Style Guide. - Now, please start building an outfit (with strictly unique categories for all items) and output the JSON for the first item. - """ - return template.strip() - - def _build_accessory_prompt(self, request_summary: str, gender: str, accessories_guide: str) -> str: - """ - 构建配饰推荐 (Accessories) 的 System Prompt。 - 特点:强调基于现有穿搭 (Context Aware),批量推荐 (Batch Recommendation),做最后的点缀。 - """ - clothing_gender = "men's clothing" if gender == "male" else "women's clothing" - - template = f""" - You are an expert Accessories Stylist for {clothing_gender}. - Your task is to select the perfect set of accessories to complete an existing outfit. - - --- - ## CONTEXT - [User Request]: {request_summary} - - [Accessories Style Guide]: - {accessories_guide} - - --- - ## STRICT RULES - 1. **Batch Recommendation**: Do NOT recommend items one by one. You must output the **COMPLETE LIST** of accessories (e.g., jewelry, bag, watch, hat) in a single response using the 'recommended_accessories' list. - 2. **Allowed Categories**: Select only from: {list(ACCESSORY_CATEGORY)}. - 3. **Harmony & Constraints**: - - The accessories must complement the [Current Outfit Base]. - - Strictly follow the [Accessories Style Guide] regarding metals (gold/silver), numbers, and prohibited items. - - If the guide mandates a watch or specific jewelry layering, ensure they are included. - 4. **Quantity**: Typically recommend 2-4 distinct accessory items to complete the look. - - Generate the final accessories list now. - """ - return template.strip() - async def _call_gemini(self, user_input: str, user_id: str, file_name: str, output_schema: Dict[str, Any], image_bytes: bytes = None, system_prompt: str = "") -> str: """ 实际调用 Gemini API 的函数,接受文本和用户的id。 @@ -300,7 +164,7 @@ class AsyncStylistAgent: print(f"Raw response: {response_text}") return None - def _get_next_item(self, item_description: str, category: str, occasions: List[str], batch_sources: List[str] = [], gender: str = "female") -> Optional[Dict[str, str]]: + def _get_next_item(self, item_description: str, category: str, subcategory: str, occasions: List[str], batch_sources: List[str] = [], gender: str = "female") -> Optional[Dict[str, str]]: """ 1. 根据描述生成嵌入。 2. 查询本地数据库以找到最佳匹配项。 @@ -330,165 +194,311 @@ class AsyncStylistAgent: # 3. 模拟 Agent 审核(实际应用中,你需要将图片发回给 Agent进行审核) best_meta = results[0] # 第一个 batch 的第一个 metadata item_id = best_meta['item_id'].replace("_img", "") + batch_source = best_meta['batch_source'] return { "item_id": item_id, # 从 metadata 字典中安全获取 "category": best_meta['category'], - "gpt_description": item_description, 'description': best_meta['description'], + "subcategory": best_meta['subcategory'], + "gpt_description": item_description, + "gpt_subcategory": subcategory, # 假设 'item_path' 存储在 metadata 中,或从 'item_id' 推导 # 这里假设 item_id 就是文件名的一部分 - "image_path": os.path.join(f"{item_id}.jpg") + "image_path": os.path.join(settings.DATA_ROOT, batch_source, 'image_data', f"{item_id}.jpg") } + + def _build_system_prompt(self, template: str, request_summary: str = "", stylist_guide: str = "", current_category: str = "clothing", max_len: int=4) -> str: + # Insert the style_guide content into the template + sys_template = template.format( + gender=self.gender, + current_category=current_category.upper(), + request_summary=request_summary, + stylist_guide=stylist_guide, + max_len=max_len + ) + return sys_template.strip() - def _build_user_input(self, recommend_acc=False) -> str: + def _build_user_input(self, current_category: str, existing_subcategories: str) -> str: """构建发送给 Gemini 的用户输入,包含已选单品信息。""" if not self.outfit_items: - return "Start" - - # 将已选单品的信息作为上下文发回给 Agent - context = "Selected fashion items:\n" - for ii, item in enumerate(self.outfit_items): - context += f"{ii + 1}. Category: {item['category']}. Description: {item['description']}\n" - if not recommend_acc: - context += "\nPlease recommend the next single item based on the selected items, user's request, and style guide." + context = "" else: - context += "\nPlease recommend a complete list of accessories to complement the selected outfit based on the user's request and accessories style guide." + context = "Selected fashion items:\n" + + # 将已选单品的信息作为上下文发回给 Agent + for ii, item in enumerate(self.outfit_items): + context += f"{ii + 1}. Category: {item['category']}. Subcategory: {item['subcategory']}. Description: {item['description']}\n" + + if current_category == 'clothing': + context += f"\nRecommend the next single item based on the selected items, user's request, and style guide. 【CRITICAL CONSTRAINT】You MUST strictly **maintain uniqueness**; do not recommend any clothing whose **Subcategory** is already present in this exclusion list: {existing_subcategories}." + elif current_category in ['shoes', 'bags']: + context += f"\nRecommend the next {current_category} based on the selected items, user's request, and style guide." + elif current_category == 'accessories': + context += f"\nRecommend a complete list of accessories to complement the selected outfit based on the user's request and accessories style guide. 【CRITICAL CONSTRAINT】You MUST strictly **maintain uniqueness**; do not recommend any accessories whose **Subcategory** is already present in this exclusion list: {existing_subcategories}." + elif current_category == 'all': + context += "\nRecommend a **complete, full outfit**, including all items (clothing, shoes, bags, and accessories), strictly following the Request Summary and Style Guide. Output the **complete list** of items in a single JSON response." return context - def post_operation(self, response_data: Dict[str, Any], status: str, message: str, callback_url: str): + def post_operation(self, status: str, message: str, callback_url: str, img_path: str): """处理完成后的回调操作。""" if settings.LOCAL == 0: - response_data['items'] = deepcopy(self.outfit_items) - response_data['status'] = status - response_data['message'] = message + response_data = { + 'items': deepcopy(self.outfit_items), + 'status': status, + 'message': message, + 'path': img_path, + 'outfit_id': self.outfit_id + } response = post_request(url=callback_url, data=json.dumps(response_data), headers=self.headers) logger.info(f"request data :{response_data} | JAVA callback info -> status:{response.status_code} | message:{response.text}") + return response_data + else: + return {} - async def run_styling_process(self, request_summary, occasions, stylist_name, start_outfit=[], batch_sources=[], user_id="test", callback_url="", gender: str = "male"): - self.outfit_items = start_outfit - """主流程控制循环。""" - print(f"--- Starting Agent (Outfit ID: {self.outfit_id}) ---") - - stylist_guide, accessories_guide = self._load_style_guide(stylist_name) - system_prompt = self._build_main_clothing_prompt(request_summary, gender, stylist_guide) - - response_data = { - "status": "", - "message": "", - "path": "", - "outfit_id": self.outfit_id, - "items": [] - } - logger.info(response_data) - url = f'{callback_url}/api/style/callback' - file_name = self.outfit_id - + async def _execute_iterative_recommendation( + self, + current_category: str, + system_prompt: str, + schema: Dict, + max_len: int, + occasions: List[str], + batch_sources: List[str], + user_id: str, + url: str + ): recommend_timestep = 0 gemini_data = {'action': 'start'} - while recommend_timestep < self.max_len and gemini_data.get('action') != 'stop': + existing_subcategories = [] + while recommend_timestep < max_len and gemini_data.get('action') != 'stop': recommend_timestep += 1 # 1. 准备用户输入(上下文) - user_input = self._build_user_input() + user_input = self._build_user_input(current_category, ", ".join(existing_subcategories)) # 2. 把图片组装起来供api调用 - response_data['path'], image_bytes = await self._merge_images(file_name, user_id, stylist_name) + merged_image_path, image_bytes = await self._merge_images(self.outfit_id, user_id, self.stylist_name) # 3. 调用 Gemini Agent - gemini_response_text = await self._call_gemini(user_input, user_id, file_name, self.main_clothing_schema, image_bytes, system_prompt) + gemini_response_text = await self._call_gemini( + user_input, + user_id, + self.outfit_id, + schema, + image_bytes, + system_prompt + ) gemini_data = self._parse_gemini_response(gemini_response_text) if not gemini_data: print("Agent 返回无效响应,终止流程。") self.post_operation( - response_data, status="failed", message="Agent returned invalid response, terminating process.", - callback_url=url + callback_url=url, + img_path=merged_image_path, ) break # 处理推荐单品 if gemini_data.get('action') == 'recommend_item': - category = gemini_data.get('category') + subcategory = gemini_data.get('subcategory') description = gemini_data.get('description') # 4a. 检查类别是否有效 (重要步骤) - if category not in CLOTHING_CATEGORY: + if subcategory not in FASHION_TAXONOMY[current_category]: self.post_operation( - response_data, status="continue", - message=f"Invalid category recommended by Agent: {category}. Requesting Agent to re-output.", - callback_url=url + message=f"Invalid subcategory recommended by Agent: {subcategory}. Requesting Agent to re-output.", + callback_url=url, + img_path=merged_image_path, ) continue # 4b. 在本地 DB 中查询单品 - new_item = self._get_next_item(description, category, occasions, batch_sources, gender) - if not new_item or new_item['item_id'] in [x['item_id'] for x in self.outfit_items]: + new_item = self._get_next_item(description, current_category, subcategory, occasions, batch_sources, self.gender) + if not new_item: self.post_operation( - response_data, status="continue", - message=f"No matching item is found or item duplicated. Ask Gemini to re-output.", - callback_url=url + message=f"No matching item is found. Ask Gemini to re-output.", + callback_url=url, + img_path=merged_image_path, + ) + continue + elif new_item['subcategory'] in [x['subcategory'] for x in self.outfit_items]: + self.post_operation( + status="continue", + message=f"{new_item['item_id']}'s subcategory {new_item['subcategory']} duplicated. Ask Gemini to re-output.", + callback_url=url, + img_path=merged_image_path, + ) + continue + elif new_item['item_id'] in [x['item_id'] for x in self.outfit_items]: + self.post_operation( + status="continue", + message=f"Item {new_item['item_id']} duplicated. Ask Gemini to re-output.", + callback_url=url, + img_path=merged_image_path, ) continue else: self.outfit_items.append(new_item) + existing_subcategories.append(new_item["subcategory"]) self.post_operation( - response_data, status="ok", message=f"Add new item {new_item['item_id']} in category {new_item['category']} successfully.", - callback_url=url + callback_url=url, + img_path=merged_image_path, ) - print(f"Step {recommend_timestep}: {gemini_data}, found item: {new_item}") + print(f"Stage {current_category.upper()}, Step {recommend_timestep}: {gemini_data}, found item: {new_item['item_id']}") - # When action is stop or timestep limit reached - logger.info(f"Main clothing stylist process finished: {gemini_data.get('reason')}") - # 根据stylist要求随机增加配饰 3-4个配饰 - response_data['path'], image_bytes = await self._merge_images(file_name, user_id, stylist_name) - accessory_system_prompt = self._build_accessory_prompt(request_summary, gender, accessories_guide) - user_input = self._build_user_input(recommend_acc=True) - gemini_response_text = await self._call_gemini(user_input, user_id, file_name, self.accessory_schema, image_bytes, accessory_system_prompt) + + async def _execute_batch_recommendation( + self, + current_category: str, # this can be any category or all + system_prompt: str, + schema: Dict, + occasions: List[str], + batch_sources: List[str], + user_id: str, + url: str + ): + user_input = self._build_user_input(current_category, existing_subcategories=", ".join([x['subcategory'] for x in self.outfit_items])) + merged_image_path, image_bytes = await self._merge_images(self.outfit_id, user_id, self.stylist_name) + + gemini_response_text = await self._call_gemini( + user_input, + user_id, + self.outfit_id, + schema, + image_bytes, + system_prompt + ) gemini_data = self._parse_gemini_response(gemini_response_text) - - recommended_accessories = gemini_data.get('recommended_accessories', []) + recommended_items = gemini_data.get('recommended_items', []) reason = gemini_data.get('reason', '') - if not recommended_accessories or not isinstance(recommended_accessories, List): - print("No accessory data from Gemini, terminating process.") + if not recommended_items or not isinstance(recommended_items, List): + print("No recommended item from Gemini, terminating process.") self.post_operation( - response_data, status="failed", message="Agent returned invalid response, terminating process.", - callback_url=url + callback_url=url, + img_path=merged_image_path ) else: - for idx, rec_accessory in enumerate(recommended_accessories): - category = rec_accessory.get('category') - description = rec_accessory.get('description') + for idx, rec_item in enumerate(recommended_items): + subcategory = rec_item.get('subcategory') + description = rec_item.get('description') # 4a. 检查类别是否有效 (重要步骤) - if category not in ACCESSORY_CATEGORY: + if subcategory not in ALL_SUBCATEGORY_LIST: continue # 4b. 在本地 DB 中查询单品 - new_item = self._get_next_item(description, category, occasions, batch_sources, gender) + # we need first determine the category if current category is 'all' + if current_category == "all": + for category, subcategories_list in FASHION_TAXONOMY.items(): + # 将子类别列表转换为集合 (set) 可以提高查找效率, + # 特别是当列表很长时。 + if subcategory in subcategories_list: + break + + new_item = self._get_next_item(description, category, subcategory, occasions, batch_sources, self.gender) if not new_item or new_item['item_id'] in [x['item_id'] for x in self.outfit_items]: continue else: self.outfit_items.append(new_item) - print(f"Accessory {idx + 1}: {rec_accessory}, found item: {new_item}") + print(f"Item {idx + 1}: ({subcategory}) {rec_item}, found item: {new_item}") + return reason + - response_data['path'] = await self._merge_images(file_name, user_id, stylist_name) - self.post_operation( - response_data, + async def run_iterative_styling(self, request_summary, occasions, start_outfit=[], batch_sources=[], user_id="test", callback_url=""): + start_time = time.monotonic() + STAGES = ['clothing', 'shoes', 'bags'] + self.outfit_items = start_outfit + stylist_guide, accessories_guide = self._load_style_guide(self.stylist_name) + url = f'{callback_url}/api/style/callback' + + """主流程控制循环。""" + print(f"--- Starting Agent (Outfit ID: {self.outfit_id}) ---") + for current_category in STAGES: + max_len = 4 if current_category == 'clothing' else 1 + system_prompt = self._build_system_prompt(core_outfit_template, request_summary, stylist_guide, current_category, max_len) + + await self._execute_iterative_recommendation( + current_category, + system_prompt, + build_iterative_schema(current_category), + max_len, + occasions, + batch_sources, + user_id, + url + ) + + # 根据stylist要求增加配饰 3-4个配饰 + MAX_LEN_ACC = 3 + acc_system_prompt = self._build_system_prompt(accessories_template, request_summary, accessories_guide, 'accessories', MAX_LEN_ACC) + reason = await self._execute_batch_recommendation( + current_category, # can be 'accessories' or 'all' + acc_system_prompt, + build_batch_schema(current_category), + occasions, + batch_sources, + user_id, + url + ) + + final_image_path = await self._merge_images(self.outfit_id, user_id, self.stylist_name) + response_data = self.post_operation( status="stop", message=reason, - callback_url=url + callback_url=url, + img_path=final_image_path ) - with open(os.path.join(settings.OUTFIT_OUTPUT_DIR, stylist_name, f'{file_name}.json'), 'w') as f: - json.dump(self.outfit_items, f, indent=2) + if settings.LOCAL == 1: + with open(os.path.join(settings.OUTFIT_OUTPUT_DIR, self.stylist_name, f'{self.outfit_id}.json'), 'w') as f: + json.dump({"request_summary": request_summary,"occasions": occasions, "items": self.outfit_items}, f, indent=2) + + end_time = time.monotonic() + total_duration = end_time - start_time - return response_data + return response_data, total_duration + + async def run_quick_batch_styling(self, request_summary, occasions, start_outfit=[], batch_sources=[], user_id="test", callback_url=""): + start_time = time.monotonic() + + self.outfit_items = start_outfit + stylist_guide, accessories_guide = self._load_style_guide(self.stylist_name) + url = f'{callback_url}/api/style/callback' + + print(f"--- Starting Agent (Outfit ID: {self.outfit_id}) ---") + + MAX_LEN = 9 + system_prompt = self._build_system_prompt(all_items_template, request_summary, stylist_guide + accessories_guide, "", MAX_LEN) + reason = await self._execute_batch_recommendation( + 'all', # can be 'accessories' or 'all' + system_prompt, + build_batch_schema(), + occasions, + batch_sources, + user_id, + url + ) + + final_image_path = await self._merge_images(self.outfit_id, user_id, self.stylist_name) + response_data = self.post_operation( + status="stop", + message=reason, + callback_url=url, + img_path=final_image_path + ) + if settings.LOCAL == 1: + with open(os.path.join(settings.OUTFIT_OUTPUT_DIR, self.stylist_name, f'{self.outfit_id}.json'), 'w') as f: + json.dump({"request_summary": request_summary,"occasions": occasions, "items": self.outfit_items}, f, indent=2) + + end_time = time.monotonic() + total_duration = end_time - start_time + + return response_data, total_duration def _upload_to_gcs(self, bucket_name: str, blob_name: str, mime_type, image_bytes) -> str: """同步方法:将文件上传到 GCS 并返回 GCS URI。""" diff --git a/app/server/ChatbotAgent/core/system_prompt.py b/app/server/ChatbotAgent/core/system_prompt.py deleted file mode 100644 index 7f742d9..0000000 --- a/app/server/ChatbotAgent/core/system_prompt.py +++ /dev/null @@ -1,32 +0,0 @@ -BASIC_PROMPT = """You are a professional, friendly, and insightful AI {gender}'s styling assistant. - -Your primary mission is to engage in a multi-turn conversation with the user to fully understand their dressing intent. You must adopt a professional yet approachable tone. - -CONVERSATION GOALS: -1. **Occasion:** Determine the specific event (e.g., romantic dinner, summer wedding, business meeting). -2. **Style:** Pinpoint the desired aesthetic (e.g., classic elegance, edgy, minimalist, bohemian). -3. **Vibe/Details:** Gather any mood or specific constraints (e.g., needs to be comfortable, requires light colors, no bare shoulders). -4. **Item Preference:** Ask the user if they have any specific preferences for an item type or silhouette (e.g., preference for a dress, skirt, tailored pants, or a particular neckline/length). - -GUIDANCE FOR RESPONSE GENERATION: -- After the user's initial request (e.g., "I want a chic outfit for dinner."), immediately reply with a friendly, targeted follow-up question to elicit the most crucial missing information (usually a combination of **Occasion** and **Style**). -- Be concise. Ask only 1 to 2 essential questions per turn. -- You must gather sufficient, clear intent before proceeding to actual clothing recommendations. - -OUTPUT FORMAT INSTRUCTION: -- **DO NOT** use any Markdown formatting whatsoever (e.g., do not use asterisks (*), bold text (**), lists, or code blocks). -- **ONLY** output the plain text response spoken by the AI Assistant. - -Example Follow-up (mimicking a conversational flow): -User: I want a chic outfit for dinner. -Your Response: Hey there! A chic dinner outfit, I love that! To give you the perfect recommendations, tell me: is this a romantic date, business dinner, or celebration with friends? And what's your go-to style vibe: classic elegance or something with more edge?""" - -SUMMARY_PROMPT = """ -You are an expert fashion request analyzer. Analyze the conversation history provided by the user. -Your task is to: - -1. Identify the most appropriate occasions from the allowed list based on the user's intent. -2. Write a detailed summary string that captures the user's style preferences, specific item requests, disliked items, body concerns, and color preferences. This summary will be used by a stylist to recommend outfits. - -Extract this information accurately from the chat history. -""" \ No newline at end of file diff --git a/app/server/ChatbotAgent/core/vector_database.py b/app/server/ChatbotAgent/core/vector_database.py index 998b697..c86ef4e 100644 --- a/app/server/ChatbotAgent/core/vector_database.py +++ b/app/server/ChatbotAgent/core/vector_database.py @@ -1,6 +1,3 @@ -import random -import time - import numpy as np import torch import chromadb @@ -8,7 +5,7 @@ from PIL import Image from typing import List, Dict, Any from transformers import CLIPProcessor, CLIPModel -from app.taxonomy import OCCASION, ALL_CATEGORY +from app.taxonomy import OCCASION, CATEGORY_LIST, IGNORE_SUBCATEGORY class VectorDatabase(): @@ -50,7 +47,7 @@ class VectorDatabase(): return features.cpu().numpy().flatten().tolist() def get_matched_item(self, embedding: List[float], category: str, occasions: List[str] = [], batch_sources: List[str] = [], gender: str = 'female', n_results: int = 1) -> List[Dict[str, Any]]: - if category not in ALL_CATEGORY: + if category not in CATEGORY_LIST: raise ValueError(f"Recommended {category} is not valid.") and_conditions = [ @@ -59,15 +56,19 @@ class VectorDatabase(): {"$or": [ {"gender": gender}, {"gender": "unisex"}, - ]} + ]}, + {"subcategory": {"$nin": IGNORE_SUBCATEGORY}} ] if batch_sources and len(batch_sources) > 0: - source_conditions = [] - for source in batch_sources: - source_conditions.append({"batch_source": source}) + if len(batch_sources) == 1: + and_conditions.append({"batch_source": batch_sources[0]}) + else: + source_conditions = [] + for source in batch_sources: + source_conditions.append({"batch_source": source}) - # 将 Batch Source 的 OR 子句添加到主 AND 条件中 - and_conditions.append({"$or": source_conditions}) + # 将 Batch Source 的 OR 子句添加到主 AND 条件中 + and_conditions.append({"$or": source_conditions}) results = self.collection.query( query_embeddings=[embedding], diff --git a/app/server/utils/img_operation.py b/app/server/utils/img_operation.py index 68a6f21..1d9b60c 100644 --- a/app/server/utils/img_operation.py +++ b/app/server/utils/img_operation.py @@ -77,8 +77,7 @@ def merge_images_to_square(outfit_items: List[Dict[str, str]], max_len=9, add_te # We use Image.open() and convert to 'RGB' to handle potential transparency (RGBA) # and ensure compatibility with the final 'RGB' canvas and JPG output. if settings.LOCAL == 1: - image_file_path = os.path.join(settings.LOCAL_IMAGE_DIR, path) - img = Image.open(image_file_path).convert('RGB') + img = Image.open(path).convert('RGB') else: img = oss_get_image(oss_client=minio_client, path=f"{MINIO_LC_DATA_PATH}/{path}", data_type="PIL").convert('RGB') # img = Image.open(path).convert('RGB') @@ -185,142 +184,3 @@ def merge_images_to_square(outfit_items: List[Dict[str, str]], max_len=9, add_te # canvas.save(output_path, 'JPEG', quality=90) return canvas - -# def merge_images_to_square(outfit_items: List[Dict[str, str]], max_len=9, add_text=True): -# """ -# Loads up to 4 images from the given paths, resizes them while maintaining -# aspect ratio, and merges them onto a 1024x1024 white background JPG. -# -# The layout depends on the number of images: -# 1: Center the single image on the 1024x1024 canvas. -# 2: Place side-by-side, each scaled to fit a 512x1024 half. -# 3: Place in top-left (512x512), top-right (512x512), and bottom-left (512x512). -# 4: Place in all four 512x512 quadrants. -# -# Args: -# outfit_items: A list of item metadata (max length 9). -# -# Returns: -# The file path of the temporary merged JPG image. -# """ -# -# # Define the final canvas size -# CANVAS_SIZE = 1024 -# -# # 1. Create the final white canvas -# # Using 'RGB' mode for JPG output -# canvas = Image.new('RGB', (CANVAS_SIZE, CANVAS_SIZE), 'white') -# draw = ImageDraw.Draw(canvas) -# font = ImageFont.load_default() -# -# # 2. Define the quadrants/target areas (x, y, w, h) -# # The positions are based on a 512x512 quadrant size -# quadrants = { -# 1: [(0, 0, CANVAS_SIZE, CANVAS_SIZE)], # Single full-size placement -# 2: [(0, 0, 512, CANVAS_SIZE), (512, 0, 512, CANVAS_SIZE)], # Left, Right -# 3: [(0, 0, 512, 512), (512, 0, 512, 512), (0, 512, 512, 512)], # Top-Left, Top-Right, Bottom-Left -# 4: [(0, 0, 512, 512), (512, 0, 512, 512), (0, 512, 512, 512), (512, 512, 512, 512)], # All Four -# 5: ALL_9_CELLS[:5], # 布局前5个单元格 (1-5) -# 6: ALL_9_CELLS[:6], # 布局前6个单元格 (1-6) -# 7: ALL_9_CELLS[:7], # 布局前7个单元格 (1-7) -# 8: ALL_9_CELLS[:8], # 布局前8个单元格 (1-8) -# 9: ALL_9_CELLS[:9] # 布局全部9个单元格 (1-9) -# } -# -# # 3. Load and Filter Images -# valid_images = [] -# image_paths = [item['image_path'] for item in outfit_items] -# for path in image_paths: -# try: -# # We use Image.open() and convert to 'RGB' to handle potential transparency (RGBA) -# # and ensure compatibility with the final 'RGB' canvas and JPG output. -# img = oss_get_image(oss_client=minio_client, path=f"{MINIO_LC_DATA_PATH}/{path}", data_type="PIL").convert('RGB') -# # img = Image.open(path).convert('RGB') -# valid_images.append(img) -# except Exception as e: -# logger.error(f"Error loading image {path}. Skipping: {e}") -# -# num_images = len(valid_images) -# -# if num_images == 0: -# raise ValueError("No valid images were loaded.") -# -# if num_images > max_len: -# raise ValueError(f"Valid item number {num_images} exceed max limit {max_len}") -# -# # Get the correct list of target areas based on the number of valid images -# target_areas = quadrants.get(num_images, []) -# -# # 4. Resize and Paste -# for i, (img, item) in enumerate(zip(valid_images, outfit_items)): -# item_id = item['item_id'] -# category = item['category'] -# if i >= len(target_areas): -# # This should not happen if num_images <= 4 -# break -# -# # Target area dimensions (x_start, y_start, width, height) -# x_start, y_start, target_w, target_h = target_areas[i] -# -# # Calculate new size while maintaining aspect ratio -# original_w, original_h = img.size -# -# # Calculate the ratio needed to fit within the target area -# ratio_w = target_w / original_w -# ratio_h = target_h / original_h -# -# # Use the *smaller* of the two ratios to ensure the image fits entirely -# resize_ratio = min(ratio_w, ratio_h) -# -# # Calculate the new dimensions -# new_w = int(original_w * resize_ratio) -# new_h = int(original_h * resize_ratio) -# -# # Resize the image. Image.Resampling.LANCZOS provides high-quality scaling. -# # Pillow documentation recommends ANTIALIAS or BICUBIC for downscaling, -# # but LANCZOS is a good general high-quality filter. -# # Note: In Pillow versions > 9.0.0, Image.LANCZOS is now Image.Resampling.LANCZOS -# resized_img = img.resize((new_w, new_h), Image.Resampling.LANCZOS) -# -# # Calculate the paste position to center the resized image within its target area -# # Center X: (Target Width - New Width) / 2 + X Start -# paste_x = (target_w - new_w) // 2 + x_start -# # Center Y: (Target Height - New Height) / 2 + Y Start -# # paste_y = (target_h - new_h) // 2 + y_start -# -# TEXT_RESERVE_HEIGHT = 30 -# paste_y = (target_h - new_h - TEXT_RESERVE_HEIGHT) // 2 + y_start -# paste_y = max(paste_y, y_start) -# -# # Paste the resized image onto the canvas -# canvas.paste(resized_img, (paste_x, paste_y)) -# -# full_text = f"ID: {item_id}, Category: {category}" -# try: -# # 推荐使用:计算文本的实际尺寸 (width, height) -# bbox = draw.textbbox((0, 0), full_text, font=font) -# text_w = bbox[2] - bbox[0] -# text_h = bbox[3] - bbox[1] -# except AttributeError: -# # 兼容旧版本 Pillow -# text_w, text_h = draw.textsize(full_text, font=font) -# -# # 计算 X 轴起始位置:使其在目标区域 (target_w) 中居中 -# text_x_center = x_start + target_w // 2 -# text_x_start = text_x_center - text_w // 2 -# -# # 计算 Y 轴起始位置:将其放在目标区域的底部 -# # (目标区域的起始Y + 目标区域的高度 - 文本行的高度) -# text_y_start = y_start + target_h - text_h - 5 # 减去 5 像素作为边距 -# -# # 3. 绘制合并后的文本 -# if add_text: -# draw.text((text_x_start, text_y_start), -# full_text, -# fill='black', -# font=font) -# -# # Save as a high-quality JPG (quality=90 is a good balance) -# # canvas.save(output_path, 'JPEG', quality=90) -# -# return canvas diff --git a/app/taxonomy.py b/app/taxonomy.py index f73b1e5..4236eca 100644 --- a/app/taxonomy.py +++ b/app/taxonomy.py @@ -7,26 +7,46 @@ OCCASION = [ "Garden Party / Daytime Event" ] -CATEGORY = { +FASHION_TAXONOMY = { 'clothing': [ - 'coats', - 'jackets', - 'blazers', - 'puffer', - 'cardigan', - 'sweater', - 'shirts', - 't-shirts', - 'pullover', - 'polos', - 'bodysuits', - 'dresses', - 'skirts', - 'jeans', - 'shorts', - 'leggings', - 'jumpsuits', - 'swimwear', + # --- Tops --- + 't-shirts', # T恤 + 'shirts', # 衬衫 (泛指梭织) + 'blouses', # 女式衬衫 + 'polo shirts', # Polo衫 + 'tank tops', # 背心/坎肩 + 'camisoles', # 吊带背心 + # --- Knits/Sweaters --- + 'sweaters', # 毛衣 (泛指) + 'cardigans', # 开衫 + 'pullovers', # 套头衫 + 'hoodies', # 连帽衫 + 'sweatshirts', # 圆领卫衣 + 'vests', # 马甲/背心 (外穿) + # --- Outerwear --- + 'coats', # 大衣 (长款) + 'jackets', # 夹克 (短款) + 'blazers', # 西装外套 + # --- Bottoms --- + 'jeans', # 牛仔裤 (虽是材质,但在时尚界视为独立大类) + 'trousers', # 西裤/正装长裤 + 'pants', # 长裤 (泛指休闲) + 'joggers', # 束脚裤 + 'leggings', # 打底裤/紧身裤 + 'shorts', # 短裤 + 'skirts', # 半身裙 + 'skorts', # 裙裤 + # --- One-Piece --- + 'dresses', # 连衣裙 + 'jumpsuits', # 连体长裤 + 'bodysuits', # 连体紧身衣 + 'suits', # 套装 (西装套) + # --- Intimates/Swim --- + 'bras', # 文胸 + 'underwear', # 内衣 + 'lingerie', # 性感内衣 + 'pajamas', # 睡衣套装 + 'swimwear', # 泳装 ], 'shoes': [ 'sneakers', @@ -38,22 +58,37 @@ CATEGORY = { 'boots', ], 'bags': [ - 'bags' + 'shoulder bags', + 'crossbody', + 'bucket bags', + 'tote bags', + 'clutch bags', + 'backpacks', + 'travel bags', + 'luggage', ], 'accessories': [ + # --- Jewelry & Watches --- 'necklaces', + 'earrings', 'bracelets', - 'jewellery', - 'eyewear', - 'scarves', + 'rings', + 'cufflinks', + 'watches', + # --- Head/Face --- 'hats', - 'gloves', + 'eyewear', + # --- Body/Textile --- 'belts', - 'socks', - 'watches' + 'scarves', + 'gloves', 'ties', + 'bow ties', + 'pocket squares', + 'socks', ] } -ALL_CATEGORY = sum(CATEGORY.values(), []) +CATEGORY_LIST = list(FASHION_TAXONOMY.keys()) +ALL_SUBCATEGORY_LIST = sum(FASHION_TAXONOMY.values(), []) -IGNORE_CATEGORY = ['socks'] \ No newline at end of file +IGNORE_SUBCATEGORY = ['socks'] \ No newline at end of file diff --git a/data_ingestion/README.md b/data_ingestion/README.md index 5f68ccf..97ba534 100644 --- a/data_ingestion/README.md +++ b/data_ingestion/README.md @@ -40,7 +40,7 @@ ## Example in `metadata_extraction.json` ```json "EOJ367": { - "category": "shoes", + "subcategory": "necklaces", "gender": "female", "applicable_occasions": [ "Casual", @@ -60,33 +60,34 @@ ## Metadata in Vector Database ```json { - 'item_id': 'EOJ128', - 'category': 'sunglasses', - 'gender': 'unisex', - 'modality': 'image', - 'brand': 'CELINE', - 'color': 'BROWN', - 'description': "Immerse yourself in the depth of classic style with CELINE\'s Tortoiseshell Logo Sunglasses. Featuring a rich, tortoiseshell acetate frame and adorned with the iconic CELINE logo in gold, these sunglasses are a testament to timeless elegance and luxury. Perfect for those who appreciate a sophisticated aesthetic, they offer optimal UV protection while ensuring you remain at the forefront of fashion.", - 'tags': 'celine,accessories,in-stock,new,maxi,triomphe,acetate,round', - 'price': 4500, - 'url': 'https://www.lanecrawford.com.hk/product/celine/maxi-triomphe-acetate-round-sunglasses/_/EOJ128/product.lc?utm_medium=embed&utm_source=ai-recommended&utm_campaign=2025-christmas_lc_ai-recommended', - 'batch_source': '2025_q4', - 'Outdoor': 0, - 'Ski / Snow / Mountain': 0, - 'Festival / Concert': 0, - 'Activewear': 0, - 'Casual': 1, - 'Cocktail / Semi-Formal': -1, - 'Formal': -1, - 'Party / Clubbing': 0, - 'Evening': 0, - 'Travel / Transit': 0, - 'Beach / Swim': 0, - 'Garden Party / Daytime Event': 1, - 'Black Tie / White Tie': -1, - 'Resort': 1, - 'Athleisure': 0, - 'Business / workwear': -1, - 'Bridal / Wedding': -1, + "item_id": "EOJ128", + "category": "accessories", + "subcategory": "eyewear", + "gender": "unisex", + "modality": "image", + "brand": "CELINE", + "color": "BROWN", + "description": "Immerse yourself in the depth of classic style with CELINE's Tortoiseshell Logo Sunglasses. Featuring a rich, tortoiseshell acetate frame and adorned with the iconic CELINE logo in gold, these sunglasses are a testament to timeless elegance and luxury. Perfect for those who appreciate a sophisticated aesthetic, they offer optimal UV protection while ensuring you remain at the forefront of fashion.", + "tags": "celine,accessories,in-stock,new,maxi,triomphe,acetate,round", + "price": 4500, + "url": "https://www.lanecrawford.com.hk/product/celine/maxi-triomphe-acetate-round-sunglasses/_/EOJ128/product.lc?utm_medium=embed&utm_source=ai-recommended&utm_campaign=2025-christmas_lc_ai-recommended", + "batch_source": "2025_q4", + "Outdoor": 0, + "Ski / Snow / Mountain": 0, + "Festival / Concert": 0, + "Activewear": 0, + "Casual": 1, + "Cocktail / Semi-Formal": -1, + "Formal": -1, + "Party / Clubbing": 0, + "Evening": 0, + "Travel / Transit": 0, + "Beach / Swim": 0, + "Garden Party / Daytime Event": 1, + "Black Tie / White Tie": -1, + "Resort": 1, + "Athleisure": 0, + "Business / workwear": -1, + "Bridal / Wedding": -1, } ``` \ No newline at end of file diff --git a/data_ingestion/process_item.py b/data_ingestion/process_item.py index 4a26c61..22444a6 100644 --- a/data_ingestion/process_item.py +++ b/data_ingestion/process_item.py @@ -5,7 +5,7 @@ from PIL import Image import json from tqdm import tqdm -from app.taxonomy import OCCASION, CATEGORY, ALL_CATEGORY +from app.taxonomy import OCCASION, FASHION_TAXONOMY, ALL_SUBCATEGORY_LIST # data config @@ -42,7 +42,7 @@ Description: Cut from cardinal-red virgin wool, Armarium's Loren skirt wields ta Tags: armarium, clothing, in-stock, new, loren, wool, blend, tube """ EXAMPLE_1_JSON = json.dumps({ - "category": "skirts", + "subcategory": "skirts", "gender": "female", "applicable_occasions": [ "Business/workwear", "Evening", "Cocktail / Semi-Formal", "Party / Clubbing", "Formal" @@ -61,7 +61,7 @@ Description: Crafted from 18k yellow gold and rhodium-plated sterling silver, th Tags: tateossian, accessories, in-stock, new, mayfair, yellow, gold, rhodium """ EXAMPLE_2_JSON = json.dumps({ - "category": "jewelry", + "subcategory": "jewelry", "gender": "female", "applicable_occasions": [ "Formal", "Black Tie / White Tie", "Bridal / Wedding", "Business/workwear", "Cocktail / Semi-Formal" @@ -94,20 +94,24 @@ def format_product_info(product): return info -def generate_full_prompt(product_info, raw_category): +def raw_category_mapping(raw_category: str) -> str: if raw_category == 'Fine Jewellery And Watches': - category = 'accessories' + return 'accessories' else: - category = raw_category.lower() - subcategory_list = CATEGORY.get(category) + return raw_category.lower() + + +def generate_full_prompt(product_info, raw_category): + category = raw_category_mapping(raw_category) + subcategory_list = FASHION_TAXONOMY.get(category) SYSTEM_PROMPT = f"""You are an expert fashion AI assistant. Your task is to analyze the provided product image and product details to: 1. determine the suitable occasions for wearing or using the item. You must choose occasions ONLY from the following strict list: {json.dumps(OCCASION, indent=4)}. Only relevant suitable or inappropriate occasions should be selected. - 2. categorize it into suitable category in strict list: {json.dumps(subcategory_list)}. + 2. categorize it into suitable subcategory in strict list: {json.dumps(subcategory_list)}. 3. categorize it into appropriate gender in ["female", "male", "unisex"] Output Format: - Return ONLY a valid JSON object with four keys: "category", "gender", "applicable_occasions" and "inappropriate_occasions". Do not include any analysis or extra text outside of the final JSON object. + Return ONLY a valid JSON object with four keys: "subcategory", "gender", "applicable_occasions" and "inappropriate_occasions". Do not include any analysis or extra text outside of the final JSON object. """ # 组合对话序列 @@ -140,37 +144,36 @@ product_list = [ ] -def validate_results(): - if os.path.exists(OUTPUT_FILE): - with open(OUTPUT_FILE, 'r') as f: - final_results = json.load(f) - else: - final_results = {} +def validate_result(result_dict): + subcategory = result_dict.get("subcategory") + gender = result_dict.get("gender") - unfinished_ids = [] - for product in product_list: - item_id = product.get('id') - if item_id not in final_results.keys(): - unfinished_ids.append(product) - else: - processed_item = final_results[item_id] - category = processed_item.get("category") - gender = processed_item.get("gender") + if not subcategory or not gender: + return False + + if subcategory not in ALL_SUBCATEGORY_LIST: + return False - if category not in ALL_CATEGORY: - unfinished_ids.append(product) + if gender not in ['female', 'male', 'unisex']: + return False + + return True + + +if os.path.exists(OUTPUT_FILE): + with open(OUTPUT_FILE, 'r') as f: + final_results = json.load(f) +else: + final_results = {} - if gender not in ['female', 'male', 'unisex']: - unfinished_ids.append(product) - return unfinished_ids, final_results attemps = 0 while attemps < 3: + unfinished_products = [product for product in product_list if product.get('id') not in final_results.keys()] attemps += 1 - unfinished_products, final_results = validate_results() - completion_ratio = len(unfinished_products) / len(product_list) - if (completion_ratio > 0.95): - print("valid results surpass 95%. Finish Now.") + completion_ratio = len(final_results) / len(product_list) + if (completion_ratio > 0.85): + print("valid results surpass 85%. Finish Now.") break else: print(f"Start {attemps} categorization process. Current ratio: {completion_ratio * 100}%") @@ -252,11 +255,11 @@ while attemps < 3: json_str = generated_text[start_idx:end_idx] result_dict = json.loads(json_str) - final_results[product_id] = result_dict + if validate_result(result_dict): + final_results[product_id] = result_dict except Exception as e: print(f"ID {product_id}: FAILED to parse JSON. Raw Output: {generated_text.strip()}") - final_results[product_id] = {"error": str(e), "raw_output": generated_text.strip()} # 显存清理(可选,但在长任务中推荐) del inputs, outputs diff --git a/data_ingestion/run_ingestion.py b/data_ingestion/run_ingestion.py index 76d97e3..771d2d0 100644 --- a/data_ingestion/run_ingestion.py +++ b/data_ingestion/run_ingestion.py @@ -1,6 +1,3 @@ - - - import chromadb import os import json @@ -11,7 +8,7 @@ from tqdm import tqdm from PIL import Image from transformers import CLIPProcessor, CLIPModel -from app.taxonomy import CATEGORY, ALL_CATEGORY, OCCASION +from app.taxonomy import ALL_SUBCATEGORY_LIST, OCCASION BATCH_SOURCE = '2025_q4' @@ -20,6 +17,7 @@ IMAGE_DIR = f'./data/{BATCH_SOURCE}/image_data' RAW_DATA_PATH = f'{DATA_DIR}/products-all.json' CATEGORIZED_METADATA_PATH = f'{DATA_DIR}/metadata_extraction.json' +ADD_TEXT_EMBEDDING = False ## Load data with open(RAW_DATA_PATH, 'r', encoding='utf-8') as file: @@ -36,11 +34,11 @@ collection = client.get_or_create_collection( ) # if you wish to delete some item, uncomment following -# results = collection.delete( -# where={ -# "batch_source": BATCH_SOURCE -# } -# ) +results = collection.delete( + where={ + "batch_source": BATCH_SOURCE + } +) # Load model processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32") @@ -63,9 +61,13 @@ def format_product_info(product): ) return info +def raw_category_mapping(raw_category: str) -> str: + if raw_category == 'Fine Jewellery And Watches': + return 'accessories' + else: + return raw_category.lower() # Combine all data together -new_category = {} valid_count = 0 all_count = 0 for raw_item in tqdm(raw_data['products']): @@ -91,18 +93,14 @@ for raw_item in tqdm(raw_data['products']): print(f"{item_id} has not been categorized. It does not exist in {CATEGORIZED_METADATA_PATH}") continue - category = processed_item.get("category") + category = raw_category_mapping(raw_category) + subcategory = processed_item.get("subcategory") gender = processed_item.get("gender") applicable_occasions = processed_item.get("applicable_occasions", []) inappropriate_occasions = processed_item.get("inappropriate_occasions", []) - if category not in ALL_CATEGORY: + if subcategory not in ALL_SUBCATEGORY_LIST: print(f"{item_id}'s category, {category}, does not valid.") - if category not in new_category: - new_category[category] = [item_id] - else: - new_category[category].append(item_id) - continue if gender not in ['female', 'male', 'unisex']: print(f"{item_id}'s gender is not valid in {['female', 'male', 'unisex']}") @@ -129,6 +127,7 @@ for raw_item in tqdm(raw_data['products']): item_img_metadata = { "item_id": item_id, "category": category, + "subcategory": subcategory, "description": description, "gender": gender, 'brand': raw_item.get('brand', ''), @@ -146,10 +145,6 @@ for raw_item in tqdm(raw_data['products']): for occasion in inappropriate_occasions: item_img_metadata[occasion] = -1 - item_txt_metadata = deepcopy(item_img_metadata) - item_txt_metadata["modality"] = "text" - - # Get image feature image = Image.open(image_path).convert("RGB") inputs = processor(images=image, return_tensors="pt").to(device) @@ -158,21 +153,30 @@ for raw_item in tqdm(raw_data['products']): img_features = img_features / img_features.norm(p=2, dim=-1, keepdim=True) img_embedding = img_features.cpu().numpy().flatten().tolist() - # Get text feature - inputs = processor(text=[description], return_tensors="pt", padding=True, truncation=True).to(device) - with torch.no_grad(): - txt_features = model.get_text_features(**inputs) - txt_features = txt_features / txt_features.norm(p=2, dim=-1, keepdim=True) - txt_embedding = txt_features.cpu().numpy().flatten().tolist() - product_info = format_product_info(raw_item) # 插入到 ChromaDB collection.add( - ids=[f'{item_id}_img', f'{item_id}_txt'], - documents=[product_info, product_info], - embeddings=[img_embedding, txt_embedding], - metadatas=[item_img_metadata, item_txt_metadata], + ids=[f'{item_id}_img'], + documents=[product_info], + embeddings=[img_embedding], + metadatas=[item_img_metadata], ) -print(f"Final valid ratio is {valid_count / all_count * 100}%. Total number is {all_count}, Valid number is {valid_count}") -print(f'Found new category for consideration: {new_category}') + if ADD_TEXT_EMBEDDING: + item_txt_metadata = deepcopy(item_img_metadata) + item_txt_metadata["modality"] = "text" + + # Get text feature + inputs = processor(text=[description], return_tensors="pt", padding=True, truncation=True).to(device) + with torch.no_grad(): + txt_features = model.get_text_features(**inputs) + txt_features = txt_features / txt_features.norm(p=2, dim=-1, keepdim=True) + txt_embedding = txt_features.cpu().numpy().flatten().tolist() + collection.add( + ids=[f'{item_id}_txt'], + documents=[product_info], + embeddings=[txt_embedding], + metadatas=[item_txt_metadata], + ) + +print(f"Final valid ratio is {valid_count / all_count * 100}%. Total number is {all_count}, Valid number is {valid_count}") \ No newline at end of file