Restructure the whole recommendation pipeline and add a new "one-ask-for-all" recommendation mode

2025-12-12 17:37:07 +08:00
parent 0e9546aa1a
commit 85390d5e6d
12 changed files with 684 additions and 565 deletions
--- a/app/config.py
+++ b/app/config.py
@@ -31,7 +31,6 @@ class Settings(BaseSettings):

    # 路径配置参数
    DATA_ROOT: str = Field(default="/workspace/lc_stylist_agent/data", description="数据根目录")
-    LOCAL_IMAGE_DIR: str = Field(default="/workspace/lc_stylist_agent/Data/image_data", description="图片数据目录")
    OUTFIT_OUTPUT_DIR: str = Field(default="/workspace/lc_stylist_agent/data/outfit_output", description="生成的搭配图片输出目录")
    STYLIST_GUIDE_DIR: str = Field(default="/workspace/lc_stylist_agent/data/stylist_guide", description="风格指南文本目录")

--- a/app/server/ChatbotAgent/agent_server.py
+++ b/app/server/ChatbotAgent/agent_server.py
@@ -4,6 +4,7 @@ import uuid
 from enum import Enum
 from typing import List
 from pydantic import Field
+import time

 import litserve as ls
 from pydantic import BaseModel
@@ -12,7 +13,7 @@ from app.server.ChatbotAgent.core.data_structure import Message, Role
 from app.server.ChatbotAgent.core.llm_interface import AsyncGeminiLLM
 from app.server.ChatbotAgent.core.redis_manager import RedisManager
 from app.server.ChatbotAgent.core.stylist_agent_server import AsyncStylistAgent
-from app.server.ChatbotAgent.core.system_prompt import SUMMARY_PROMPT
+from app.server.ChatbotAgent.core.prompt import SUMMARY_PROMPT
 from app.server.ChatbotAgent.core.vector_database import VectorDatabase

 logger = logging.getLogger(__name__)
@@ -54,7 +55,6 @@ class AgentRequestModel(BaseModel):
    batch_sources: List[str]
    callback_url: str
    gender: str
-    max_len: int = 9


 class LCAgent(ls.LitAPI):
@@ -118,7 +118,6 @@ class LCAgent(ls.LitAPI):
            user_id=request.user_id,
            gender=request.gender,
            callback_url=request.callback_url,
-            max_len=request.max_len,
            outfit_ids=outfit_ids
        )
        logger.info("--- Final Recommendation Results ---")
@@ -162,9 +161,17 @@ class LCAgent(ls.LitAPI):
        return str(parsed_result.summary), [occ.value for occ in parsed_result.occasions]

    async def recommend_outfit(
-        self, request_summary: str, occasions: List[str], stylist_name: str, start_outfit: List = [], batch_sources: List[str] = [], 
-        num_outfits: int = 1, user_id: str = "test", gender: str = "male", 
-        callback_url: str = None, max_len: int = 9, outfit_ids=None
+        self, 
+        request_summary: str, 
+        occasions: List[str], 
+        stylist_name: str, 
+        start_outfit: List = [], 
+        batch_sources: List[str] = [], 
+        num_outfits: int = 1, 
+        user_id: str = "test", 
+        gender: str = "male", 
+        callback_url: str = None, 
+        outfit_ids=None
    ):
        """
        基于用户的对话历史和需求，推荐一套搭配。
@@ -181,17 +188,16 @@ class LCAgent(ls.LitAPI):
        stylist_agent_kwages = self.stylist_agent_kwages.copy()
        for i in range(num_outfits):
            stylist_agent_kwages['outfit_id'] = outfit_ids[i]
-            stylist_agent_kwages['max_len'] = max_len
+            stylist_agent_kwages['stylist_name'] = stylist_name
+            stylist_agent_kwages['gender'] = gender
            agent = AsyncStylistAgent(**stylist_agent_kwages)
-            task = agent.run_styling_process(
+            task = agent.run_iterative_styling(
                request_summary=request_summary,
                occasions=occasions,
-                stylist_name=stylist_name,
                start_outfit=start_outfit,
                batch_sources=batch_sources,
                user_id=user_id,
                callback_url=callback_url,
-                gender=gender,
            )
            tasks.append(task)
            task_map[task] = {"outfit_id": outfit_ids[i], "retries": 0}
@@ -223,11 +229,12 @@ class LCAgent(ls.LitAPI):

                            # 重新创建任务 (可能需要短暂延迟，例如 time.sleep(1)，但在此异步环境中，我们会通过重新创建 agent/task 来实现)
                            stylist_agent_kwages['outfit_id'] = outfit_id
+                            stylist_agent_kwages['stylist_name'] = stylist_name
+                            stylist_agent_kwages['gender'] = gender
                            agent = AsyncStylistAgent(**stylist_agent_kwages)
-                            new_task = agent.run_styling_process(
+                            new_task = agent.run_iterative_styling(
                                request_summary=request_summary,
                                occasions=occasions,
-                                stylist_name=stylist_name,
                                start_outfit=start_outfit,
                                batch_sources=batch_sources,
                                user_id=user_id,
@@ -284,31 +291,45 @@ if __name__ == "__main__":
        with open("./data/2025_q4/request_test.json", "r") as f:
            request_data = json.load(f)

-        tasks = []
-        for test_content in request_data[:30]:
+        tasks_with_metadata = []
+        for test_content in request_data[20:25]:
            occasions = test_content['occasions']
            request_summary = test_content['request_summary']
-            stylist_agent_kwages['max_len'] = 5
            for stylist_name in ["edi", "vera"]:
                stylist_agent_kwages['outfit_id'] = test_content['test_case_id'] + "_" + "_".join(occasions) + f"_{stylist_name}"
+                stylist_agent_kwages['stylist_name'] = stylist_name
+                stylist_agent_kwages['gender'] = "female"
                agent = AsyncStylistAgent(**stylist_agent_kwages)
-                task = agent.run_styling_process(
+                coro = agent.run_iterative_styling(
+                # coro = agent.run_quick_batch_styling(
                    request_summary=request_summary,
                    occasions=occasions,
-                    stylist_name=stylist_name,
                    start_outfit=[],
                    batch_sources=["2025_q4"],
                    user_id=test_content['test_case_id'],
                    callback_url="http://mock-callback.com/result",
-                    gender="female",
                )
-                tasks.append(task)
-        
-        results = await asyncio.gather(*tasks, return_exceptions=True)
-        for result in results:
+                # 记录任务开始前的单调时间，并将元数据添加到列表中
+                description = f"Batch mode - Case {test_content['test_case_id']} - Stylist {stylist_name}"
+                
+                tasks_with_metadata.append((coro, description))
+
+        tasks_only = [coro for coro, _ in tasks_with_metadata]
+        print(f"--- Launching {len(tasks_only)} concurrent styling tasks... ---")
+        results = await asyncio.gather(*tasks_only, return_exceptions=True)
+
+        time_samples = []
+        for i, result in enumerate(results):
+            coro, description = tasks_with_metadata[i]
            if isinstance(result, Exception):
                print(f"❌ 任务失败: {type(result).__name__} - {str(result)}")
                continue
+            else:
+                response, duration = result
+                time_samples.append(duration)
+                print(f"✅ 任务成功 ({description}) [Time: {duration:.2f}s].")
+
+        print(f"Average time consumption is {sum(time_samples) / len(time_samples)}")

    try:
        # 使用 asyncio.run() 来执行顶层异步函数
--- a/app/server/ChatbotAgent/chatbot_server.py
+++ b/app/server/ChatbotAgent/chatbot_server.py
@@ -8,10 +8,8 @@ from app.config import settings
 from google.genai import types

 from app.server.ChatbotAgent.core.data_structure import Message, Role
-from app.server.ChatbotAgent.core.llm_interface import AsyncGeminiLLM
 from app.server.ChatbotAgent.core.redis_manager import RedisManager
-from app.server.ChatbotAgent.core.system_prompt import BASIC_PROMPT
-from app.server.ChatbotAgent.core.vector_database import VectorDatabase
+from app.server.ChatbotAgent.core.prompt import BASIC_PROMPT

 logger = logging.getLogger(__name__)

--- a/app/server/ChatbotAgent/core/prompt.py
+++ b/app/server/ChatbotAgent/core/prompt.py
@@ -0,0 +1,219 @@
+# System prompt for the multi-turn conversational styling assistant.
+# This is a plain (non-f) string with a single `{gender}` placeholder;
+# presumably the caller fills it with `str.format(gender=...)` — TODO confirm.
+BASIC_PROMPT = """You are a professional, friendly, and insightful AI {gender}'s styling assistant.
+
+Your primary mission is to engage in a multi-turn conversation with the user to fully understand their dressing intent. You must adopt a professional yet approachable tone.
+
+CONVERSATION GOALS:
+1.  **Occasion:** Determine the specific event (e.g., romantic dinner, summer wedding, business meeting).
+2.  **Style:** Pinpoint the desired aesthetic (e.g., classic elegance, edgy, minimalist, bohemian).
+3.  **Vibe/Details:** Gather any mood or specific constraints (e.g., needs to be comfortable, requires light colors, no bare shoulders).
+4.  **Item Preference:** Ask the user if they have any specific preferences for an item type or silhouette (e.g., preference for a dress, skirt, tailored pants, or a particular neckline/length).
+
+GUIDANCE FOR RESPONSE GENERATION:
+-   After the user's initial request (e.g., "I want a chic outfit for dinner."), immediately reply with a friendly, targeted follow-up question to elicit the most crucial missing information (usually a combination of **Occasion** and **Style**).
+-   Be concise. Ask only 1 to 2 essential questions per turn.
+-   You must gather sufficient, clear intent before proceeding to actual clothing recommendations.
+
+OUTPUT FORMAT INSTRUCTION:
+-   **DO NOT** use any Markdown formatting whatsoever (e.g., do not use asterisks (*), bold text (**), lists, or code blocks).
+-   **ONLY** output the plain text response spoken by the AI Assistant.
+
+Example Follow-up (mimicking a conversational flow):
+User: I want a chic outfit for dinner.
+Your Response: Hey there! A chic dinner outfit, I love that! To give you the perfect recommendations, tell me: is this a romantic date, business dinner, or celebration with friends? And what's your go-to style vibe: classic elegance or something with more edge?"""
+
+# System prompt that asks the model to (1) pick occasions from an allowed list
+# and (2) write a detailed request summary from the chat history.
+# Contains no placeholders, so it is used verbatim.
+SUMMARY_PROMPT = """
+You are an expert fashion request analyzer. Analyze the conversation history provided by the user.
+Your task is to:
+
+1. Identify the most appropriate occasions from the allowed list based on the user's intent.
+2. Write a detailed summary string that captures the user's style preferences, specific item requests, disliked items, body concerns, and color preferences. This summary will be used by a stylist to recommend outfits.
+
+Extract this information accurately from the chat history.
+"""
+
+
+from app.taxonomy import FASHION_TAXONOMY, IGNORE_SUBCATEGORY, ALL_SUBCATEGORY_LIST
+# System-prompt template for the step-by-step (one item per turn) styling stages.
+#
+# Brace handling is two-pass:
+#   * This literal is an f-string, so `{{name}}` survives as the placeholder
+#     `{name}` and `{{{{` / `}}}}` survive as `{{` / `}}`.
+#   * A later `str.format` call (presumably AsyncStylistAgent._build_system_prompt)
+#     fills `{gender}`, `{current_category}`, `{request_summary}`,
+#     `{stylist_guide}` and `{max_len}`, and turns `{{` / `}}` into the literal
+#     braces of the JSON examples.
+#
+# Both JSON examples are wrapped in a complete ```json ... ``` fence; an
+# unclosed fence would make every following rule read as part of a code block.
+core_outfit_template = f"""
+You are a professional fashion stylist Agent, specialized in creating complete, tailored outfits for {{gender}}. Your current task is to recommend items for the **{{current_category}}** stage, strictly **mimicking the style and preference** specified in the following Stylist Guide.
+
+Your task is to **create a cohesive and complete outfit**, strictly adhering to **BOTH** the user's explicit **Request Summary** and the **Outfit Style Guide**. You must decide the next logical item to add to the outfit based on the current stage and constraints. Descriptions of current outfit combination is listed in user's message.
+
+---
+## Request from the User:
+{{request_summary}}
+
+## Core Guidance Document: Outfit Style Guide
+{{stylist_guide}}
+---
+
+## Your Workflow and Constraints
+
+1.  **Style Adherence**: You must strictly observe all rules in the Style Guide concerning **color palette, fit, layering principles, pattern restrictions , shoe coordination**.
+2.  **Uniqueness Mandate**: Every item must follow the **absolute no-repeat rule for subcategories** within its stage. Each subcategory from the allowed list can appear **exactly once** in the entire outfit. Furthermore, the categories 'dresses' and 'pants' and 'skirts' are mutually exclusive; they NORMALLY cannot be included in the same outfit.
+3.  **Step Planning**: The styling sequence must follow a logical approach (e.g., top-down, inside-out for clothing). Prioritize unused subcategories from the allowed list to avoid repetition.
+4.  **Structured Output**: Your output MUST be a valid JSON object. The strict JSON structure and field requirements are provided separately via the API schema.
+
+You must only output one of two actions: "recommend_item" or "stop".
+4.1. **recommend_item**: Use this action to suggest the next single item.
+   * **subcategory**: Must be strictly no repeats, and drawn from the allowed list.
+   * **description**: This must be an **extremely detailed and precise** description for the vector search. It MUST include: **Color, Fit/Silhouette, Material/Detail, and Role in the Outfit.**
+You must strictly use the **JSON format** for your output, as follows:
+```json
+{{{{
+"action": "recommend_item",
+"subcategory": "YOUR_ITEM_SUBCATEGORY",
+"description": "YOUR_DETAILED_DESCRIPTION",
+"reason": "YOUR_RECOMMENDATION_REASON"
+}}}}
+```
+
+4.2. **stop**: Use this action when the Termination Condition is met.
+   * **reason**: This field is mandatory when stopping, and must clearly state why the outfit is complete.
+You must strictly use the **JSON format** for your output, as follows:
+```json
+{{{{
+"action": "stop",
+"subcategory": "",
+"description": "",
+"reason": "CORE_OUTFIT_COMPLETE"
+}}}}
+```
+
+5.  **Termination Condition**: Terminate when the below condition is fully met：
+    5.1. **CLOTHING Stage**: The core clothing part of the outfit is complete, meaning the combination of items effectively achieves **full body coverage** (e.g., includes both a top/upper garment and a bottom/lower garment, or a single full-body piece like a dress/jumpsuit). Additionally, **all mandatory elements** stipulated in the Style Guide are satisfied. *(Note: Typically, {{max_len}} items are sufficient for this stage.)*
+    5.2. **SHOES Stage**: **Exactly one (1) item** has been successfully recommended, as shoes are a **mandatory component** for any complete outfit.
+    5.3. **BAGS Stage**: **Exactly one (1) item** has been successfully recommended, **OR** the recommendation is skipped if the Style Guide or the User Request **does not mandate** a bag for the specific occasion (i.e., the bag is considered optional).
+
+6.  **Context Dependency**: The user's next input (if not Start) will contain the **image and description of the selected item**. When recommending the next item:
+    a) First verify the subcategories of all already selected items to ensure no duplicates;
+    b) Select an unused subcategory from the allowed list as the priority;
+    c) Ensure the recommended item coordinates with the already selected items and complies with all rules in the Style Guide.
+Now, please start building an outfit (with strictly unique categories for all items) and output the JSON for the first item.
+"""
+
+
+# System-prompt template for the accessories stage (all accessories in one reply).
+#
+# This literal is an f-string: `{IGNORE_SUBCATEGORY}` is interpolated when the
+# module is imported, while `{{gender}}`, `{{current_category}}`,
+# `{{request_summary}}`, `{{stylist_guide}}` and `{{max_len}}` survive as
+# `{name}` placeholders for a later `str.format` call.
+# NOTE(review): if the repr of IGNORE_SUBCATEGORY ever contains braces (e.g. it
+# is a set or dict), that later `str.format` call would fail — confirm it is a
+# list or tuple.
+#
+# Rule 1 names the output array `recommended_items` so that it matches the key
+# produced by `build_batch_schema` in this module.
+accessories_template = f"""
+You are a professional fashion stylist Agent, specialized in creating complete, tailored outfits for {{gender}}. Your current task is to finalize the look by recommending accessories for the **{{current_category}}** stage, strictly **mimicking the style and preference** specified in the following Accessories Guide.
+
+Your final task is to **select the perfect set of accessories** to complete the given outfit. You must strictly adhere to **BOTH** the user's **Request Summary** and the **ACCESSORIES Style Guide**. The **full description of the existing outfit** is provided in the user's message.
+
+---
+## CONTEXT
+[User Request]: {{request_summary}}
+
+[Accessories Style Guide]:
+{{stylist_guide}}
+
+---
+## ACCESSORIES GENERATION RULES
+
+1. **Batch Recommendation**: You must output the **COMPLETE LIST of accessories** in a single response using the 'recommended_items' list defined in the schema. Do not recommend items one by one.
+2. **Quantity Constraint**: The total number of accessories recommended in the list must not exceed **{{max_len}}** items. Typically, 1 to {{max_len}} distinct items are required to complete a look.
+3. **Harmony & Guide Compliance**: 
+    - Assess the existing outfit (provided in the user's message) and ensure all accessories complement its style, color palette, and occasion.
+    - **Strictly follow the [Accessories Style Guide]** regarding material types (e.g., metals like gold/silver), total numbers allowed, and specific layering requirements (e.g., mandated watch or jewelry layering).
+4. **Exclusion List**: Subcategories in the following list are strictly excluded from recommendation: ({IGNORE_SUBCATEGORY}).
+5. **Description Quality**: The 'description' field for each accessory must be **extremely detailed and precise** for high-accuracy vector search, including: **Color, Material/Detail, and the specific Role in the Outfit.**
+
+Generate the final, complete accessories list now.
+"""
+
+# System-prompt template for the single-shot mode that asks for the whole
+# outfit (clothing, shoes, bags, accessories) in one response.
+#
+# This literal is an f-string: `{IGNORE_SUBCATEGORY}` is interpolated when the
+# module is imported, while `{{gender}}`, `{{request_summary}}`,
+# `{{stylist_guide}}` and `{{max_len}}` survive as `{name}` placeholders for a
+# later `str.format` call.
+# NOTE(review): if the repr of IGNORE_SUBCATEGORY ever contains braces (e.g. it
+# is a set or dict), that later `str.format` call would fail — confirm it is a
+# list or tuple.
+all_items_template = f"""
+You are a professional fashion stylist Agent, specialized in creating complete, tailored outfits for {{gender}}. Your task is to **generate a Complete, Head-to-Toe Outfit** in a **Single Batch**, strictly **mimicking the style and preference** specified in the Stylist Guide.
+
+You must create a cohesive look that includes **Clothing, Shoes, Bags, and Accessories**. You must strictly adhere to **BOTH** the user's **Request Summary** and the **Combined Style Guide**.
+
+---
+## Request from the User:
+{{request_summary}}
+
+## Core Guidance Document: Combined Style Guide
+{{stylist_guide}}
+---
+
+## GENERATION WORKFLOW & RULES
+
+1.  **Holistic Styling**: You are NOT recommending items step-by-step. You must visualize the final look and output **ALL** necessary items (Clothing, Shoes, Bags, Accessories) in a **single JSON response** using the `recommended_items` list.
+
+2.  **Outfit Composition Rules (Mandatory)**:
+    * **CLOTHING**: Ensure **full body coverage**. You must include either [Top + Bottom] OR [One-piece (e.g., Dress/Jumpsuit)]. 'Dresses' and 'Skirts/Pants' are mutually exclusive.
+    * **SHOES**: **Exactly one (1) pair** of shoes is MANDATORY.
+    * **BAGS**: Recommend **0 or 1 bag**. Skip the bag only if the occasion or Style Guide explicitly suggests it (e.g., home wear, yoga).
+    * **ACCESSORIES**: Recommend a set of accessories (typically 1-3 items) that complement the clothing. Follow metal/material constraints in the guide.
+    Number of items in outfit must not exceed {{max_len}}.
+
+3.  **Uniqueness Mandate**: 
+    * Each **subcategory** belonging to CLOTHING (e.g., 't-shirts', 'sweaters', 'jacket') can appear **EXACTLY ONCE** in the entire list. 
+    * But **subcategory** belonging to ACCESSORIES can repeat.
+
+4.  **Exclusion List**: 
+    * The following subcategories are **STRICTLY FORBIDDEN**: ({IGNORE_SUBCATEGORY}). Do not include them in your recommendation.
+
+5.  **Style Adherence**: 
+    * Ensure all items coordinate in **color, fit, and material**.
+    * Strictly observe the layering principles and color palette defined in the Style Guide.
+
+6.  **Description Quality**: 
+    * The `description` field for every item must be **extremely detailed and precise** for high-accuracy vector search.
+    * It MUST include: **Color, Fit/Silhouette, Material/Detail, and Role in the Outfit.**
+
+## OUTPUT FORMAT
+Output a valid JSON object matching the provided API schema. The `recommended_items` array must contain all the items for this outfit.
+
+Generate the complete outfit list now.
+"""
+
+
+def build_iterative_schema(current_category):
+    """Build the response schema for one step of the iterative styling loop.
+
+    Args:
+        current_category: Key into ``FASHION_TAXONOMY``; the value stored under
+            it becomes the allowed ``enum`` of the ``subcategory`` field.
+
+    Returns:
+        A JSON-schema-style dict describing an object with four string fields
+        (``action``, ``subcategory``, ``description``, ``reason``), all of
+        which are listed as required.
+
+    Raises:
+        KeyError: Presumably, when ``current_category`` is not a key of
+            ``FASHION_TAXONOMY`` (assuming it is a plain dict).
+    """
+    action_field = {"type": "string", "enum": ["recommend_item", "stop"]}
+
+    # NOTE(review): `subcategory` is both required and enum-restricted, yet its
+    # description says it is only present for 'recommend_item' — confirm how a
+    # 'stop' response is expected to fill this field.
+    subcategory_field = {
+        "type": "string",
+        "description": "The subcategory this single item. Only present if action is 'recommend_item'",
+        "enum": FASHION_TAXONOMY[current_category],
+    }
+
+    description_field = {
+        "type": "string",
+        "description": "an **extremely detailed and precise** description of the item. This description is used for **high-accuracy vector search** in the database. It should include Color, Fit/Silhouette, Material/Detail, Role in the Outfit.",
+    }
+
+    reason_field = {
+        "type": "string",
+        "description": "The reason for the current action. Required if action is 'stop' (to summarize the final outfit).",
+    }
+
+    return {
+        "type": "object",
+        "properties": {
+            "action": action_field,
+            "subcategory": subcategory_field,
+            "description": description_field,
+            "reason": reason_field,
+        },
+        "required": ["action", "subcategory", "description", "reason"],
+    }
+
+
+def build_batch_schema(specified_category: str = ""):
+    """Build the response schema for a batch (all items in one reply) request.
+
+    Args:
+        specified_category: A key of ``FASHION_TAXONOMY`` to restrict the
+            allowed subcategories to that category, or ``""`` (default) to
+            allow every entry of ``ALL_SUBCATEGORY_LIST``.
+
+    Returns:
+        A JSON-schema-style dict describing an object with a required
+        ``reason`` string and a required ``recommended_items`` array whose
+        elements each carry a ``subcategory`` (enum-restricted) and a
+        ``description``.
+
+    Raises:
+        ValueError: If ``specified_category`` is neither ``""`` nor a key of
+            ``FASHION_TAXONOMY``.
+    """
+    # Validate with an explicit raise: an `assert` is stripped under
+    # `python -O`, which would let a bad category through.
+    if specified_category != "" and specified_category not in FASHION_TAXONOMY:
+        raise ValueError(
+            f"Unknown category {specified_category!r}; expected '' or one of "
+            f"{list(FASHION_TAXONOMY)}."
+        )
+
+    if not specified_category:
+        category_range_desc = "the complete final outfit (including all categories)"
+        subcategory_list = ALL_SUBCATEGORY_LIST
+        # Avoid rendering "this  item" (double space) when no category is given.
+        item_label = "item"
+    else:
+        category_range_desc = specified_category
+        subcategory_list = FASHION_TAXONOMY[specified_category]
+        item_label = f"{specified_category} item"
+
+    schema = {
+        "type": "object",
+        "properties": {
+            "reason": {
+                "type": "string",
+                "description": f"The justification for the selection of {category_range_desc}. This summary must explain how the recommended items meet the user's request and style requirements."
+            },
+            "recommended_items": {
+                "type": "array",
+                "description": "A list of descriptions of recommended items.",
+                "items": {
+                    "type": "object",
+                    "properties": {
+                        "description": {"type": "string", "description": f"The detailed description for this {item_label}."},
+                        "subcategory": {
+                            "type": "string",
+                            "description": "The subcategory of the recommended item.",
+                            "enum": subcategory_list
+                        },
+                    },
+                    "required": ["subcategory", "description"]
+                }
+            }
+        },
+        "required": ["recommended_items", "reason"]
+    }
+    return schema
--- a/app/server/ChatbotAgent/core/stylist_agent_server.py
+++ b/app/server/ChatbotAgent/core/stylist_agent_server.py
@@ -1,12 +1,10 @@
-import asyncio
 import io
 import json
 import logging
 import os
-import random
-import uuid
 from typing import List, Dict, Any, Optional
 from copy import deepcopy
+import time

 from google import genai
 from google.cloud import storage
@@ -16,25 +14,28 @@ from app.server.utils.img_operation import merge_images_to_square
 from app.server.utils.minio_client import minio_client, oss_upload_image
 from app.server.utils.request_post import post_request
 from app.config import settings
-from app.taxonomy import CATEGORY, ALL_CATEGORY, IGNORE_CATEGORY
+from app.server.ChatbotAgent.core.prompt import (
+    core_outfit_template,
+    accessories_template,
+    all_items_template,
+    build_iterative_schema,
+    build_batch_schema
+)
+from app.taxonomy import FASHION_TAXONOMY, ALL_SUBCATEGORY_LIST

 logger = logging.getLogger(__name__)


-IGNORE_CATEGORY = set(IGNORE_CATEGORY)
-CLOTHING_CATEGORY = set(CATEGORY['clothing'] + CATEGORY['shoes'] + CATEGORY['bags']) - IGNORE_CATEGORY
-ACCESSORY_CATEGORY = set(CATEGORY['accessories']) - IGNORE_CATEGORY
-
-
 class AsyncStylistAgent:
-    def __init__(self, local_db, max_len: int, gemini_model_name: str, outfit_id=str):
+    def __init__(self, local_db: str, gemini_model_name: str, outfit_id: str, stylist_name: str, gender: str):
        # self.outfit_items: List[Dict[str, str]] = []
        self.outfit_id = outfit_id
+        self.stylist_name = stylist_name
+        self.gender = gender
        self.gemini_client = genai.Client(
            vertexai=True, project='aida-461108', location='us-central1'
        )
        self.local_db = local_db
-        self.max_len = max_len
        self.gemini_model_name = gemini_model_name
        self.stop_reason = ""
        self.headers = {
@@ -44,49 +45,6 @@ class AsyncStylistAgent:
            'Connection': "keep-alive",
            'Content-Type': "application/json"
        }
-        self.main_clothing_schema = {
-            "type": "object",
-            "properties": {
-                "action": {"type": "string", "enum": ["recommend_item", "stop"]},
-                "category": {
-                    "type": "string",
-                    "description": "The category of the single clothing item being recommended in this step (e.g., 'outerwear', 'bottoms'). Only present if action is 'recommend_item'.",
-                    "enum": CLOTHING_CATEGORY
-                },
-                "description": {
-                    "type": "string", 
-                    "description": "an **extremely detailed and precise** description of the item. This description is used for **high-accuracy vector search** in the database. It should include Color, Fit/Silhouette, Material/Detail, Role in the Outfit."
-                },
-                "reason": {"type": "string", "description": "The reason for the current action. Required if action is 'stop' (to summarize the final outfit)."}
-            },
-            "required": ["action"]
-        }
-        self.accessory_schema = {
-            "type": "object",
-            "properties": {
-                "reason": {
-                    "type": "string", 
-                    "description": "The justification for completing the recommendation and the summary of the final outfit."
-                },
-                "recommended_accessories": {
-                    "type": "array",
-                    "description": "A list of accessories recommended to complete the outfit.",
-                    "items": {
-                        "type": "object",
-                        "properties": {
-                            "category": {
-                                "type": "string", 
-                                "description": "The category of the accessory (e.g., jewelry, watches, bags).",
-                                "enum": ACCESSORY_CATEGORY
-                            },
-                            "description": {"type": "string", "description": "The detailed description for this accessory item."}
-                        },
-                        "required": ["category", "description"]
-                    }
-                }
-            },
-            "required": ["recommended_accessories", "reason"]
-        }

        # 存储桶配置
        try:
@@ -115,100 +73,6 @@ class AsyncStylistAgent:
        except Exception as e:
            raise Exception(f"Failed to load style guide from {guide_path}, {acc_guide_path}: {e}")

-    def _build_main_clothing_prompt(self, request_summary: str = "", gender: str = "male", stylist_guide: str = "") -> str:
-        """Constructs the complete System Prompt."""
-
-        clothing_gender = "men's clothing" if gender == "male" else "women's clothing"
-
-        # Insert the style_guide content into the template
-        template = template = f"""
-        You are a professional fashion stylist Agent, specialized in creating complete, tailored outfits for {clothing_gender}. Only main clothing including 'bags' is needed, excluding accessories like 'jewelry', 'hats', 'belts', etc.
-    
-        Your task is to **create a cohesive and complete outfit**, strictly adhering to **BOTH** the user's explicit **Request Summary** and the **Outfit Style Guide**. You must decide the next logical item to add to the outfit based on the currently selected items (if any).
-
-        ---
-        ## Request from the User:
-        {request_summary}
-    
-        ## Core Guidance Document: Outfit Style Guide
-        {stylist_guide}
-        ---
-        
-        ## Your Workflow and Constraints
-        
-        1.  **Style Adherence**: You must strictly observe all rules in the Style Guide concerning **color palette, fit, layering principles, pattern restrictions , shoe coordination**.
-        2.  **Category Uniqueness Mandate**: Every outfit must follow the **absolute no-repeat rule for clothing categories** — each category from the allowed list can appear **exactly once** in the entire outfit. This rule is non-negotiable, even if the user explicitly requests repeating a category. Furthermore, the categories 'dresses' and 'pants' and 'skirts' are mutually exclusive; they NORMALLY cannot be included in the same outfit.
-        3.  **Step Planning**: The styling sequence must follow a **top-down, inside-out** approach: First major garments (tops/outerwear/bottoms/dresses) then shoes. When selecting the next item, prioritize unused categories from the allowed list to avoid repetition.
-        4.  **Structured Output**: Every response must recommend the **next single item** (from an unused category). You must strictly use the **JSON format** for your output, as follows:
-        
-        ```json
-        {{
-        "action": "recommend_item",
-        "category": "YOUR_ITEM_CATEGORY",
-        "description": "YOUR_DETAILED_DESCRIPTION"
-        }}
-        ```
-        
-        * `action`: Must always be `"recommend_item"` until the outfit is complete.
-        * `category`: Must be an unused category from the following list: {list(CLOTHING_CATEGORY)} (strictly no repeats, per the Category Uniqueness Mandate).
-        * `description`: This must be an **extremely detailed and precise** description of the item. This description is used for **high-accuracy vector search** in the database and must include:
-           * **Color** (e.g., milk tea, pure white, dark gray)
-           * **Fit/Silhouette** (e.g., Oversize, loose, slim-fit)
-           * **Material/Detail** (e.g., 100% cotton, linen, gold clasp, thin stripe, checkered pattern)
-           * **Role in the Outfit** (e.g., serves as the innermost base layer for layering; acts as the crucial tie accent for the smart casual look)
-        
-        5.  **Termination Condition**: Terminate when **both** conditions are fully met：
-            a) The entire outfit is complete and all mandatory elements stipulated in the Style Guide are satisfied;
-            b) No duplicate categories are present (strict compliance with the Category Uniqueness Mandate).
-            When terminating, output the following JSON format:
-        ```json
-        {{
-        "action": "stop",
-        "reason": "OUTFIT_COMPLETE_AND_MEETS_ALL_MINI_GUIDELINES"
-        }}
-        ```
-        Normally, {self.max_len} items are totally enough for an outfit.
-        
-        6.  **Context Dependency**: The user's next input (if not Start) will contain the **image and description of the selected item**. When recommending the next item:
-            a) First verify the categories of all already selected items to ensure no duplicates;
-            b) Select an unused category from the allowed list as the priority;
-            c) Ensure the recommended item coordinates with the already selected items and complies with all rules in the Style Guide.
-        Now, please start building an outfit (with strictly unique categories for all items) and output the JSON for the first item.
-        """
-        return template.strip()
-    
-    def _build_accessory_prompt(self, request_summary: str, gender: str, accessories_guide: str) -> str:
-        """
-        构建配饰推荐 (Accessories) 的 System Prompt。
-        特点：强调基于现有穿搭 (Context Aware)，批量推荐 (Batch Recommendation)，做最后的点缀。
-        """
-        clothing_gender = "men's clothing" if gender == "male" else "women's clothing"
-
-        template = f"""
-        You are an expert Accessories Stylist for {clothing_gender}. 
-        Your task is to select the perfect set of accessories to complete an existing outfit.
-
-        ---
-        ## CONTEXT
-        [User Request]: {request_summary}
-
-        [Accessories Style Guide]:
-        {accessories_guide}
-
-        ---
-        ## STRICT RULES
-        1. **Batch Recommendation**: Do NOT recommend items one by one. You must output the **COMPLETE LIST** of accessories (e.g., jewelry, bag, watch, hat) in a single response using the 'recommended_accessories' list.
-        2. **Allowed Categories**: Select only from: {list(ACCESSORY_CATEGORY)}.
-        3. **Harmony & Constraints**: 
-           - The accessories must complement the [Current Outfit Base].
-           - Strictly follow the [Accessories Style Guide] regarding metals (gold/silver), numbers, and prohibited items.
-           - If the guide mandates a watch or specific jewelry layering, ensure they are included.
-        4. **Quantity**: Typically recommend 2-4 distinct accessory items to complete the look.
-
-        Generate the final accessories list now.
-        """
-        return template.strip()
-
    async def _call_gemini(self, user_input: str, user_id: str, file_name: str, output_schema: Dict[str, Any], image_bytes: bytes = None, system_prompt: str = "") -> str:
        """
        实际调用 Gemini API 的函数，接受文本和用户的id。
@@ -300,7 +164,7 @@ class AsyncStylistAgent:
            print(f"Raw response: {response_text}")
            return None

-    def _get_next_item(self, item_description: str, category: str, occasions: List[str], batch_sources: List[str] = [], gender: str = "female") -> Optional[Dict[str, str]]:
+    def _get_next_item(self, item_description: str, category: str, subcategory: str, occasions: List[str], batch_sources: List[str] = [], gender: str = "female") -> Optional[Dict[str, str]]:
        """
        1. 根据描述生成嵌入。
        2. 查询本地数据库以找到最佳匹配项。
@@ -330,165 +194,311 @@ class AsyncStylistAgent:
        # 3. 模拟 Agent 审核(实际应用中,你需要将图片发回给 Agent进行审核)
        best_meta = results[0]  # 第一个 batch 的第一个 metadata
        item_id = best_meta['item_id'].replace("_img", "")
+        batch_source = best_meta['batch_source']
        return {
            "item_id": item_id,  # 从 metadata 字典中安全获取
            "category": best_meta['category'],
-            "gpt_description": item_description,
            'description': best_meta['description'],
+            "subcategory": best_meta['subcategory'],
+            "gpt_description": item_description,
+            "gpt_subcategory": subcategory,
            # 假设 'item_path' 存储在 metadata 中，或从 'item_id' 推导
            # 这里假设 item_id 就是文件名的一部分
-            "image_path": os.path.join(f"{item_id}.jpg")
+            "image_path": os.path.join(settings.DATA_ROOT, batch_source, 'image_data', f"{item_id}.jpg")
        }
+    
+    def _build_system_prompt(self, template: str, request_summary: str = "", stylist_guide: str = "", current_category: str = "clothing", max_len: int=4) -> str:
+        """Fill the template's named placeholders (gender, category, summary, guide, max_len) and strip outer whitespace."""
+        placeholders = {
+            "gender": self.gender,
+            "current_category": current_category.upper(),
+            "request_summary": request_summary,
+            "stylist_guide": stylist_guide,
+            "max_len": max_len,
+        }
+        return template.format(**placeholders).strip()

-    def _build_user_input(self, recommend_acc=False) -> str:
-        """构建发送给 Gemini 的用户输入,包含已选单品信息。"""
+    def _build_user_input(self, current_category: str, existing_subcategories: str) -> str:
+        """Build the user-turn text for Gemini: already-selected items (if any) followed by a per-category instruction."""
        if not self.outfit_items:
-            return "Start"
-
-        # 将已选单品的信息作为上下文发回给 Agent
-        context = "Selected fashion items:\n"
-        for ii, item in enumerate(self.outfit_items):
-            context += f"{ii + 1}. Category: {item['category']}. Description: {item['description']}\n"
-        if not recommend_acc:
-            context += "\nPlease recommend the next single item based on the selected items, user's request, and style guide."
+            context = ""
        else:
-            context += "\nPlease recommend a complete list of accessories to complement the selected outfit based on the user's request and accessories style guide."
+            context = "Selected fashion items:\n"
+        
+        # Feed the already-selected items back to the agent as context
+        for ii, item in enumerate(self.outfit_items):
+            context += f"{ii + 1}. Category: {item['category']}. Subcategory: {item['subcategory']}. Description: {item['description']}\n"
+    
+        if current_category == 'clothing':
+            context += f"\nRecommend the next single item based on the selected items, user's request, and style guide. 【CRITICAL CONSTRAINT】You MUST strictly **maintain uniqueness**; do not recommend any clothing whose **Subcategory** is already present in this exclusion list: {existing_subcategories}."
+        elif current_category in ['shoes', 'bags']:
+            context += f"\nRecommend the next {current_category} based on the selected items, user's request, and style guide."
+        elif current_category == 'accessories':
+            context += f"\nRecommend a complete list of accessories to complement the selected outfit based on the user's request and accessories style guide. 【CRITICAL CONSTRAINT】You MUST strictly **maintain uniqueness**; do not recommend any accessories whose **Subcategory** is already present in this exclusion list: {existing_subcategories}."
+        elif current_category == 'all':
+            context += "\nRecommend a **complete, full outfit**, including all items (clothing, shoes, bags, and accessories), strictly following the Request Summary and Style Guide. Output the **complete list** of items in a single JSON response."
        return context
    
-    def post_operation(self, response_data: Dict[str, Any], status: str, message: str, callback_url: str):
-        """处理完成后的回调操作。"""
+    def post_operation(self, status: str, message: str, callback_url: str, img_path: str) -> Dict[str, Any]:
+        """When settings.LOCAL == 0, POST the current outfit state to callback_url and return the payload sent; otherwise return {}."""
        if settings.LOCAL == 0:
-            response_data['items'] = deepcopy(self.outfit_items)
-            response_data['status'] = status
-            response_data['message'] = message
+            response_data = {
+                'items': deepcopy(self.outfit_items),
+                'status': status,
+                'message': message,
+                'path': img_path,
+                'outfit_id': self.outfit_id
+            }
            response = post_request(url=callback_url, data=json.dumps(response_data), headers=self.headers)
            logger.info(f"request data ：{response_data} | JAVA callback info -> status:{response.status_code} | message:{response.text}")
+            return response_data
+        else:
+            return {}

-    async def run_styling_process(self, request_summary, occasions, stylist_name, start_outfit=[], batch_sources=[], user_id="test", callback_url="", gender: str = "male"):
-        self.outfit_items = start_outfit
-        """主流程控制循环。"""
-        print(f"--- Starting Agent (Outfit ID: {self.outfit_id}) ---")
-
-        stylist_guide, accessories_guide = self._load_style_guide(stylist_name)
-        system_prompt = self._build_main_clothing_prompt(request_summary, gender, stylist_guide)
-
-        response_data = {
-            "status": "",
-            "message": "",
-            "path": "",
-            "outfit_id": self.outfit_id,
-            "items": []
-        }
-        logger.info(response_data)
-        url = f'{callback_url}/api/style/callback'
-        file_name = self.outfit_id
-
+    async def _execute_iterative_recommendation(
+        self,
+        current_category: str,
+        system_prompt: str,
+        schema: Dict,
+        max_len: int,
+        occasions: List[str],
+        batch_sources: List[str],
+        user_id: str,
+        url: str
+    ):
+        """Ask Gemini for one item per step (at most max_len steps, or until it answers 'stop'), match each against the local DB, and append accepted items to self.outfit_items."""
        recommend_timestep = 0
        gemini_data = {'action': 'start'}
-        while recommend_timestep < self.max_len and gemini_data.get('action') != 'stop':
+        existing_subcategories = [item['subcategory'] for item in self.outfit_items]  # seed from the current outfit so the prompt's exclusion list matches the duplicate check below
+        while recommend_timestep < max_len and gemini_data.get('action') != 'stop':
            recommend_timestep += 1
-            # 1. 准备用户输入(上下文)
-            user_input = self._build_user_input()
+            # 1. Build the user input (context)
+            user_input = self._build_user_input(current_category, ", ".join(existing_subcategories))

-            # 2. 把图片组装起来供api调用
-            response_data['path'], image_bytes = await self._merge_images(file_name, user_id, stylist_name)
+            # 2. Merge the selected items' images for the API call
+            merged_image_path, image_bytes = await self._merge_images(self.outfit_id, user_id, self.stylist_name)

-            # 3. 调用 Gemini Agent
-            gemini_response_text = await self._call_gemini(user_input, user_id, file_name, self.main_clothing_schema, image_bytes, system_prompt)
+            # 3. Call the Gemini agent
+            gemini_response_text = await self._call_gemini(
+                user_input,
+                user_id,
+                self.outfit_id,
+                schema,
+                image_bytes,
+                system_prompt
+            )
            gemini_data = self._parse_gemini_response(gemini_response_text)

            if not gemini_data:
                print("Agent 返回无效响应,终止流程。")
                self.post_operation(
-                    response_data, 
                    status="failed", 
                    message="Agent returned invalid response, terminating process.", 
-                    callback_url=url
+                    callback_url=url,
+                    img_path=merged_image_path,
                )
                break

-            # 处理推荐单品
+            # Handle a recommended item
            if gemini_data.get('action') == 'recommend_item':
-                category = gemini_data.get('category')
+                subcategory = gemini_data.get('subcategory')
                description = gemini_data.get('description')

-                # 4a. 检查类别是否有效 (重要步骤)
-                if category not in CLOTHING_CATEGORY:
+                # 4a. Validate the subcategory against this stage's taxonomy (important step)
+                if subcategory not in FASHION_TAXONOMY[current_category]:
                    self.post_operation(
-                        response_data, 
                        status="continue", 
-                        message=f"Invalid category recommended by Agent: {category}. Requesting Agent to re-output.", 
-                        callback_url=url
+                        message=f"Invalid subcategory recommended by Agent: {subcategory}. Requesting Agent to re-output.",
+                        callback_url=url,
+                        img_path=merged_image_path,
                    )
                    continue

-                # 4b. 在本地 DB 中查询单品
-                new_item = self._get_next_item(description, category, occasions, batch_sources, gender)
-                if not new_item or new_item['item_id'] in [x['item_id'] for x in self.outfit_items]:
+                # 4b. Look the item up in the local DB
+                new_item = self._get_next_item(description, current_category, subcategory, occasions, batch_sources, self.gender)
+                if not new_item:
                    self.post_operation(
-                        response_data, 
                        status="continue", 
-                        message=f"No matching item is found or item duplicated. Ask Gemini to re-output.", 
-                        callback_url=url
+                        message=f"No matching item is found. Ask Gemini to re-output.",
+                        callback_url=url,
+                        img_path=merged_image_path,
+                    )
+                    continue
+                elif new_item['subcategory'] in [x['subcategory'] for x in self.outfit_items]:
+                    self.post_operation(
+                        status="continue",
+                        message=f"{new_item['item_id']}'s subcategory {new_item['subcategory']} duplicated. Ask Gemini to re-output.",
+                        callback_url=url,
+                        img_path=merged_image_path,
+                    )
+                    continue
+                elif new_item['item_id'] in [x['item_id'] for x in self.outfit_items]:
+                    self.post_operation(
+                        status="continue",
+                        message=f"Item {new_item['item_id']} duplicated. Ask Gemini to re-output.",
+                        callback_url=url,
+                        img_path=merged_image_path,
                    )
                    continue
                else:
                    self.outfit_items.append(new_item)
+                    existing_subcategories.append(new_item["subcategory"])
                    self.post_operation(
-                        response_data, 
                        status="ok", 
                        message=f"Add new item {new_item['item_id']} in category {new_item['category']} successfully.",
-                        callback_url=url
+                        callback_url=url,
+                        img_path=merged_image_path,
                    )
-                    print(f"Step {recommend_timestep}: {gemini_data}, found item: {new_item}")
+                    print(f"Stage {current_category.upper()}, Step {recommend_timestep}: {gemini_data}, found item: {new_item['item_id']}")

-        # When action is stop or timestep limit reached
-        logger.info(f"Main clothing stylist process finished: {gemini_data.get('reason')}")
-        # 根据stylist要求随机增加配饰 3-4个配饰
-        response_data['path'], image_bytes = await self._merge_images(file_name, user_id, stylist_name)
-        accessory_system_prompt = self._build_accessory_prompt(request_summary, gender, accessories_guide)
-        user_input = self._build_user_input(recommend_acc=True)
-        gemini_response_text = await self._call_gemini(user_input, user_id, file_name, self.accessory_schema, image_bytes, accessory_system_prompt)
+    async def _execute_batch_recommendation(
+        self,
+        current_category: str,  # a single category (e.g. 'accessories') or 'all'
+        system_prompt: str,
+        schema: Dict,
+        occasions: List[str],
+        batch_sources: List[str],
+        user_id: str,
+        url: str
+    ):
+        """Ask Gemini once for a whole list of items (one category, or every category when 'all'), add each DB match to self.outfit_items, and return Gemini's 'reason' text."""
+        user_input = self._build_user_input(current_category, existing_subcategories=", ".join(x['subcategory'] for x in self.outfit_items))
+        merged_image_path, image_bytes = await self._merge_images(self.outfit_id, user_id, self.stylist_name)
+        gemini_response_text = await self._call_gemini(
+            user_input,
+            user_id,
+            self.outfit_id,
+            schema,
+            image_bytes,
+            system_prompt
+        )
-        gemini_data = self._parse_gemini_response(gemini_response_text)
-
-        recommended_accessories = gemini_data.get('recommended_accessories', [])
+        gemini_data = self._parse_gemini_response(gemini_response_text) or {}  # an unparseable response yields None; treat it as empty so the 'failed' branch runs
+        recommended_items = gemini_data.get('recommended_items', [])
        reason = gemini_data.get('reason', '')
-        if not recommended_accessories or not isinstance(recommended_accessories, List):
-            print("No accessory data from Gemini, terminating process.")
+        if not recommended_items or not isinstance(recommended_items, list):
+            print("No recommended item from Gemini, terminating process.")
            self.post_operation(
-                response_data, 
                status="failed", 
                message="Agent returned invalid response, terminating process.", 
-                callback_url=url
+                callback_url=url,
+                img_path=merged_image_path
            )
        else:
-            for idx, rec_accessory in enumerate(recommended_accessories):
-                category = rec_accessory.get('category')
-                description = rec_accessory.get('description')
+            for idx, rec_item in enumerate(recommended_items):
+                subcategory = rec_item.get('subcategory')
+                description = rec_item.get('description')

-                # 4a. 检查类别是否有效 (重要步骤)
-                if category not in ACCESSORY_CATEGORY:
+                # 4a. Skip items whose subcategory is not in the known taxonomy
+                if subcategory not in ALL_SUBCATEGORY_LIST:
                    continue

-                # 4b. 在本地 DB 中查询单品
-                new_item = self._get_next_item(description, category, occasions, batch_sources, gender)
+                # 4b. Resolve the top-level category, then query the local DB.
+                # A single-category call uses that category as-is; 'all' mode looks up
+                # which taxonomy category owns the subcategory. Without the default
+                # assignment, `category` is unbound outside 'all' mode (NameError).
+                category = current_category
+                if current_category == "all":
+                    category = next((cat for cat, subs in FASHION_TAXONOMY.items() if subcategory in subs), None)
+                    if category is None:
+                        continue
+                new_item = self._get_next_item(description, category, subcategory, occasions, batch_sources, self.gender)
                if not new_item or new_item['item_id'] in [x['item_id'] for x in self.outfit_items]:
                    continue
                else:
                    self.outfit_items.append(new_item)
-                    print(f"Accessory {idx + 1}: {rec_accessory}, found item: {new_item}")
+                    print(f"Item {idx + 1}: ({subcategory}) {rec_item}, found item: {new_item}")
+        return reason
+        

-        response_data['path'] = await self._merge_images(file_name, user_id, stylist_name)
-        self.post_operation(
-            response_data,
+    async def run_iterative_styling(self, request_summary, occasions, start_outfit=[], batch_sources=[], user_id="test", callback_url=""):
+        """Build an outfit stage by stage (clothing, shoes, bags one item at a time; accessories in one batch) and return (callback payload, elapsed seconds)."""
+        start_time = time.monotonic()
+        STAGES = ['clothing', 'shoes', 'bags']
+        self.outfit_items = list(start_outfit)  # copy: appending must not mutate the caller's list or the shared default []
+        stylist_guide, accessories_guide = self._load_style_guide(self.stylist_name)
+        url = f'{callback_url}/api/style/callback'
+
+        print(f"--- Starting Agent (Outfit ID: {self.outfit_id}) ---")
+        for current_category in STAGES:
+            max_len = 4 if current_category == 'clothing' else 1
+            system_prompt = self._build_system_prompt(core_outfit_template, request_summary, stylist_guide, current_category, max_len)
+
+            await self._execute_iterative_recommendation(
+                current_category,
+                system_prompt,
+                build_iterative_schema(current_category),
+                max_len,
+                occasions,
+                batch_sources,
+                user_id,
+                url
+            )
+
+        # Accessory stage: a single batch request driven by the stylist's accessories guide
+        MAX_LEN_ACC = 3
+        acc_system_prompt = self._build_system_prompt(accessories_template, request_summary, accessories_guide, 'accessories', MAX_LEN_ACC)
+        reason = await self._execute_batch_recommendation(
+                'accessories',  # explicit: the loop variable still holds 'bags' at this point
+                acc_system_prompt,
+                build_batch_schema('accessories'),
+                occasions,
+                batch_sources,
+                user_id,
+                url
+            )
+
+        final_image_path, _ = await self._merge_images(self.outfit_id, user_id, self.stylist_name)  # returns (path, image bytes); only the path goes into the callback
+        response_data = self.post_operation(
            status="stop",
            message=reason,
-            callback_url=url
+            callback_url=url,
+            img_path=final_image_path
        )
-        with open(os.path.join(settings.OUTFIT_OUTPUT_DIR, stylist_name, f'{file_name}.json'), 'w') as f:
-            json.dump(self.outfit_items, f, indent=2)
+        if settings.LOCAL == 1:
+            with open(os.path.join(settings.OUTFIT_OUTPUT_DIR, self.stylist_name, f'{self.outfit_id}.json'), 'w') as f:
+                json.dump({"request_summary": request_summary,"occasions": occasions, "items": self.outfit_items}, f, indent=2)
+
+        end_time = time.monotonic()
+        total_duration = end_time - start_time
    
-        return response_data
+        return response_data, total_duration
+    
+    async def run_quick_batch_styling(self, request_summary, occasions, start_outfit=[], batch_sources=[], user_id="test", callback_url=""):
+        """Request the whole outfit (all categories) from Gemini in one batch call and return (callback payload, elapsed seconds)."""
+        start_time = time.monotonic()
+        self.outfit_items = list(start_outfit)  # copy: appending must not mutate the caller's list or the shared default []
+        stylist_guide, accessories_guide = self._load_style_guide(self.stylist_name)
+        url = f'{callback_url}/api/style/callback'
+
+        print(f"--- Starting Agent (Outfit ID: {self.outfit_id}) ---")
+
+        MAX_LEN = 9
+        system_prompt = self._build_system_prompt(all_items_template, request_summary, stylist_guide + accessories_guide, "", MAX_LEN)
+        reason = await self._execute_batch_recommendation(
+            'all',  # one request covering clothing, shoes, bags and accessories
+            system_prompt,
+            build_batch_schema(),
+            occasions,
+            batch_sources,
+            user_id,
+            url
+        )
+
+        final_image_path, _ = await self._merge_images(self.outfit_id, user_id, self.stylist_name)  # returns (path, image bytes); only the path goes into the callback
+        response_data = self.post_operation(
+            status="stop",
+            message=reason,
+            callback_url=url,
+            img_path=final_image_path
+        )
+        if settings.LOCAL == 1:
+            with open(os.path.join(settings.OUTFIT_OUTPUT_DIR, self.stylist_name, f'{self.outfit_id}.json'), 'w') as f:
+                json.dump({"request_summary": request_summary,"occasions": occasions, "items": self.outfit_items}, f, indent=2)
+
+        end_time = time.monotonic()
+        total_duration = end_time - start_time
+
+        return response_data, total_duration

    def _upload_to_gcs(self, bucket_name: str, blob_name: str, mime_type, image_bytes) -> str:
        """同步方法：将文件上传到 GCS 并返回 GCS URI。"""
--- a/app/server/ChatbotAgent/core/system_prompt.py
+++ b/app/server/ChatbotAgent/core/system_prompt.py
@@ -1,32 +0,0 @@
-BASIC_PROMPT = """You are a professional, friendly, and insightful AI {gender}'s styling assistant.
-
-Your primary mission is to engage in a multi-turn conversation with the user to fully understand their dressing intent. You must adopt a professional yet approachable tone.
-
-CONVERSATION GOALS:
-1.  **Occasion:** Determine the specific event (e.g., romantic dinner, summer wedding, business meeting).
-2.  **Style:** Pinpoint the desired aesthetic (e.g., classic elegance, edgy, minimalist, bohemian).
-3.  **Vibe/Details:** Gather any mood or specific constraints (e.g., needs to be comfortable, requires light colors, no bare shoulders).
-4.  **Item Preference:** Ask the user if they have any specific preferences for an item type or silhouette (e.g., preference for a dress, skirt, tailored pants, or a particular neckline/length).
-
-GUIDANCE FOR RESPONSE GENERATION:
-   After the user's initial request (e.g., "I want a chic outfit for dinner."), immediately reply with a friendly, targeted follow-up question to elicit the most crucial missing information (usually a combination of **Occasion** and **Style**).
-   Be concise. Ask only 1 to 2 essential questions per turn.
-   You must gather sufficient, clear intent before proceeding to actual clothing recommendations.
-
-OUTPUT FORMAT INSTRUCTION:
-   **DO NOT** use any Markdown formatting whatsoever (e.g., do not use asterisks (*), bold text (**), lists, or code blocks).
-   **ONLY** output the plain text response spoken by the AI Assistant.
-
-Example Follow-up (mimicking a conversational flow):
-User: I want a chic outfit for dinner.
-Your Response: Hey there! A chic dinner outfit, I love that! To give you the perfect recommendations, tell me: is this a romantic date, business dinner, or celebration with friends? And what's your go-to style vibe: classic elegance or something with more edge?"""
-
-SUMMARY_PROMPT = """
-You are an expert fashion request analyzer. Analyze the conversation history provided by the user.
-Your task is to:
-
-1. Identify the most appropriate occasions from the allowed list based on the user's intent.
-2. Write a detailed summary string that captures the user's style preferences, specific item requests, disliked items, body concerns, and color preferences. This summary will be used by a stylist to recommend outfits.
-
-Extract this information accurately from the chat history.
-"""
--- a/app/server/ChatbotAgent/core/vector_database.py
+++ b/app/server/ChatbotAgent/core/vector_database.py
@@ -1,6 +1,3 @@
-import random
-import time
-
 import numpy as np
 import torch
 import chromadb
@@ -8,7 +5,7 @@ from PIL import Image
 from typing import List, Dict, Any
 from transformers import CLIPProcessor, CLIPModel

-from app.taxonomy import OCCASION, ALL_CATEGORY
+from app.taxonomy import OCCASION, CATEGORY_LIST, IGNORE_SUBCATEGORY


 class VectorDatabase():
@@ -50,7 +47,7 @@ class VectorDatabase():
        return features.cpu().numpy().flatten().tolist()
        
    def get_matched_item(self, embedding: List[float], category: str, occasions: List[str] = [], batch_sources: List[str] = [], gender: str = 'female', n_results: int = 1) -> List[Dict[str, Any]]:
-        if category not in ALL_CATEGORY:
+        if category not in CATEGORY_LIST:
            raise ValueError(f"Recommended {category} is not valid.")
        
        and_conditions = [
@@ -59,15 +56,19 @@ class VectorDatabase():
            {"$or": [
                {"gender": gender},
                {"gender": "unisex"},
-            ]}
+            ]},
+            {"subcategory": {"$nin": IGNORE_SUBCATEGORY}}
        ]
        if batch_sources and len(batch_sources) > 0:
-            source_conditions = []
-            for source in batch_sources:
-                source_conditions.append({"batch_source": source})
+            if len(batch_sources) == 1:
+                and_conditions.append({"batch_source": batch_sources[0]})
+            else:
+                source_conditions = []
+                for source in batch_sources:
+                    source_conditions.append({"batch_source": source})

-            # 将 Batch Source 的 OR 子句添加到主 AND 条件中
-            and_conditions.append({"$or": source_conditions})
+                # 将 Batch Source 的 OR 子句添加到主 AND 条件中
+                and_conditions.append({"$or": source_conditions})

        results = self.collection.query(
            query_embeddings=[embedding],
--- a/app/server/utils/img_operation.py
+++ b/app/server/utils/img_operation.py
@@ -77,8 +77,7 @@ def merge_images_to_square(outfit_items: List[Dict[str, str]], max_len=9, add_te
            # We use Image.open() and convert to 'RGB' to handle potential transparency (RGBA)
            # and ensure compatibility with the final 'RGB' canvas and JPG output.
            if settings.LOCAL == 1:
-                image_file_path = os.path.join(settings.LOCAL_IMAGE_DIR, path)
-                img = Image.open(image_file_path).convert('RGB')
+                img = Image.open(path).convert('RGB')
            else:
                img = oss_get_image(oss_client=minio_client, path=f"{MINIO_LC_DATA_PATH}/{path}", data_type="PIL").convert('RGB')
            # img = Image.open(path).convert('RGB')
@@ -185,142 +184,3 @@ def merge_images_to_square(outfit_items: List[Dict[str, str]], max_len=9, add_te
    # canvas.save(output_path, 'JPEG', quality=90)

    return canvas
-
-# def merge_images_to_square(outfit_items: List[Dict[str, str]], max_len=9, add_text=True):
-#     """
-#     Loads up to 4 images from the given paths, resizes them while maintaining
-#     aspect ratio, and merges them onto a 1024x1024 white background JPG.
-#
-#     The layout depends on the number of images:
-#     1: Center the single image on the 1024x1024 canvas.
-#     2: Place side-by-side, each scaled to fit a 512x1024 half.
-#     3: Place in top-left (512x512), top-right (512x512), and bottom-left (512x512).
-#     4: Place in all four 512x512 quadrants.
-#
-#     Args:
-#         outfit_items: A list of item metadata (max length 9).
-#
-#     Returns:
-#         The file path of the temporary merged JPG image.
-#     """
-#
-#     # Define the final canvas size
-#     CANVAS_SIZE = 1024
-#
-#     # 1. Create the final white canvas
-#     # Using 'RGB' mode for JPG output
-#     canvas = Image.new('RGB', (CANVAS_SIZE, CANVAS_SIZE), 'white')
-#     draw = ImageDraw.Draw(canvas)
-#     font = ImageFont.load_default()
-#
-#     # 2. Define the quadrants/target areas (x, y, w, h)
-#     # The positions are based on a 512x512 quadrant size
-#     quadrants = {
-#         1: [(0, 0, CANVAS_SIZE, CANVAS_SIZE)],  # Single full-size placement
-#         2: [(0, 0, 512, CANVAS_SIZE), (512, 0, 512, CANVAS_SIZE)],  # Left, Right
-#         3: [(0, 0, 512, 512), (512, 0, 512, 512), (0, 512, 512, 512)],  # Top-Left, Top-Right, Bottom-Left
-#         4: [(0, 0, 512, 512), (512, 0, 512, 512), (0, 512, 512, 512), (512, 512, 512, 512)],  # All Four
-#         5: ALL_9_CELLS[:5],  # 布局前5个单元格 (1-5)
-#         6: ALL_9_CELLS[:6],  # 布局前6个单元格 (1-6)
-#         7: ALL_9_CELLS[:7],  # 布局前7个单元格 (1-7)
-#         8: ALL_9_CELLS[:8],  # 布局前8个单元格 (1-8)
-#         9: ALL_9_CELLS[:9]  # 布局全部9个单元格 (1-9)
-#     }
-#
-#     # 3. Load and Filter Images
-#     valid_images = []
-#     image_paths = [item['image_path'] for item in outfit_items]
-#     for path in image_paths:
-#         try:
-#             # We use Image.open() and convert to 'RGB' to handle potential transparency (RGBA)
-#             # and ensure compatibility with the final 'RGB' canvas and JPG output.
-#             img = oss_get_image(oss_client=minio_client, path=f"{MINIO_LC_DATA_PATH}/{path}", data_type="PIL").convert('RGB')
-#             # img = Image.open(path).convert('RGB')
-#             valid_images.append(img)
-#         except Exception as e:
-#             logger.error(f"Error loading image {path}. Skipping: {e}")
-#
-#     num_images = len(valid_images)
-#
-#     if num_images == 0:
-#         raise ValueError("No valid images were loaded.")
-#
-#     if num_images > max_len:
-#         raise ValueError(f"Valid item number {num_images} exceed max limit {max_len}")
-#
-#     # Get the correct list of target areas based on the number of valid images
-#     target_areas = quadrants.get(num_images, [])
-#
-#     # 4. Resize and Paste
-#     for i, (img, item) in enumerate(zip(valid_images, outfit_items)):
-#         item_id = item['item_id']
-#         category = item['category']
-#         if i >= len(target_areas):
-#             # This should not happen if num_images <= 4
-#             break
-#
-#         # Target area dimensions (x_start, y_start, width, height)
-#         x_start, y_start, target_w, target_h = target_areas[i]
-#
-#         # Calculate new size while maintaining aspect ratio
-#         original_w, original_h = img.size
-#
-#         # Calculate the ratio needed to fit within the target area
-#         ratio_w = target_w / original_w
-#         ratio_h = target_h / original_h
-#
-#         # Use the *smaller* of the two ratios to ensure the image fits entirely
-#         resize_ratio = min(ratio_w, ratio_h)
-#
-#         # Calculate the new dimensions
-#         new_w = int(original_w * resize_ratio)
-#         new_h = int(original_h * resize_ratio)
-#
-#         # Resize the image. Image.Resampling.LANCZOS provides high-quality scaling.
-#         # Pillow documentation recommends ANTIALIAS or BICUBIC for downscaling,
-#         # but LANCZOS is a good general high-quality filter.
-#         # Note: In Pillow versions > 9.0.0, Image.LANCZOS is now Image.Resampling.LANCZOS
-#         resized_img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
-#
-#         # Calculate the paste position to center the resized image within its target area
-#         # Center X: (Target Width - New Width) / 2 + X Start
-#         paste_x = (target_w - new_w) // 2 + x_start
-#         # Center Y: (Target Height - New Height) / 2 + Y Start
-#         # paste_y = (target_h - new_h) // 2 + y_start
-#
-#         TEXT_RESERVE_HEIGHT = 30
-#         paste_y = (target_h - new_h - TEXT_RESERVE_HEIGHT) // 2 + y_start
-#         paste_y = max(paste_y, y_start)
-#
-#         # Paste the resized image onto the canvas
-#         canvas.paste(resized_img, (paste_x, paste_y))
-#
-#         full_text = f"ID: {item_id}, Category: {category}"
-#         try:
-#             # 推荐使用：计算文本的实际尺寸 (width, height)
-#             bbox = draw.textbbox((0, 0), full_text, font=font)
-#             text_w = bbox[2] - bbox[0]
-#             text_h = bbox[3] - bbox[1]
-#         except AttributeError:
-#             # 兼容旧版本 Pillow
-#             text_w, text_h = draw.textsize(full_text, font=font)
-#
-#         # 计算 X 轴起始位置：使其在目标区域 (target_w) 中居中
-#         text_x_center = x_start + target_w // 2
-#         text_x_start = text_x_center - text_w // 2
-#
-#         # 计算 Y 轴起始位置：将其放在目标区域的底部
-#         # (目标区域的起始Y + 目标区域的高度 - 文本行的高度)
-#         text_y_start = y_start + target_h - text_h - 5  # 减去 5 像素作为边距
-#
-#         # 3. 绘制合并后的文本
-#         if add_text:
-#             draw.text((text_x_start, text_y_start),
-#                       full_text,
-#                       fill='black',
-#                       font=font)
-#
-#     # Save as a high-quality JPG (quality=90 is a good balance)
-#     # canvas.save(output_path, 'JPEG', quality=90)
-#
-#     return canvas
--- a/app/taxonomy.py
+++ b/app/taxonomy.py
@@ -7,26 +7,46 @@ OCCASION = [
    "Garden Party / Daytime Event"
 ]

-CATEGORY = {
+FASHION_TAXONOMY = {
    'clothing': [
-        'coats',
-        'jackets',
-        'blazers',
-        'puffer',
-        'cardigan',
-        'sweater',
-        'shirts',
-        't-shirts',
-        'pullover',
-        'polos',
-        'bodysuits',
-        'dresses',
-        'skirts',
-        'jeans',
-        'shorts',
-        'leggings',
-        'jumpsuits',
-        'swimwear',
+        # --- Tops ---
+        't-shirts',          # tees
+        'shirts',            # shirts (woven, in general)
+        'blouses',           # women's blouses
+        'polo shirts',       # polo shirts
+        'tank tops',         # tank tops / sleeveless tops
+        'camisoles',         # camisoles (thin-strap tops)
+        # --- Knits/Sweaters ---
+        'sweaters',          # sweaters (in general)
+        'cardigans',         # open-front knits
+        'pullovers',         # pullovers
+        'hoodies',           # hooded sweatshirts
+        'sweatshirts',       # crew-neck sweatshirts
+        'vests',             # vests / waistcoats (worn as an outer layer)
+        # --- Outerwear ---
+        'coats',             # coats (long)
+        'jackets',           # jackets (short)
+        'blazers',           # blazers / suit jackets
+        # --- Bottoms ---
+        'jeans',             # jeans (denim is a material, but fashion treats it as its own category)
+        'trousers',          # dress trousers / formal trousers
+        'pants',             # pants (casual, in general)
+        'joggers',           # joggers (cuffed-ankle pants)
+        'leggings',          # leggings / tights
+        'shorts',            # shorts
+        'skirts',            # skirts
+        'skorts',            # skorts (skirt-shorts)
+        # --- One-Piece ---
+        'dresses',           # dresses
+        'jumpsuits',         # jumpsuits (one-piece with trousers)
+        'bodysuits',         # bodysuits (one-piece, close-fitting)
+        'suits',             # suits (tailored sets)
+        # --- Intimates/Swim ---
+        'bras',              # bras
+        'underwear',         # underwear
+        'lingerie',          # lingerie
+        'pajamas',           # pajama sets
+        'swimwear',          # swimwear
    ],
    'shoes': [
        'sneakers',
@@ -38,22 +58,37 @@ CATEGORY = {
        'boots',
    ],
    'bags': [
-        'bags'
+        'shoulder bags',
+        'crossbody',
+        'bucket bags',
+        'tote bags',
+        'clutch bags',
+        'backpacks',
+        'travel bags',
+        'luggage',
    ],
    'accessories': [
+        # --- Jewelry & Watches ---
        'necklaces',
+        'earrings',
        'bracelets',
-        'jewellery',
-        'eyewear',
-        'scarves',
+        'rings',
+        'cufflinks',
+        'watches',
+        # --- Head/Face ---
        'hats',
-        'gloves',
+        'eyewear',
+        # --- Body/Textile ---
        'belts',
-        'socks',
-        'watches'
+        'scarves',
+        'gloves',
        'ties',
+        'bow ties',
+        'pocket squares',
+        'socks',
    ]
 }
-ALL_CATEGORY = sum(CATEGORY.values(), [])
+CATEGORY_LIST = list(FASHION_TAXONOMY.keys())
+ALL_SUBCATEGORY_LIST = sum(FASHION_TAXONOMY.values(), [])

-IGNORE_CATEGORY = ['socks']
+IGNORE_SUBCATEGORY = ['socks']
--- a/data_ingestion/README.md
+++ b/data_ingestion/README.md
@@ -40,7 +40,7 @@
 ## Example in `metadata_extraction.json`
 ```json
 "EOJ367": {
-    "category": "shoes",
+    "subcategory": "necklaces",
    "gender": "female",
    "applicable_occasions": [
        "Casual",
@@ -60,33 +60,34 @@
 ## Metadata in Vector Database
 ```json
 {
-    'item_id': 'EOJ128',
-    'category': 'sunglasses', 
-    'gender': 'unisex', 
-    'modality': 'image',
-    'brand': 'CELINE',
-    'color': 'BROWN', 
-    'description': "Immerse yourself in the depth of classic style with CELINE\'s Tortoiseshell Logo Sunglasses. Featuring a rich, tortoiseshell acetate frame and adorned with the iconic CELINE logo in gold, these sunglasses are a testament to timeless elegance and luxury. Perfect for those who appreciate a sophisticated aesthetic, they offer optimal UV protection while ensuring you remain at the forefront of fashion.",
-    'tags': 'celine,accessories,in-stock,new,maxi,triomphe,acetate,round', 
-    'price': 4500, 
-    'url': 'https://www.lanecrawford.com.hk/product/celine/maxi-triomphe-acetate-round-sunglasses/_/EOJ128/product.lc?utm_medium=embed&utm_source=ai-recommended&utm_campaign=2025-christmas_lc_ai-recommended',
-    'batch_source': '2025_q4',
-    'Outdoor': 0, 
-    'Ski / Snow / Mountain': 0, 
-    'Festival / Concert': 0, 
-    'Activewear': 0, 
-    'Casual': 1, 
-    'Cocktail / Semi-Formal': -1, 
-    'Formal': -1, 
-    'Party / Clubbing': 0, 
-    'Evening': 0, 
-    'Travel / Transit': 0, 
-    'Beach / Swim': 0, 
-    'Garden Party / Daytime Event': 1, 
-    'Black Tie / White Tie': -1, 
-    'Resort': 1, 
-    'Athleisure': 0, 
-    'Business / workwear': -1, 
-    'Bridal / Wedding': -1, 
+    "item_id": "EOJ128",
+    "category": "accessories",
+    "subcategory": "eyewear",
+    "gender": "unisex", 
+    "modality": "image",
+    "brand": "CELINE",
+    "color": "BROWN", 
+    "description": "Immerse yourself in the depth of classic style with CELINE's Tortoiseshell Logo Sunglasses. Featuring a rich, tortoiseshell acetate frame and adorned with the iconic CELINE logo in gold, these sunglasses are a testament to timeless elegance and luxury. Perfect for those who appreciate a sophisticated aesthetic, they offer optimal UV protection while ensuring you remain at the forefront of fashion.",
+    "tags": "celine,accessories,in-stock,new,maxi,triomphe,acetate,round", 
+    "price": 4500, 
+    "url": "https://www.lanecrawford.com.hk/product/celine/maxi-triomphe-acetate-round-sunglasses/_/EOJ128/product.lc?utm_medium=embed&utm_source=ai-recommended&utm_campaign=2025-christmas_lc_ai-recommended",
+    "batch_source": "2025_q4",
+    "Outdoor": 0, 
+    "Ski / Snow / Mountain": 0, 
+    "Festival / Concert": 0, 
+    "Activewear": 0, 
+    "Casual": 1, 
+    "Cocktail / Semi-Formal": -1, 
+    "Formal": -1, 
+    "Party / Clubbing": 0, 
+    "Evening": 0, 
+    "Travel / Transit": 0, 
+    "Beach / Swim": 0, 
+    "Garden Party / Daytime Event": 1, 
+    "Black Tie / White Tie": -1, 
+    "Resort": 1, 
+    "Athleisure": 0, 
+    "Business / workwear": -1, 
+    "Bridal / Wedding": -1
 }
 ```
--- a/data_ingestion/process_item.py
+++ b/data_ingestion/process_item.py
@@ -5,7 +5,7 @@ from PIL import Image
 import json
 from tqdm import tqdm

-from app.taxonomy import OCCASION, CATEGORY, ALL_CATEGORY
+from app.taxonomy import OCCASION, FASHION_TAXONOMY, ALL_SUBCATEGORY_LIST


 # data config
@@ -42,7 +42,7 @@ Description: Cut from cardinal-red virgin wool, Armarium's Loren skirt wields ta
 Tags: armarium, clothing, in-stock, new, loren, wool, blend, tube
 """
 EXAMPLE_1_JSON = json.dumps({
-    "category": "skirts",
+    "subcategory": "skirts",
    "gender": "female",
    "applicable_occasions": [
        "Business/workwear", "Evening", "Cocktail / Semi-Formal", "Party / Clubbing", "Formal"
@@ -61,7 +61,7 @@ Description: Crafted from 18k yellow gold and rhodium-plated sterling silver, th
 Tags: tateossian, accessories, in-stock, new, mayfair, yellow, gold, rhodium
 """
 EXAMPLE_2_JSON = json.dumps({
-    "category": "jewelry",
+    "subcategory": "jewelry",  # NOTE(review): "jewelry" does not appear among the subcategories listed in app/taxonomy.py, so validate_result() would reject this few-shot label -- confirm the intended value
    "gender": "female",
    "applicable_occasions": [
        "Formal", "Black Tie / White Tie", "Bridal / Wedding", "Business/workwear", "Cocktail / Semi-Formal"
@@ -94,20 +94,24 @@ def format_product_info(product):
    return info


-def generate_full_prompt(product_info, raw_category):
+def raw_category_mapping(raw_category: str) -> str:
    if raw_category == 'Fine Jewellery And Watches':
-        category = 'accessories'
+        return 'accessories'
    else:
-        category = raw_category.lower()
-    subcategory_list = CATEGORY.get(category)
+        return raw_category.lower()
+
+
+def generate_full_prompt(product_info, raw_category):
+    category = raw_category_mapping(raw_category)
+    subcategory_list = FASHION_TAXONOMY.get(category)

    SYSTEM_PROMPT = f"""You are an expert fashion AI assistant. Your task is to analyze the provided product image and product details to: 
    1. determine the suitable occasions for wearing or using the item. You must choose occasions ONLY from the following strict list: {json.dumps(OCCASION, indent=4)}. Only relevant suitable or inappropriate occasions should be selected.
-    2. categorize it into suitable category in strict list: {json.dumps(subcategory_list)}.
+    2. categorize it into suitable subcategory in strict list: {json.dumps(subcategory_list)}.
    3. categorize it into appropriate gender in ["female", "male", "unisex"]

    Output Format:
-    Return ONLY a valid JSON object with four keys: "category", "gender", "applicable_occasions" and "inappropriate_occasions". Do not include any analysis or extra text outside of the final JSON object.
+    Return ONLY a valid JSON object with four keys: "subcategory", "gender", "applicable_occasions" and "inappropriate_occasions". Do not include any analysis or extra text outside of the final JSON object.
    """

    # 组合对话序列
@@ -140,37 +144,36 @@ product_list = [
 ]


-def validate_results():
-    if os.path.exists(OUTPUT_FILE):
-        with open(OUTPUT_FILE, 'r') as f:
-            final_results = json.load(f)
-    else:
-        final_results = {}
+def validate_result(result_dict: dict) -> bool:  # True only when both fields are present, the subcategory is in ALL_SUBCATEGORY_LIST and the gender is female/male/unisex
+    subcategory = result_dict.get("subcategory")
+    gender = result_dict.get("gender")

-    unfinished_ids = []
-    for product in product_list:
-        item_id = product.get('id')
-        if item_id not in final_results.keys():
-            unfinished_ids.append(product)
-        else:
-            processed_item = final_results[item_id]
-            category = processed_item.get("category")
-            gender = processed_item.get("gender")
+    if not subcategory or not gender:
+        return False
+    
+    if subcategory not in ALL_SUBCATEGORY_LIST:
+        return False

-            if category not in ALL_CATEGORY:
-                unfinished_ids.append(product)
+    if gender not in ['female', 'male', 'unisex']:
+        return False
+    
+    return True
+
+
+if os.path.exists(OUTPUT_FILE):
+    with open(OUTPUT_FILE, 'r') as f:
+        final_results = json.load(f)
+else:
+    final_results = {}

-            if gender not in ['female', 'male', 'unisex']:
-                unfinished_ids.append(product)
-    return unfinished_ids, final_results

 attemps = 0
 while attemps < 3:
+    unfinished_products = [product for product in product_list if not validate_result(final_results.get(product.get('id')) or {})]  # stored entries are re-validated, so stale or invalid ones are retried
     attemps += 1
-    unfinished_products, final_results = validate_results()
-    completion_ratio = len(unfinished_products) / len(product_list)
-    if (completion_ratio > 0.95):
-        print("valid results surpass 95%. Finish Now.")
+    completion_ratio = (len(product_list) - len(unfinished_products)) / len(product_list)  # share of product_list that already has a valid result
+    if (completion_ratio > 0.85):
+        print("valid results surpass 85%. Finish Now.")
        break
    else:
        print(f"Start {attemps} categorization process. Current ratio: {completion_ratio * 100}%")
@@ -252,11 +255,11 @@ while attemps < 3:
                    json_str = generated_text[start_idx:end_idx]
                    result_dict = json.loads(json_str)
                    
-                    final_results[product_id] = result_dict
+                    if validate_result(result_dict):
+                        final_results[product_id] = result_dict
                    
                except Exception as e:
                    print(f"ID {product_id}: FAILED to parse JSON. Raw Output: {generated_text.strip()}")
-                    final_results[product_id] = {"error": str(e), "raw_output": generated_text.strip()}
            
            # 显存清理（可选，但在长任务中推荐）
            del inputs, outputs
--- a/data_ingestion/run_ingestion.py
+++ b/data_ingestion/run_ingestion.py
@@ -1,6 +1,3 @@
-
-
-
 import chromadb
 import os
 import json
@@ -11,7 +8,7 @@ from tqdm import tqdm
 from PIL import Image
 from transformers import CLIPProcessor, CLIPModel

-from app.taxonomy import CATEGORY, ALL_CATEGORY, OCCASION
+from app.taxonomy import ALL_SUBCATEGORY_LIST, OCCASION


 BATCH_SOURCE = '2025_q4'
@@ -20,6 +17,7 @@ IMAGE_DIR = f'./data/{BATCH_SOURCE}/image_data'

 RAW_DATA_PATH = f'{DATA_DIR}/products-all.json'
 CATEGORIZED_METADATA_PATH = f'{DATA_DIR}/metadata_extraction.json'
+ADD_TEXT_EMBEDDING = False

 ## Load data
 with open(RAW_DATA_PATH, 'r', encoding='utf-8') as file:
@@ -36,11 +34,11 @@ collection = client.get_or_create_collection(
 )

 # if you wish to delete some item, uncomment following
-# results = collection.delete(
-#     where={
-#         "batch_source": BATCH_SOURCE
-#     }
-# )
+results = collection.delete(  # NOTE: now executed on every run -- removes all records whose batch_source equals BATCH_SOURCE before the loop below adds items again
+    where={
+        "batch_source": BATCH_SOURCE
+    }
+)

 # Load model
 processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
@@ -63,9 +61,13 @@ def format_product_info(product):
    )
    return info

+def raw_category_mapping(raw_category: str) -> str:
+    if raw_category == 'Fine Jewellery And Watches':  # this one raw name is mapped onto the 'accessories' category
+        return 'accessories'
+    else:
+        return raw_category.lower()  # every other raw name is simply lower-cased

 # Combine all data together
-new_category = {}
 valid_count = 0
 all_count = 0
 for raw_item in tqdm(raw_data['products']):
@@ -91,18 +93,14 @@ for raw_item in tqdm(raw_data['products']):
        print(f"{item_id} has not been categorized. It does not exist in {CATEGORIZED_METADATA_PATH}")
        continue

-    category = processed_item.get("category")
+    category = raw_category_mapping(raw_category)
+    subcategory = processed_item.get("subcategory")
    gender = processed_item.get("gender")
    applicable_occasions = processed_item.get("applicable_occasions", [])
    inappropriate_occasions = processed_item.get("inappropriate_occasions", [])

-    if category not in ALL_CATEGORY:
+    if subcategory not in ALL_SUBCATEGORY_LIST:  # NOTE(review): the `continue` that used to end this branch is removed, so items with an invalid or missing subcategory are only reported and still ingested -- confirm this is intended
        print(f"{item_id}'s category, {category}, does not valid.")
-        if category not in new_category:
-            new_category[category] = [item_id]
-        else:
-            new_category[category].append(item_id)
-        continue

    if gender not in ['female', 'male', 'unisex']:
        print(f"{item_id}'s gender is not valid in {['female', 'male', 'unisex']}")
@@ -129,6 +127,7 @@ for raw_item in tqdm(raw_data['products']):
    item_img_metadata = {
        "item_id": item_id,
        "category": category,
+        "subcategory": subcategory,
        "description": description,
        "gender": gender,
        'brand': raw_item.get('brand', ''),
@@ -146,10 +145,6 @@ for raw_item in tqdm(raw_data['products']):
    for occasion in inappropriate_occasions:
        item_img_metadata[occasion] = -1

-    item_txt_metadata = deepcopy(item_img_metadata)
-    item_txt_metadata["modality"] = "text"
-
-
    # Get image feature
    image = Image.open(image_path).convert("RGB")
    inputs = processor(images=image, return_tensors="pt").to(device)
@@ -158,21 +153,30 @@ for raw_item in tqdm(raw_data['products']):
        img_features = img_features / img_features.norm(p=2, dim=-1, keepdim=True)
        img_embedding = img_features.cpu().numpy().flatten().tolist()

-    # Get text feature
-    inputs = processor(text=[description], return_tensors="pt", padding=True, truncation=True).to(device)
-    with torch.no_grad():
-        txt_features = model.get_text_features(**inputs)
-        txt_features = txt_features / txt_features.norm(p=2, dim=-1, keepdim=True)
-        txt_embedding = txt_features.cpu().numpy().flatten().tolist()
-
    product_info = format_product_info(raw_item)
    # 插入到 ChromaDB
    collection.add(
-        ids=[f'{item_id}_img', f'{item_id}_txt'],
-        documents=[product_info, product_info],
-        embeddings=[img_embedding, txt_embedding],
-        metadatas=[item_img_metadata, item_txt_metadata],
+        ids=[f'{item_id}_img'],
+        documents=[product_info],
+        embeddings=[img_embedding],
+        metadatas=[item_img_metadata],
    )

-print(f"Final valid ratio is {valid_count / all_count * 100}%. Total number is {all_count}, Valid number is {valid_count}")
-print(f'Found new category for consideration: {new_category}')
+    if ADD_TEXT_EMBEDDING:
+        item_txt_metadata = deepcopy(item_img_metadata)
+        item_txt_metadata["modality"] = "text"
+
+        # Embed the description text and L2-normalise it; gradients are not needed for inference
+        inputs = processor(text=[description], return_tensors="pt", padding=True, truncation=True).to(device)
+        with torch.no_grad():
+            txt_features = model.get_text_features(**inputs)
+            txt_features = txt_features / txt_features.norm(p=2, dim=-1, keepdim=True)
+            txt_embedding = txt_features.cpu().numpy().flatten().tolist()
+        collection.add(  # the database write does not need to sit inside the no_grad scope
+            ids=[f'{item_id}_txt'],
+            documents=[product_info],
+            embeddings=[txt_embedding],
+            metadatas=[item_txt_metadata],
+        )
+
+print(f"Final valid ratio is {valid_count / max(all_count, 1) * 100}%. Total number is {all_count}, Valid number is {valid_count}")  # max(..., 1) avoids ZeroDivisionError when no product was counted