reconstruct whole recommendation pipeline and add new rec mode one-ask-for-all

This commit is contained in:
pangkaicheng
2025-12-12 17:37:07 +08:00
parent 0e9546aa1a
commit 85390d5e6d
12 changed files with 684 additions and 565 deletions

View File

@@ -31,7 +31,6 @@ class Settings(BaseSettings):
# 路径配置参数
DATA_ROOT: str = Field(default="/workspace/lc_stylist_agent/data", description="数据根目录")
LOCAL_IMAGE_DIR: str = Field(default="/workspace/lc_stylist_agent/Data/image_data", description="图片数据目录")
OUTFIT_OUTPUT_DIR: str = Field(default="/workspace/lc_stylist_agent/data/outfit_output", description="生成的搭配图片输出目录")
STYLIST_GUIDE_DIR: str = Field(default="/workspace/lc_stylist_agent/data/stylist_guide", description="风格指南文本目录")

View File

@@ -4,6 +4,7 @@ import uuid
from enum import Enum
from typing import List
from pydantic import Field
import time
import litserve as ls
from pydantic import BaseModel
@@ -12,7 +13,7 @@ from app.server.ChatbotAgent.core.data_structure import Message, Role
from app.server.ChatbotAgent.core.llm_interface import AsyncGeminiLLM
from app.server.ChatbotAgent.core.redis_manager import RedisManager
from app.server.ChatbotAgent.core.stylist_agent_server import AsyncStylistAgent
from app.server.ChatbotAgent.core.system_prompt import SUMMARY_PROMPT
from app.server.ChatbotAgent.core.prompt import SUMMARY_PROMPT
from app.server.ChatbotAgent.core.vector_database import VectorDatabase
logger = logging.getLogger(__name__)
@@ -54,7 +55,6 @@ class AgentRequestModel(BaseModel):
batch_sources: List[str]
callback_url: str
gender: str
max_len: int = 9
class LCAgent(ls.LitAPI):
@@ -118,7 +118,6 @@ class LCAgent(ls.LitAPI):
user_id=request.user_id,
gender=request.gender,
callback_url=request.callback_url,
max_len=request.max_len,
outfit_ids=outfit_ids
)
logger.info("--- Final Recommendation Results ---")
@@ -162,9 +161,17 @@ class LCAgent(ls.LitAPI):
return str(parsed_result.summary), [occ.value for occ in parsed_result.occasions]
async def recommend_outfit(
self, request_summary: str, occasions: List[str], stylist_name: str, start_outfit: List = [], batch_sources: List[str] = [],
num_outfits: int = 1, user_id: str = "test", gender: str = "male",
callback_url: str = None, max_len: int = 9, outfit_ids=None
self,
request_summary: str,
occasions: List[str],
stylist_name: str,
start_outfit: List = [],
batch_sources: List[str] = [],
num_outfits: int = 1,
user_id: str = "test",
gender: str = "male",
callback_url: str = None,
outfit_ids=None
):
"""
基于用户的对话历史和需求,推荐一套搭配。
@@ -181,17 +188,16 @@ class LCAgent(ls.LitAPI):
stylist_agent_kwages = self.stylist_agent_kwages.copy()
for i in range(num_outfits):
stylist_agent_kwages['outfit_id'] = outfit_ids[i]
stylist_agent_kwages['max_len'] = max_len
stylist_agent_kwages['stylist_name'] = stylist_name
stylist_agent_kwages['gender'] = gender
agent = AsyncStylistAgent(**stylist_agent_kwages)
task = agent.run_styling_process(
task = agent.run_iterative_styling(
request_summary=request_summary,
occasions=occasions,
stylist_name=stylist_name,
start_outfit=start_outfit,
batch_sources=batch_sources,
user_id=user_id,
callback_url=callback_url,
gender=gender,
)
tasks.append(task)
task_map[task] = {"outfit_id": outfit_ids[i], "retries": 0}
@@ -223,11 +229,12 @@ class LCAgent(ls.LitAPI):
# 重新创建任务 (可能需要短暂延迟,例如 time.sleep(1),但在此异步环境中,我们会通过重新创建 agent/task 来实现)
stylist_agent_kwages['outfit_id'] = outfit_id
stylist_agent_kwages['stylist_name'] = stylist_name
stylist_agent_kwages['gender'] = gender
agent = AsyncStylistAgent(**stylist_agent_kwages)
new_task = agent.run_styling_process(
new_task = agent.run_iterative_styling(
request_summary=request_summary,
occasions=occasions,
stylist_name=stylist_name,
start_outfit=start_outfit,
batch_sources=batch_sources,
user_id=user_id,
@@ -284,31 +291,45 @@ if __name__ == "__main__":
with open("./data/2025_q4/request_test.json", "r") as f:
request_data = json.load(f)
tasks = []
for test_content in request_data[:30]:
tasks_with_metadata = []
for test_content in request_data[20:25]:
occasions = test_content['occasions']
request_summary = test_content['request_summary']
stylist_agent_kwages['max_len'] = 5
for stylist_name in ["edi", "vera"]:
stylist_agent_kwages['outfit_id'] = test_content['test_case_id'] + "_" + "_".join(occasions) + f"_{stylist_name}"
stylist_agent_kwages['stylist_name'] = stylist_name
stylist_agent_kwages['gender'] = "female"
agent = AsyncStylistAgent(**stylist_agent_kwages)
task = agent.run_styling_process(
coro = agent.run_iterative_styling(
# coro = agent.run_quick_batch_styling(
request_summary=request_summary,
occasions=occasions,
stylist_name=stylist_name,
start_outfit=[],
batch_sources=["2025_q4"],
user_id=test_content['test_case_id'],
callback_url="http://mock-callback.com/result",
gender="female",
)
tasks.append(task)
results = await asyncio.gather(*tasks, return_exceptions=True)
for result in results:
# 记录任务开始前的单调时间,并将元数据添加到列表中
description = f"Batch mode - Case {test_content['test_case_id']} - Stylist {stylist_name}"
tasks_with_metadata.append((coro, description))
tasks_only = [coro for coro, _ in tasks_with_metadata]
print(f"--- Launching {len(tasks_only)} concurrent styling tasks... ---")
results = await asyncio.gather(*tasks_only, return_exceptions=True)
time_samples = []
for i, result in enumerate(results):
coro, description = tasks_with_metadata[i]
if isinstance(result, Exception):
print(f"❌ 任务失败: {type(result).__name__} - {str(result)}")
continue
else:
response, duration = result
time_samples.append(duration)
print(f"✅ 任务成功 ({description}) [Time: {duration:.2f}s].")
print(f"Average time consumption is {sum(time_samples) / len(time_samples)}")
try:
# 使用 asyncio.run() 来执行顶层异步函数

View File

@@ -8,10 +8,8 @@ from app.config import settings
from google.genai import types
from app.server.ChatbotAgent.core.data_structure import Message, Role
from app.server.ChatbotAgent.core.llm_interface import AsyncGeminiLLM
from app.server.ChatbotAgent.core.redis_manager import RedisManager
from app.server.ChatbotAgent.core.system_prompt import BASIC_PROMPT
from app.server.ChatbotAgent.core.vector_database import VectorDatabase
from app.server.ChatbotAgent.core.prompt import BASIC_PROMPT
logger = logging.getLogger(__name__)

View File

@@ -0,0 +1,219 @@
# System prompt for the multi-turn conversational styling assistant.
# It is a plain (non-f) string whose only placeholder is {gender}; the caller
# is presumably expected to fill it with str.format -- confirm at the call site.
# The prompt tells the model to ask 1-2 follow-up questions per turn and to
# answer in plain text without Markdown.
BASIC_PROMPT = """You are a professional, friendly, and insightful AI {gender}'s styling assistant.
Your primary mission is to engage in a multi-turn conversation with the user to fully understand their dressing intent. You must adopt a professional yet approachable tone.
CONVERSATION GOALS:
1. **Occasion:** Determine the specific event (e.g., romantic dinner, summer wedding, business meeting).
2. **Style:** Pinpoint the desired aesthetic (e.g., classic elegance, edgy, minimalist, bohemian).
3. **Vibe/Details:** Gather any mood or specific constraints (e.g., needs to be comfortable, requires light colors, no bare shoulders).
4. **Item Preference:** Ask the user if they have any specific preferences for an item type or silhouette (e.g., preference for a dress, skirt, tailored pants, or a particular neckline/length).
GUIDANCE FOR RESPONSE GENERATION:
- After the user's initial request (e.g., "I want a chic outfit for dinner."), immediately reply with a friendly, targeted follow-up question to elicit the most crucial missing information (usually a combination of **Occasion** and **Style**).
- Be concise. Ask only 1 to 2 essential questions per turn.
- You must gather sufficient, clear intent before proceeding to actual clothing recommendations.
OUTPUT FORMAT INSTRUCTION:
- **DO NOT** use any Markdown formatting whatsoever (e.g., do not use asterisks (*), bold text (**), lists, or code blocks).
- **ONLY** output the plain text response spoken by the AI Assistant.
Example Follow-up (mimicking a conversational flow):
User: I want a chic outfit for dinner.
Your Response: Hey there! A chic dinner outfit, I love that! To give you the perfect recommendations, tell me: is this a romantic date, business dinner, or celebration with friends? And what's your go-to style vibe: classic elegance or something with more edge?"""
# System prompt asking the model to analyze a chat history and produce
# (1) the matching occasions and (2) a detailed free-text request summary.
# The string contains no placeholders.
# NOTE(review): the "allowed list" of occasions is not part of this text --
# presumably it is supplied through the response schema; confirm at the call site.
SUMMARY_PROMPT = """
You are an expert fashion request analyzer. Analyze the conversation history provided by the user.
Your task is to:
1. Identify the most appropriate occasions from the allowed list based on the user's intent.
2. Write a detailed summary string that captures the user's style preferences, specific item requests, disliked items, body concerns, and color preferences. This summary will be used by a stylist to recommend outfits.
Extract this information accurately from the chat history.
"""
from app.taxonomy import FASHION_TAXONOMY, IGNORE_SUBCATEGORY, ALL_SUBCATEGORY_LIST
# System-prompt template for the step-by-step (one item per model call) stages.
# Although this is an f-string, every brace in it is doubled, so nothing is
# interpolated at import time: "{{name}}" becomes the literal "{name}", which is
# a str.format placeholder, and "{{{{" / "}}}}" become "{{" / "}}", which
# str.format then renders as literal "{" / "}" around the JSON examples.
# Placeholders left for str.format: gender, current_category, request_summary,
# stylist_guide, max_len.
# NOTE(review): the ```json fence opened in section 4.1 is never closed, and
# the 4.2 example has no fence at all -- confirm whether this is intentional.
core_outfit_template = f"""
You are a professional fashion stylist Agent, specialized in creating complete, tailored outfits for {{gender}}. Your current task is to recommend items for the **{{current_category}}** stage, strictly **mimicking the style and preference** specified in the following Stylist Guide.
Your task is to **create a cohesive and complete outfit**, strictly adhering to **BOTH** the user's explicit **Request Summary** and the **Outfit Style Guide**. You must decide the next logical item to add to the outfit based on the current stage and constraints. Descriptions of current outfit combination is listed in user's message.
---
## Request from the User:
{{request_summary}}
## Core Guidance Document: Outfit Style Guide
{{stylist_guide}}
---
## Your Workflow and Constraints
1. **Style Adherence**: You must strictly observe all rules in the Style Guide concerning **color palette, fit, layering principles, pattern restrictions , shoe coordination**.
2. **Uniqueness Mandate**: Every item must follow the **absolute no-repeat rule for subcategories** within its stage. Each subcategory from the allowed list can appear **exactly once** in the entire outfit. Furthermore, the categories 'dresses' and 'pants' and 'skirts' are mutually exclusive; they NORMALLY cannot be included in the same outfit.
3. **Step Planning**: The styling sequence must follow a logical approach (e.g., top-down, inside-out for clothing). Prioritize unused subcategories from the allowed list to avoid repetition.
4. **Structured Output**: Your output MUST be a valid JSON object. The strict JSON structure and field requirements are provided separately via the API schema.
You must only output one of two actions: "recommend_item" or "stop".
4.1. **recommend_item**: Use this action to suggest the next single item.
* **subcategory**: Must be strictly no repeats, and drawn from the allowed list.
* **description**: This must be an **extremely detailed and precise** description for the vector search. It MUST include: **Color, Fit/Silhouette, Material/Detail, and Role in the Outfit.**
You must strictly use the **JSON format** for your output, as follows:
```json
{{{{
"action": "recommend_item",
"subcategory": "YOUR_ITEM_SUBCATEGORY",
"description": "YOUR_DETAILED_DESCRIPTION",
"reason": "YOUR_RECOMMENDATION_REASON"
}}}}
4.2. **stop**: Use this action when the Termination Condition is met.
* **reason**: This field is mandatory when stopping, and must clearly state why the outfit is complete.
You must strictly use the **JSON format** for your output, as follows:
{{{{
"action": "stop",
"subcategory": "",
"description": "",
"reason": "CORE_OUTFIT_COMPLETE"
}}}}
5. **Termination Condition**: Terminate when the below condition is fully met
5.1. **CLOTHING Stage**: The core clothing part of the outfit is complete, meaning the combination of items effectively achieves **full body coverage** (e.g., includes both a top/upper garment and a bottom/lower garment, or a single full-body piece like a dress/jumpsuit). Additionally, **all mandatory elements** stipulated in the Style Guide are satisfied. *(Note: Typically, {{max_len}} items are sufficient for this stage.)*
5.2. **SHOES Stage**: **Exactly one (1) item** has been successfully recommended, as shoes are a **mandatory component** for any complete outfit.
5.3. **BAGS Stage**: **Exactly one (1) item** has been successfully recommended, **OR** the recommendation is skipped if the Style Guide or the User Request **does not mandate** a bag for the specific occasion (i.e., the bag is considered optional).
6. **Context Dependency**: The user's next input (if not Start) will contain the **image and description of the selected item**. When recommending the next item:
a) First verify the subcategories of all already selected items to ensure no duplicates;
b) Select an unused subcategory from the allowed list as the priority;
c) Ensure the recommended item coordinates with the already selected items and complies with all rules in the Style Guide.
Now, please start building an outfit (with strictly unique categories for all items) and output the JSON for the first item.
"""
# System-prompt template for the accessories stage, where the model returns the
# whole accessory list in one response.
# This is an f-string: {IGNORE_SUBCATEGORY} is interpolated once, at import
# time, while the doubled braces ("{{name}}") survive as "{name}" placeholders
# for a later str.format call.
# Placeholders left for str.format: gender, current_category, request_summary,
# stylist_guide, max_len.
# NOTE(review): if the text produced for IGNORE_SUBCATEGORY contains "{" or "}"
# (e.g. the repr of a set or dict), the later str.format call would misparse
# it -- confirm it is a list/tuple or otherwise brace-free.
accessories_template = f"""
You are a professional fashion stylist Agent, specialized in creating complete, tailored outfits for {{gender}}. Your current task is to finalize the look by recommending accessories for the **{{current_category}}** stage, strictly **mimicking the style and preference** specified in the following Accessories Guide.
Your final task is to **select the perfect set of accessories** to complete the given outfit. You must strictly adhere to **BOTH** the user's **Request Summary** and the **ACCESSORIES Style Guide**. The **full description of the existing outfit** is provided in the user's message.
---
## CONTEXT
[User Request]: {{request_summary}}
[Accessories Style Guide]:
{{stylist_guide}}
---
## ACCESSORIES GENERATION RULES
1. **Batch Recommendation**: You must output the **COMPLETE LIST of accessories** in a single response using the 'recommended_accessories' list defined in the schema. Do not recommend items one by one.
2. **Quantity Constraint**: The total number of accessories recommended in the list must not exceed **{{max_len}}** items. Typically, 1 to {{max_len}} distinct items are required to complete a look.
3. **Harmony & Guide Compliance**:
- Assess the existing outfit (provided in the user's message) and ensure all accessories complement its style, color palette, and occasion.
- **Strictly follow the [Accessories Style Guide]** regarding material types (e.g., metals like gold/silver), total numbers allowed, and specific layering requirements (e.g., mandated watch or jewelry layering).
4. **Exclusion List**: Subcategories in the following list are strictly excluded from recommendation: ({IGNORE_SUBCATEGORY}).
5. **Description Quality**: The 'description' field for each accessory must be **extremely detailed and precise** for high-accuracy vector search, including: **Color, Material/Detail, and the specific Role in the Outfit.**
Generate the final, complete accessories list now.
"""
# System-prompt template for the single-batch mode, where the model returns the
# complete outfit (clothing, shoes, bags, accessories) in one response.
# This is an f-string: {IGNORE_SUBCATEGORY} is interpolated once, at import
# time, while the doubled braces ("{{name}}") survive as "{name}" placeholders
# for a later str.format call.
# Placeholders left for str.format: gender, request_summary, stylist_guide,
# max_len. Unlike the other templates there is no {current_category}.
# NOTE(review): if the text produced for IGNORE_SUBCATEGORY contains "{" or "}"
# (e.g. the repr of a set or dict), the later str.format call would misparse
# it -- confirm it is a list/tuple or otherwise brace-free.
all_items_template = f"""
You are a professional fashion stylist Agent, specialized in creating complete, tailored outfits for {{gender}}. Your task is to **generate a Complete, Head-to-Toe Outfit** in a **Single Batch**, strictly **mimicking the style and preference** specified in the Stylist Guide.
You must create a cohesive look that includes **Clothing, Shoes, Bags, and Accessories**. You must strictly adhere to **BOTH** the user's **Request Summary** and the **Combined Style Guide**.
---
## Request from the User:
{{request_summary}}
## Core Guidance Document: Combined Style Guide
{{stylist_guide}}
---
## GENERATION WORKFLOW & RULES
1. **Holistic Styling**: You are NOT recommending items step-by-step. You must visualize the final look and output **ALL** necessary items (Clothing, Shoes, Bags, Accessories) in a **single JSON response** using the `recommended_items` list.
2. **Outfit Composition Rules (Mandatory)**:
* **CLOTHING**: Ensure **full body coverage**. You must include either [Top + Bottom] OR [One-piece (e.g., Dress/Jumpsuit)]. 'Dresses' and 'Skirts/Pants' are mutually exclusive.
* **SHOES**: **Exactly one (1) pair** of shoes is MANDATORY.
* **BAGS**: Recommend **0 or 1 bag**. Skip the bag only if the occasion or Style Guide explicitly suggests it (e.g., home wear, yoga).
* **ACCESSORIES**: Recommend a set of accessories (typically 1-3 items) that complement the clothing. Follow metal/material constraints in the guide.
Number of items in outfit must not exceed {{max_len}}.
3. **Uniqueness Mandate**:
* Each **subcategory** belonging to CLOTHING (e.g., 't-shirts', 'sweaters', 'jacket') can appear **EXACTLY ONCE** in the entire list.
* But **subcategory** belonging to ACCESSORIES can repeat.
4. **Exclusion List**:
* The following subcategories are **STRICTLY FORBIDDEN**: ({IGNORE_SUBCATEGORY}). Do not include them in your recommendation.
5. **Style Adherence**:
* Ensure all items coordinate in **color, fit, and material**.
* Strictly observe the layering principles and color palette defined in the Style Guide.
6. **Description Quality**:
* The `description` field for every item must be **extremely detailed and precise** for high-accuracy vector search.
* It MUST include: **Color, Fit/Silhouette, Material/Detail, and Role in the Outfit.**
## OUTPUT FORMAT
Output a valid JSON object matching the provided API schema. The `recommended_items` array must contain all the items for this outfit.
Generate the complete outfit list now.
"""
def build_iterative_schema(current_category):
    """Build the response schema for one step of iterative styling.

    The schema describes a single object with the fields ``action``,
    ``subcategory``, ``description`` and ``reason``; all four are required.

    Args:
        current_category: Key into ``FASHION_TAXONOMY``. The subcategories
            stored under that key become the allowed ``enum`` values of the
            ``subcategory`` field.

    Returns:
        A new dict holding the schema. The ``enum`` list is a copy, so
        callers may modify the returned schema without touching the shared
        taxonomy.

    Raises:
        KeyError: If ``current_category`` is not a key of ``FASHION_TAXONOMY``.
    """
    # Copy instead of aliasing the module-level taxonomy list: a caller that
    # edits the returned schema must not silently change FASHION_TAXONOMY.
    allowed_subcategories = list(FASHION_TAXONOMY[current_category])

    # NOTE(review): "subcategory" is required and restricted to the enum, while
    # the "stop" example in core_outfit_template uses an empty string for it.
    # Confirm how the model is expected to fill this field when stopping.
    schema = {
        "type": "object",
        "properties": {
            "action": {"type": "string", "enum": ["recommend_item", "stop"]},
            "subcategory": {
                "type": "string",
                "description": "The subcategory of this single item. Only present if action is 'recommend_item'",
                "enum": allowed_subcategories
            },
            "description": {
                "type": "string",
                "description": "an **extremely detailed and precise** description of the item. This description is used for **high-accuracy vector search** in the database. It should include Color, Fit/Silhouette, Material/Detail, Role in the Outfit."
            },
            "reason": {"type": "string", "description": "The reason for the current action. Required if action is 'stop' (to summarize the final outfit)."}
        },
        "required": ["action", "subcategory", "description", "reason"]
    }
    return schema
def build_batch_schema(specified_category: str = ""):
    """Build the response schema for a batch (list-of-items) recommendation.

    Args:
        specified_category: Either a key of ``FASHION_TAXONOMY`` -- the
            schema then only allows that category's subcategories -- or the
            empty string (default), in which case every entry of
            ``ALL_SUBCATEGORY_LIST`` is allowed.

    Returns:
        A new dict holding the schema: an object with a required ``reason``
        string and a required ``recommended_items`` array whose elements carry
        a ``subcategory`` (restricted by ``enum``) and a ``description``.

    Raises:
        ValueError: If ``specified_category`` is neither the empty string nor
            a key of ``FASHION_TAXONOMY``.
    """
    # Explicit check instead of ``assert``: assertions are removed when Python
    # runs with -O, which would let an invalid category slip through.
    if specified_category != "" and specified_category not in FASHION_TAXONOMY:
        raise ValueError(
            f"Unknown category {specified_category!r}; expected '' or one of "
            f"{list(FASHION_TAXONOMY)}"
        )

    if not specified_category:
        category_range_desc = "the complete final outfit (including all categories)"
        # Copies keep the shared taxonomy lists safe from edits to the schema.
        subcategory_list = list(ALL_SUBCATEGORY_LIST)
        # Avoids the doubled space of "this  item" when no category is given.
        item_label = "item"
    else:
        category_range_desc = specified_category
        subcategory_list = list(FASHION_TAXONOMY[specified_category])
        item_label = f"{specified_category} item"

    schema = {
        "type": "object",
        "properties": {
            "reason": {
                "type": "string",
                "description": f"The justification for the selection of {category_range_desc}. This summary must explain how the recommended items meet the user's request and style requirements."
            },
            "recommended_items": {
                "type": "array",
                "description": "A list of descriptions of recommended items.",
                "items": {
                    "type": "object",
                    "properties": {
                        "description": {"type": "string", "description": f"The detailed description for this {item_label}."},
                        "subcategory": {
                            "type": "string",
                            "description": "The subcategory of the recommended item.",
                            "enum": subcategory_list
                        },
                    },
                    "required": ["subcategory", "description"]
                }
            }
        },
        "required": ["recommended_items", "reason"]
    }
    return schema

View File

@@ -1,12 +1,10 @@
import asyncio
import io
import json
import logging
import os
import random
import uuid
from typing import List, Dict, Any, Optional
from copy import deepcopy
import time
from google import genai
from google.cloud import storage
@@ -16,25 +14,28 @@ from app.server.utils.img_operation import merge_images_to_square
from app.server.utils.minio_client import minio_client, oss_upload_image
from app.server.utils.request_post import post_request
from app.config import settings
from app.taxonomy import CATEGORY, ALL_CATEGORY, IGNORE_CATEGORY
from app.server.ChatbotAgent.core.prompt import (
core_outfit_template,
accessories_template,
all_items_template,
build_iterative_schema,
build_batch_schema
)
from app.taxonomy import FASHION_TAXONOMY, ALL_SUBCATEGORY_LIST
logger = logging.getLogger(__name__)
IGNORE_CATEGORY = set(IGNORE_CATEGORY)
CLOTHING_CATEGORY = set(CATEGORY['clothing'] + CATEGORY['shoes'] + CATEGORY['bags']) - IGNORE_CATEGORY
ACCESSORY_CATEGORY = set(CATEGORY['accessories']) - IGNORE_CATEGORY
class AsyncStylistAgent:
def __init__(self, local_db, max_len: int, gemini_model_name: str, outfit_id=str):
def __init__(self, local_db: str, gemini_model_name: str, outfit_id: str, stylist_name: str, gender: str):
# self.outfit_items: List[Dict[str, str]] = []
self.outfit_id = outfit_id
self.stylist_name = stylist_name
self.gender = gender
self.gemini_client = genai.Client(
vertexai=True, project='aida-461108', location='us-central1'
)
self.local_db = local_db
self.max_len = max_len
self.gemini_model_name = gemini_model_name
self.stop_reason = ""
self.headers = {
@@ -44,49 +45,6 @@ class AsyncStylistAgent:
'Connection': "keep-alive",
'Content-Type': "application/json"
}
self.main_clothing_schema = {
"type": "object",
"properties": {
"action": {"type": "string", "enum": ["recommend_item", "stop"]},
"category": {
"type": "string",
"description": "The category of the single clothing item being recommended in this step (e.g., 'outerwear', 'bottoms'). Only present if action is 'recommend_item'.",
"enum": CLOTHING_CATEGORY
},
"description": {
"type": "string",
"description": "an **extremely detailed and precise** description of the item. This description is used for **high-accuracy vector search** in the database. It should include Color, Fit/Silhouette, Material/Detail, Role in the Outfit."
},
"reason": {"type": "string", "description": "The reason for the current action. Required if action is 'stop' (to summarize the final outfit)."}
},
"required": ["action"]
}
self.accessory_schema = {
"type": "object",
"properties": {
"reason": {
"type": "string",
"description": "The justification for completing the recommendation and the summary of the final outfit."
},
"recommended_accessories": {
"type": "array",
"description": "A list of accessories recommended to complete the outfit.",
"items": {
"type": "object",
"properties": {
"category": {
"type": "string",
"description": "The category of the accessory (e.g., jewelry, watches, bags).",
"enum": ACCESSORY_CATEGORY
},
"description": {"type": "string", "description": "The detailed description for this accessory item."}
},
"required": ["category", "description"]
}
}
},
"required": ["recommended_accessories", "reason"]
}
# 存储桶配置
try:
@@ -115,100 +73,6 @@ class AsyncStylistAgent:
except Exception as e:
raise Exception(f"Failed to load style guide from {guide_path}, {acc_guide_path}: {e}")
def _build_main_clothing_prompt(self, request_summary: str = "", gender: str = "male", stylist_guide: str = "") -> str:
"""Constructs the complete System Prompt."""
clothing_gender = "men's clothing" if gender == "male" else "women's clothing"
# Insert the style_guide content into the template
template = template = f"""
You are a professional fashion stylist Agent, specialized in creating complete, tailored outfits for {clothing_gender}. Only main clothing including 'bags' is needed, excluding accessories like 'jewelry', 'hats', 'belts', etc.
Your task is to **create a cohesive and complete outfit**, strictly adhering to **BOTH** the user's explicit **Request Summary** and the **Outfit Style Guide**. You must decide the next logical item to add to the outfit based on the currently selected items (if any).
---
## Request from the User:
{request_summary}
## Core Guidance Document: Outfit Style Guide
{stylist_guide}
---
## Your Workflow and Constraints
1. **Style Adherence**: You must strictly observe all rules in the Style Guide concerning **color palette, fit, layering principles, pattern restrictions , shoe coordination**.
2. **Category Uniqueness Mandate**: Every outfit must follow the **absolute no-repeat rule for clothing categories** — each category from the allowed list can appear **exactly once** in the entire outfit. This rule is non-negotiable, even if the user explicitly requests repeating a category. Furthermore, the categories 'dresses' and 'pants' and 'skirts' are mutually exclusive; they NORMALLY cannot be included in the same outfit.
3. **Step Planning**: The styling sequence must follow a **top-down, inside-out** approach: First major garments (tops/outerwear/bottoms/dresses) then shoes. When selecting the next item, prioritize unused categories from the allowed list to avoid repetition.
4. **Structured Output**: Every response must recommend the **next single item** (from an unused category). You must strictly use the **JSON format** for your output, as follows:
```json
{{
"action": "recommend_item",
"category": "YOUR_ITEM_CATEGORY",
"description": "YOUR_DETAILED_DESCRIPTION"
}}
```
* `action`: Must always be `"recommend_item"` until the outfit is complete.
* `category`: Must be an unused category from the following list: {list(CLOTHING_CATEGORY)} (strictly no repeats, per the Category Uniqueness Mandate).
* `description`: This must be an **extremely detailed and precise** description of the item. This description is used for **high-accuracy vector search** in the database and must include:
* **Color** (e.g., milk tea, pure white, dark gray)
* **Fit/Silhouette** (e.g., Oversize, loose, slim-fit)
* **Material/Detail** (e.g., 100% cotton, linen, gold clasp, thin stripe, checkered pattern)
* **Role in the Outfit** (e.g., serves as the innermost base layer for layering; acts as the crucial tie accent for the smart casual look)
5. **Termination Condition**: Terminate when **both** conditions are fully met
a) The entire outfit is complete and all mandatory elements stipulated in the Style Guide are satisfied;
b) No duplicate categories are present (strict compliance with the Category Uniqueness Mandate).
When terminating, output the following JSON format:
```json
{{
"action": "stop",
"reason": "OUTFIT_COMPLETE_AND_MEETS_ALL_MINI_GUIDELINES"
}}
```
Normally, {self.max_len} items are totally enough for an outfit.
6. **Context Dependency**: The user's next input (if not Start) will contain the **image and description of the selected item**. When recommending the next item:
a) First verify the categories of all already selected items to ensure no duplicates;
b) Select an unused category from the allowed list as the priority;
c) Ensure the recommended item coordinates with the already selected items and complies with all rules in the Style Guide.
Now, please start building an outfit (with strictly unique categories for all items) and output the JSON for the first item.
"""
return template.strip()
def _build_accessory_prompt(self, request_summary: str, gender: str, accessories_guide: str) -> str:
"""
构建配饰推荐 (Accessories) 的 System Prompt。
特点:强调基于现有穿搭 (Context Aware),批量推荐 (Batch Recommendation),做最后的点缀。
"""
clothing_gender = "men's clothing" if gender == "male" else "women's clothing"
template = f"""
You are an expert Accessories Stylist for {clothing_gender}.
Your task is to select the perfect set of accessories to complete an existing outfit.
---
## CONTEXT
[User Request]: {request_summary}
[Accessories Style Guide]:
{accessories_guide}
---
## STRICT RULES
1. **Batch Recommendation**: Do NOT recommend items one by one. You must output the **COMPLETE LIST** of accessories (e.g., jewelry, bag, watch, hat) in a single response using the 'recommended_accessories' list.
2. **Allowed Categories**: Select only from: {list(ACCESSORY_CATEGORY)}.
3. **Harmony & Constraints**:
- The accessories must complement the [Current Outfit Base].
- Strictly follow the [Accessories Style Guide] regarding metals (gold/silver), numbers, and prohibited items.
- If the guide mandates a watch or specific jewelry layering, ensure they are included.
4. **Quantity**: Typically recommend 2-4 distinct accessory items to complete the look.
Generate the final accessories list now.
"""
return template.strip()
async def _call_gemini(self, user_input: str, user_id: str, file_name: str, output_schema: Dict[str, Any], image_bytes: bytes = None, system_prompt: str = "") -> str:
"""
实际调用 Gemini API 的函数接受文本和用户的id。
@@ -300,7 +164,7 @@ class AsyncStylistAgent:
print(f"Raw response: {response_text}")
return None
def _get_next_item(self, item_description: str, category: str, occasions: List[str], batch_sources: List[str] = [], gender: str = "female") -> Optional[Dict[str, str]]:
def _get_next_item(self, item_description: str, category: str, subcategory: str, occasions: List[str], batch_sources: List[str] = [], gender: str = "female") -> Optional[Dict[str, str]]:
"""
1. 根据描述生成嵌入。
2. 查询本地数据库以找到最佳匹配项。
@@ -330,165 +194,311 @@ class AsyncStylistAgent:
# 3. 模拟 Agent 审核(实际应用中,你需要将图片发回给 Agent进行审核)
best_meta = results[0] # 第一个 batch 的第一个 metadata
item_id = best_meta['item_id'].replace("_img", "")
batch_source = best_meta['batch_source']
return {
"item_id": item_id, # 从 metadata 字典中安全获取
"category": best_meta['category'],
"gpt_description": item_description,
'description': best_meta['description'],
"subcategory": best_meta['subcategory'],
"gpt_description": item_description,
"gpt_subcategory": subcategory,
# 假设 'item_path' 存储在 metadata 中,或从 'item_id' 推导
# 这里假设 item_id 就是文件名的一部分
"image_path": os.path.join(f"{item_id}.jpg")
"image_path": os.path.join(settings.DATA_ROOT, batch_source, 'image_data', f"{item_id}.jpg")
}
def _build_system_prompt(self, template: str, request_summary: str = "", stylist_guide: str = "", current_category: str = "clothing", max_len: int=4) -> str:
# Insert the style_guide content into the template
sys_template = template.format(
gender=self.gender,
current_category=current_category.upper(),
request_summary=request_summary,
stylist_guide=stylist_guide,
max_len=max_len
)
return sys_template.strip()
def _build_user_input(self, recommend_acc=False) -> str:
def _build_user_input(self, current_category: str, existing_subcategories: str) -> str:
"""构建发送给 Gemini 的用户输入,包含已选单品信息。"""
if not self.outfit_items:
return "Start"
# 将已选单品的信息作为上下文发回给 Agent
context = "Selected fashion items:\n"
for ii, item in enumerate(self.outfit_items):
context += f"{ii + 1}. Category: {item['category']}. Description: {item['description']}\n"
if not recommend_acc:
context += "\nPlease recommend the next single item based on the selected items, user's request, and style guide."
context = ""
else:
context += "\nPlease recommend a complete list of accessories to complement the selected outfit based on the user's request and accessories style guide."
context = "Selected fashion items:\n"
# 将已选单品的信息作为上下文发回给 Agent
for ii, item in enumerate(self.outfit_items):
context += f"{ii + 1}. Category: {item['category']}. Subcategory: {item['subcategory']}. Description: {item['description']}\n"
if current_category == 'clothing':
context += f"\nRecommend the next single item based on the selected items, user's request, and style guide. 【CRITICAL CONSTRAINT】You MUST strictly **maintain uniqueness**; do not recommend any clothing whose **Subcategory** is already present in this exclusion list: {existing_subcategories}."
elif current_category in ['shoes', 'bags']:
context += f"\nRecommend the next {current_category} based on the selected items, user's request, and style guide."
elif current_category == 'accessories':
context += f"\nRecommend a complete list of accessories to complement the selected outfit based on the user's request and accessories style guide. 【CRITICAL CONSTRAINT】You MUST strictly **maintain uniqueness**; do not recommend any accessories whose **Subcategory** is already present in this exclusion list: {existing_subcategories}."
elif current_category == 'all':
context += "\nRecommend a **complete, full outfit**, including all items (clothing, shoes, bags, and accessories), strictly following the Request Summary and Style Guide. Output the **complete list** of items in a single JSON response."
return context
def post_operation(self, status: str, message: str, callback_url: str, img_path: str):
    """Send the current outfit state to the callback endpoint.

    Args:
        status: Status string placed in the payload (callers in this file
            pass "ok", "continue", "failed" or "stop").
        message: Free-text detail placed in the payload.
        callback_url: URL the JSON payload is POSTed to.
        img_path: Value sent as the payload's ``path`` field (the merged
            outfit image path at the call sites in this file).

    Returns:
        The payload dict when ``settings.LOCAL == 0``; otherwise an empty
        dict, and no request is made.
    """
    if settings.LOCAL != 0:
        return {}

    # Build the payload from scratch on every call; deepcopy snapshots the
    # items so later mutation of self.outfit_items cannot alter it.
    response_data = {
        'items': deepcopy(self.outfit_items),
        'status': status,
        'message': message,
        'path': img_path,
        'outfit_id': self.outfit_id,
    }
    response = post_request(url=callback_url, data=json.dumps(response_data), headers=self.headers)
    logger.info(f"request data {response_data} | JAVA callback info -> status:{response.status_code} | message:{response.text}")
    return response_data
async def run_styling_process(self, request_summary, occasions, stylist_name, start_outfit=[], batch_sources=[], user_id="test", callback_url="", gender: str = "male"):
self.outfit_items = start_outfit
"""主流程控制循环。"""
print(f"--- Starting Agent (Outfit ID: {self.outfit_id}) ---")
stylist_guide, accessories_guide = self._load_style_guide(stylist_name)
system_prompt = self._build_main_clothing_prompt(request_summary, gender, stylist_guide)
response_data = {
"status": "",
"message": "",
"path": "",
"outfit_id": self.outfit_id,
"items": []
}
logger.info(response_data)
url = f'{callback_url}/api/style/callback'
file_name = self.outfit_id
async def _execute_iterative_recommendation(
    self,
    current_category: str,
    system_prompt: str,
    schema: Dict,
    max_len: int,
    occasions: List[str],
    batch_sources: List[str],
    user_id: str,
    url: str
):
    """Ask the model for one item per step and add matches to the outfit.

    Loops at most ``max_len`` times, or until the model answers with
    ``action == 'stop'`` or returns an unusable response. Every step posts a
    status to ``url`` through ``post_operation``.

    Args:
        current_category: Key of ``FASHION_TAXONOMY`` for this stage.
        system_prompt: System prompt passed to the model call.
        schema: Response schema passed to the model call.
        max_len: Maximum number of model calls for this stage.
        occasions: Forwarded to ``_get_next_item``.
        batch_sources: Forwarded to ``_get_next_item``.
        user_id: Forwarded to ``_merge_images`` and ``_call_gemini``.
        url: Callback URL used for progress posts.
    """
    recommend_timestep = 0
    gemini_data = {'action': 'start'}
    # Seed the exclusion list with everything already in the outfit: the
    # duplicate-subcategory check below rejects those anyway, so telling the
    # model up front avoids wasting a step on them.
    existing_subcategories = [item['subcategory'] for item in self.outfit_items]

    while recommend_timestep < max_len and gemini_data.get('action') != 'stop':
        recommend_timestep += 1

        # 1. Build the user turn from the items selected so far.
        user_input = self._build_user_input(current_category, ", ".join(existing_subcategories))

        # 2. Merge the current item images for the model call.
        merged_image_path, image_bytes = await self._merge_images(self.outfit_id, user_id, self.stylist_name)

        # 3. Call the model and parse its reply.
        gemini_response_text = await self._call_gemini(
            user_input,
            user_id,
            self.outfit_id,
            schema,
            image_bytes,
            system_prompt
        )
        gemini_data = self._parse_gemini_response(gemini_response_text)

        if not gemini_data:
            print("Agent 返回无效响应,终止流程。")
            self.post_operation(
                status="failed",
                message="Agent returned invalid response, terminating process.",
                callback_url=url,
                img_path=merged_image_path,
            )
            break

        if gemini_data.get('action') != 'recommend_item':
            continue

        subcategory = gemini_data.get('subcategory')
        description = gemini_data.get('description')

        # 4a. The recommended subcategory must belong to this stage.
        if subcategory not in FASHION_TAXONOMY[current_category]:
            self.post_operation(
                status="continue",
                message=f"Invalid subcategory recommended by Agent: {subcategory}. Requesting Agent to re-output.",
                callback_url=url,
                img_path=merged_image_path,
            )
            continue

        # 4b. Look the item up in the local database.
        new_item = self._get_next_item(description, current_category, subcategory, occasions, batch_sources, self.gender)
        if not new_item:
            self.post_operation(
                status="continue",
                message=f"No matching item is found. Ask Gemini to re-output.",
                callback_url=url,
                img_path=merged_image_path,
            )
            continue
        elif new_item['subcategory'] in [x['subcategory'] for x in self.outfit_items]:
            self.post_operation(
                status="continue",
                message=f"{new_item['item_id']}'s subcategory {new_item['subcategory']} duplicated. Ask Gemini to re-output.",
                callback_url=url,
                img_path=merged_image_path,
            )
            continue
        elif new_item['item_id'] in [x['item_id'] for x in self.outfit_items]:
            self.post_operation(
                status="continue",
                message=f"Item {new_item['item_id']} duplicated. Ask Gemini to re-output.",
                callback_url=url,
                img_path=merged_image_path,
            )
            continue
        else:
            self.outfit_items.append(new_item)
            existing_subcategories.append(new_item["subcategory"])
            self.post_operation(
                status="ok",
                message=f"Add new item {new_item['item_id']} in category {new_item['category']} successfully.",
                callback_url=url,
                img_path=merged_image_path,
            )
            print(f"Stage {current_category.upper()}, Step {recommend_timestep}: {gemini_data}, found item: {new_item['item_id']}")

    # Reached when the action is stop, the step limit is hit, or the reply
    # was invalid; in the last case gemini_data is falsy and may be None.
    finish_reason = gemini_data.get('reason') if gemini_data else None
    logger.info(f"Main clothing stylist process finished: {finish_reason}")
async def _execute_batch_recommendation(
    self,
    current_category: str,  # this can be any category or all
    system_prompt: str,
    schema: Dict,
    occasions: List[str],
    batch_sources: List[str],
    user_id: str,
    url: str
):
    """Ask the model once for a list of items and add every match found.

    Args:
        current_category: A ``FASHION_TAXONOMY`` key, or ``"all"`` to let
            each recommended subcategory decide its own category.
        system_prompt: System prompt passed to the model call.
        schema: Response schema passed to the model call.
        occasions: Forwarded to ``_get_next_item``.
        batch_sources: Forwarded to ``_get_next_item``.
        user_id: Forwarded to ``_merge_images`` and ``_call_gemini``.
        url: Callback URL used when the model reply is unusable.

    Returns:
        The ``reason`` string from the model reply, or ``''`` if absent.
    """
    user_input = self._build_user_input(current_category, existing_subcategories=", ".join([x['subcategory'] for x in self.outfit_items]))
    merged_image_path, image_bytes = await self._merge_images(self.outfit_id, user_id, self.stylist_name)
    gemini_response_text = await self._call_gemini(
        user_input,
        user_id,
        self.outfit_id,
        schema,
        image_bytes,
        system_prompt
    )
    # Treat an unparseable reply like an empty one so it takes the
    # "failed" branch below instead of raising on .get().
    gemini_data = self._parse_gemini_response(gemini_response_text) or {}
    recommended_items = gemini_data.get('recommended_items', [])
    reason = gemini_data.get('reason', '')

    if not recommended_items or not isinstance(recommended_items, list):
        print("No recommended item from Gemini, terminating process.")
        self.post_operation(
            status="failed",
            message="Agent returned invalid response, terminating process.",
            callback_url=url,
            img_path=merged_image_path
        )
        return reason

    for idx, rec_item in enumerate(recommended_items):
        if not isinstance(rec_item, dict):
            continue
        subcategory = rec_item.get('subcategory')
        description = rec_item.get('description')

        # 4a. Skip subcategories that are not in the taxonomy at all.
        if subcategory not in ALL_SUBCATEGORY_LIST:
            continue

        # 4b. Decide which category to search. In "all" mode it is derived
        # from the subcategory; otherwise the stage's own category is used
        # (previously `category` was unbound on that path).
        if current_category == "all":
            category = next(
                (cat for cat, subcategories_list in FASHION_TAXONOMY.items() if subcategory in subcategories_list),
                None,
            )
            if category is None:
                continue
        else:
            category = current_category

        new_item = self._get_next_item(description, category, subcategory, occasions, batch_sources, self.gender)
        if not new_item or new_item['item_id'] in [x['item_id'] for x in self.outfit_items]:
            continue
        self.outfit_items.append(new_item)
        print(f"Item {idx + 1}: ({subcategory}) {rec_item}, found item: {new_item}")

    return reason
response_data['path'] = await self._merge_images(file_name, user_id, stylist_name)
self.post_operation(
response_data,
async def run_iterative_styling(self, request_summary, occasions, start_outfit=[], batch_sources=[], user_id="test", callback_url=""):
    """Build an outfit stage by stage, then add accessories in one batch.

    The clothing, shoes and bags stages run through
    ``_execute_iterative_recommendation`` (up to 4 model calls for clothing,
    1 each for shoes and bags); accessories are then requested through
    ``_execute_batch_recommendation``.

    Args:
        request_summary: Summary text passed into the system prompts.
        occasions: Forwarded to the recommendation helpers.
        start_outfit: Items the outfit starts with; copied, not mutated.
        batch_sources: Forwarded to the recommendation helpers.
        user_id: Forwarded to the recommendation helpers and image merge.
        callback_url: Base URL; ``/api/style/callback`` is appended.

    Returns:
        ``(response_data, total_duration)`` where ``response_data`` is the
        value returned by the final ``post_operation`` call and
        ``total_duration`` is the elapsed ``time.monotonic()`` seconds.
    """
    start_time = time.monotonic()
    STAGES = ['clothing', 'shoes', 'bags']
    # Copy so the shared default list (and any caller-owned list) is never
    # appended to by the recommendation helpers.
    self.outfit_items = list(start_outfit or [])
    stylist_guide, accessories_guide = self._load_style_guide(self.stylist_name)
    url = f'{callback_url}/api/style/callback'

    print(f"--- Starting Agent (Outfit ID: {self.outfit_id}) ---")

    for current_category in STAGES:
        max_len = 4 if current_category == 'clothing' else 1
        system_prompt = self._build_system_prompt(core_outfit_template, request_summary, stylist_guide, current_category, max_len)
        await self._execute_iterative_recommendation(
            current_category,
            system_prompt,
            build_iterative_schema(current_category),
            max_len,
            occasions,
            batch_sources,
            user_id,
            url
        )

    # Accessory stage. Name the category explicitly: the loop variable
    # above still holds the last stage ('bags') at this point.
    MAX_LEN_ACC = 3
    accessory_category = 'accessories'
    acc_system_prompt = self._build_system_prompt(accessories_template, request_summary, accessories_guide, accessory_category, MAX_LEN_ACC)
    reason = await self._execute_batch_recommendation(
        accessory_category,
        acc_system_prompt,
        build_batch_schema(accessory_category),
        occasions,
        batch_sources,
        user_id,
        url
    )

    # _merge_images is unpacked as (path, image_bytes) by the other callers
    # in this file; only the path belongs in the callback.
    final_image_path, _ = await self._merge_images(self.outfit_id, user_id, self.stylist_name)
    response_data = self.post_operation(
        status="stop",
        message=reason,
        callback_url=url,
        img_path=final_image_path
    )

    if settings.LOCAL == 1:
        with open(os.path.join(settings.OUTFIT_OUTPUT_DIR, self.stylist_name, f'{self.outfit_id}.json'), 'w') as f:
            json.dump({"request_summary": request_summary,"occasions": occasions, "items": self.outfit_items}, f, indent=2)

    end_time = time.monotonic()
    total_duration = end_time - start_time
    return response_data, total_duration
async def run_quick_batch_styling(self, request_summary, occasions, start_outfit=[], batch_sources=[], user_id="test", callback_url=""):
    """Build a whole outfit from a single batch model call.

    Args:
        request_summary: Summary text passed into the system prompt.
        occasions: Forwarded to ``_execute_batch_recommendation``.
        start_outfit: Items the outfit starts with; copied, not mutated.
        batch_sources: Forwarded to ``_execute_batch_recommendation``.
        user_id: Forwarded to the recommendation helper and image merge.
        callback_url: Base URL; ``/api/style/callback`` is appended.

    Returns:
        ``(response_data, total_duration)`` where ``response_data`` is the
        value returned by the final ``post_operation`` call and
        ``total_duration`` is the elapsed ``time.monotonic()`` seconds.
    """
    start_time = time.monotonic()
    # Copy so the shared default list (and any caller-owned list) is never
    # appended to by the recommendation helper.
    self.outfit_items = list(start_outfit or [])
    stylist_guide, accessories_guide = self._load_style_guide(self.stylist_name)
    url = f'{callback_url}/api/style/callback'

    print(f"--- Starting Agent (Outfit ID: {self.outfit_id}) ---")

    MAX_LEN = 9
    system_prompt = self._build_system_prompt(all_items_template, request_summary, stylist_guide + accessories_guide, "", MAX_LEN)
    reason = await self._execute_batch_recommendation(
        'all',
        system_prompt,
        build_batch_schema(),
        occasions,
        batch_sources,
        user_id,
        url
    )

    # _merge_images is unpacked as (path, image_bytes) by the other callers
    # in this file; only the path belongs in the callback.
    final_image_path, _ = await self._merge_images(self.outfit_id, user_id, self.stylist_name)
    response_data = self.post_operation(
        status="stop",
        message=reason,
        callback_url=url,
        img_path=final_image_path
    )

    if settings.LOCAL == 1:
        with open(os.path.join(settings.OUTFIT_OUTPUT_DIR, self.stylist_name, f'{self.outfit_id}.json'), 'w') as f:
            json.dump({"request_summary": request_summary,"occasions": occasions, "items": self.outfit_items}, f, indent=2)

    end_time = time.monotonic()
    total_duration = end_time - start_time
    return response_data, total_duration
def _upload_to_gcs(self, bucket_name: str, blob_name: str, mime_type, image_bytes) -> str:
"""同步方法:将文件上传到 GCS 并返回 GCS URI。"""

View File

@@ -1,32 +0,0 @@
BASIC_PROMPT = """You are a professional, friendly, and insightful AI {gender}'s styling assistant.
Your primary mission is to engage in a multi-turn conversation with the user to fully understand their dressing intent. You must adopt a professional yet approachable tone.
CONVERSATION GOALS:
1. **Occasion:** Determine the specific event (e.g., romantic dinner, summer wedding, business meeting).
2. **Style:** Pinpoint the desired aesthetic (e.g., classic elegance, edgy, minimalist, bohemian).
3. **Vibe/Details:** Gather any mood or specific constraints (e.g., needs to be comfortable, requires light colors, no bare shoulders).
4. **Item Preference:** Ask the user if they have any specific preferences for an item type or silhouette (e.g., preference for a dress, skirt, tailored pants, or a particular neckline/length).
GUIDANCE FOR RESPONSE GENERATION:
- After the user's initial request (e.g., "I want a chic outfit for dinner."), immediately reply with a friendly, targeted follow-up question to elicit the most crucial missing information (usually a combination of **Occasion** and **Style**).
- Be concise. Ask only 1 to 2 essential questions per turn.
- You must gather sufficient, clear intent before proceeding to actual clothing recommendations.
OUTPUT FORMAT INSTRUCTION:
- **DO NOT** use any Markdown formatting whatsoever (e.g., do not use asterisks (*), bold text (**), lists, or code blocks).
- **ONLY** output the plain text response spoken by the AI Assistant.
Example Follow-up (mimicking a conversational flow):
User: I want a chic outfit for dinner.
Your Response: Hey there! A chic dinner outfit, I love that! To give you the perfect recommendations, tell me: is this a romantic date, business dinner, or celebration with friends? And what's your go-to style vibe: classic elegance or something with more edge?"""
SUMMARY_PROMPT = """
You are an expert fashion request analyzer. Analyze the conversation history provided by the user.
Your task is to:
1. Identify the most appropriate occasions from the allowed list based on the user's intent.
2. Write a detailed summary string that captures the user's style preferences, specific item requests, disliked items, body concerns, and color preferences. This summary will be used by a stylist to recommend outfits.
Extract this information accurately from the chat history.
"""

View File

@@ -1,6 +1,3 @@
import random
import time
import numpy as np
import torch
import chromadb
@@ -8,7 +5,7 @@ from PIL import Image
from typing import List, Dict, Any
from transformers import CLIPProcessor, CLIPModel
from app.taxonomy import OCCASION, ALL_CATEGORY
from app.taxonomy import OCCASION, CATEGORY_LIST, IGNORE_SUBCATEGORY
class VectorDatabase():
@@ -50,7 +47,7 @@ class VectorDatabase():
return features.cpu().numpy().flatten().tolist()
def get_matched_item(self, embedding: List[float], category: str, occasions: List[str] = [], batch_sources: List[str] = [], gender: str = 'female', n_results: int = 1) -> List[Dict[str, Any]]:
if category not in ALL_CATEGORY:
if category not in CATEGORY_LIST:
raise ValueError(f"Recommended {category} is not valid.")
and_conditions = [
@@ -59,15 +56,19 @@ class VectorDatabase():
{"$or": [
{"gender": gender},
{"gender": "unisex"},
]}
]},
{"subcategory": {"$nin": IGNORE_SUBCATEGORY}}
]
if batch_sources and len(batch_sources) > 0:
source_conditions = []
for source in batch_sources:
source_conditions.append({"batch_source": source})
if len(batch_sources) == 1:
and_conditions.append({"batch_source": batch_sources[0]})
else:
source_conditions = []
for source in batch_sources:
source_conditions.append({"batch_source": source})
# 将 Batch Source 的 OR 子句添加到主 AND 条件中
and_conditions.append({"$or": source_conditions})
# 将 Batch Source 的 OR 子句添加到主 AND 条件中
and_conditions.append({"$or": source_conditions})
results = self.collection.query(
query_embeddings=[embedding],

View File

@@ -77,8 +77,7 @@ def merge_images_to_square(outfit_items: List[Dict[str, str]], max_len=9, add_te
# We use Image.open() and convert to 'RGB' to handle potential transparency (RGBA)
# and ensure compatibility with the final 'RGB' canvas and JPG output.
if settings.LOCAL == 1:
image_file_path = os.path.join(settings.LOCAL_IMAGE_DIR, path)
img = Image.open(image_file_path).convert('RGB')
img = Image.open(path).convert('RGB')
else:
img = oss_get_image(oss_client=minio_client, path=f"{MINIO_LC_DATA_PATH}/{path}", data_type="PIL").convert('RGB')
# img = Image.open(path).convert('RGB')
@@ -185,142 +184,3 @@ def merge_images_to_square(outfit_items: List[Dict[str, str]], max_len=9, add_te
# canvas.save(output_path, 'JPEG', quality=90)
return canvas
# def merge_images_to_square(outfit_items: List[Dict[str, str]], max_len=9, add_text=True):
# """
# Loads up to 4 images from the given paths, resizes them while maintaining
# aspect ratio, and merges them onto a 1024x1024 white background JPG.
#
# The layout depends on the number of images:
# 1: Center the single image on the 1024x1024 canvas.
# 2: Place side-by-side, each scaled to fit a 512x1024 half.
# 3: Place in top-left (512x512), top-right (512x512), and bottom-left (512x512).
# 4: Place in all four 512x512 quadrants.
#
# Args:
# outfit_items: A list of item metadata (max length 9).
#
# Returns:
# The file path of the temporary merged JPG image.
# """
#
# # Define the final canvas size
# CANVAS_SIZE = 1024
#
# # 1. Create the final white canvas
# # Using 'RGB' mode for JPG output
# canvas = Image.new('RGB', (CANVAS_SIZE, CANVAS_SIZE), 'white')
# draw = ImageDraw.Draw(canvas)
# font = ImageFont.load_default()
#
# # 2. Define the quadrants/target areas (x, y, w, h)
# # The positions are based on a 512x512 quadrant size
# quadrants = {
# 1: [(0, 0, CANVAS_SIZE, CANVAS_SIZE)], # Single full-size placement
# 2: [(0, 0, 512, CANVAS_SIZE), (512, 0, 512, CANVAS_SIZE)], # Left, Right
# 3: [(0, 0, 512, 512), (512, 0, 512, 512), (0, 512, 512, 512)], # Top-Left, Top-Right, Bottom-Left
# 4: [(0, 0, 512, 512), (512, 0, 512, 512), (0, 512, 512, 512), (512, 512, 512, 512)], # All Four
# 5: ALL_9_CELLS[:5], # 布局前5个单元格 (1-5)
# 6: ALL_9_CELLS[:6], # 布局前6个单元格 (1-6)
# 7: ALL_9_CELLS[:7], # 布局前7个单元格 (1-7)
# 8: ALL_9_CELLS[:8], # 布局前8个单元格 (1-8)
# 9: ALL_9_CELLS[:9] # 布局全部9个单元格 (1-9)
# }
#
# # 3. Load and Filter Images
# valid_images = []
# image_paths = [item['image_path'] for item in outfit_items]
# for path in image_paths:
# try:
# # We use Image.open() and convert to 'RGB' to handle potential transparency (RGBA)
# # and ensure compatibility with the final 'RGB' canvas and JPG output.
# img = oss_get_image(oss_client=minio_client, path=f"{MINIO_LC_DATA_PATH}/{path}", data_type="PIL").convert('RGB')
# # img = Image.open(path).convert('RGB')
# valid_images.append(img)
# except Exception as e:
# logger.error(f"Error loading image {path}. Skipping: {e}")
#
# num_images = len(valid_images)
#
# if num_images == 0:
# raise ValueError("No valid images were loaded.")
#
# if num_images > max_len:
# raise ValueError(f"Valid item number {num_images} exceed max limit {max_len}")
#
# # Get the correct list of target areas based on the number of valid images
# target_areas = quadrants.get(num_images, [])
#
# # 4. Resize and Paste
# for i, (img, item) in enumerate(zip(valid_images, outfit_items)):
# item_id = item['item_id']
# category = item['category']
# if i >= len(target_areas):
# # This should not happen if num_images <= 4
# break
#
# # Target area dimensions (x_start, y_start, width, height)
# x_start, y_start, target_w, target_h = target_areas[i]
#
# # Calculate new size while maintaining aspect ratio
# original_w, original_h = img.size
#
# # Calculate the ratio needed to fit within the target area
# ratio_w = target_w / original_w
# ratio_h = target_h / original_h
#
# # Use the *smaller* of the two ratios to ensure the image fits entirely
# resize_ratio = min(ratio_w, ratio_h)
#
# # Calculate the new dimensions
# new_w = int(original_w * resize_ratio)
# new_h = int(original_h * resize_ratio)
#
# # Resize the image. Image.Resampling.LANCZOS provides high-quality scaling.
# # Pillow documentation recommends ANTIALIAS or BICUBIC for downscaling,
# # but LANCZOS is a good general high-quality filter.
# # Note: In Pillow versions > 9.0.0, Image.LANCZOS is now Image.Resampling.LANCZOS
# resized_img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
#
# # Calculate the paste position to center the resized image within its target area
# # Center X: (Target Width - New Width) / 2 + X Start
# paste_x = (target_w - new_w) // 2 + x_start
# # Center Y: (Target Height - New Height) / 2 + Y Start
# # paste_y = (target_h - new_h) // 2 + y_start
#
# TEXT_RESERVE_HEIGHT = 30
# paste_y = (target_h - new_h - TEXT_RESERVE_HEIGHT) // 2 + y_start
# paste_y = max(paste_y, y_start)
#
# # Paste the resized image onto the canvas
# canvas.paste(resized_img, (paste_x, paste_y))
#
# full_text = f"ID: {item_id}, Category: {category}"
# try:
# # 推荐使用:计算文本的实际尺寸 (width, height)
# bbox = draw.textbbox((0, 0), full_text, font=font)
# text_w = bbox[2] - bbox[0]
# text_h = bbox[3] - bbox[1]
# except AttributeError:
# # 兼容旧版本 Pillow
# text_w, text_h = draw.textsize(full_text, font=font)
#
# # 计算 X 轴起始位置:使其在目标区域 (target_w) 中居中
# text_x_center = x_start + target_w // 2
# text_x_start = text_x_center - text_w // 2
#
# # 计算 Y 轴起始位置:将其放在目标区域的底部
# # (目标区域的起始Y + 目标区域的高度 - 文本行的高度)
# text_y_start = y_start + target_h - text_h - 5 # 减去 5 像素作为边距
#
# # 3. 绘制合并后的文本
# if add_text:
# draw.text((text_x_start, text_y_start),
# full_text,
# fill='black',
# font=font)
#
# # Save as a high-quality JPG (quality=90 is a good balance)
# # canvas.save(output_path, 'JPEG', quality=90)
#
# return canvas

View File

@@ -7,26 +7,46 @@ OCCASION = [
"Garden Party / Daytime Event"
]
CATEGORY = {
FASHION_TAXONOMY = {
'clothing': [
'coats',
'jackets',
'blazers',
'puffer',
'cardigan',
'sweater',
'shirts',
't-shirts',
'pullover',
'polos',
'bodysuits',
'dresses',
'skirts',
'jeans',
'shorts',
'leggings',
'jumpsuits',
'swimwear',
# --- Tops ---
't-shirts', # T恤
'shirts', # 衬衫 (泛指梭织)
'blouses', # 女式衬衫
'polo shirts', # Polo衫
'tank tops', # 背心/坎肩
'camisoles', # 吊带背心
# --- Knits/Sweaters ---
'sweaters', # 毛衣 (泛指)
'cardigans', # 开衫
'pullovers', # 套头衫
'hoodies', # 连帽衫
'sweatshirts', # 圆领卫衣
'vests', # 马甲/背心 (外穿)
# --- Outerwear ---
'coats', # 大衣 (长款)
'jackets', # 夹克 (短款)
'blazers', # 西装外套
# --- Bottoms ---
'jeans', # 牛仔裤 (虽是材质,但在时尚界视为独立大类)
'trousers', # 西裤/正装长裤
'pants', # 长裤 (泛指休闲)
'joggers', # 束脚裤
'leggings', # 打底裤/紧身裤
'shorts', # 短裤
'skirts', # 半身裙
'skorts', # 裙裤
# --- One-Piece ---
'dresses', # 连衣裙
'jumpsuits', # 连体长裤
'bodysuits', # 连体紧身衣
'suits', # 套装 (西装套)
# --- Intimates/Swim ---
'bras', # 文胸
'underwear', # 内衣
'lingerie', # 性感内衣
'pajamas', # 睡衣套装
'swimwear', # 泳装
],
'shoes': [
'sneakers',
@@ -38,22 +58,37 @@ CATEGORY = {
'boots',
],
'bags': [
'bags'
'shoulder bags',
'crossbody',
'bucket bags',
'tote bags',
'clutch bags',
'backpacks',
'travel bags',
'luggage',
],
'accessories': [
# --- Jewelry & Watches ---
'necklaces',
'earrings',
'bracelets',
'jewellery',
'eyewear',
'scarves',
'rings',
'cufflinks',
'watches',
# --- Head/Face ---
'hats',
'gloves',
'eyewear',
# --- Body/Textile ---
'belts',
'socks',
'watches'
'scarves',
'gloves',
'ties',
'bow ties',
'pocket squares',
'socks',
]
}
ALL_CATEGORY = sum(CATEGORY.values(), [])
CATEGORY_LIST = list(FASHION_TAXONOMY.keys())
ALL_SUBCATEGORY_LIST = sum(FASHION_TAXONOMY.values(), [])
IGNORE_CATEGORY = ['socks']
IGNORE_SUBCATEGORY = ['socks']

View File

@@ -40,7 +40,7 @@
## Example in `metadata_extraction.json`
```json
"EOJ367": {
"category": "shoes",
"subcategory": "necklaces",
"gender": "female",
"applicable_occasions": [
"Casual",
@@ -60,33 +60,34 @@
## Metadata in Vector Database
```json
{
'item_id': 'EOJ128',
'category': 'sunglasses',
'gender': 'unisex',
'modality': 'image',
'brand': 'CELINE',
'color': 'BROWN',
'description': "Immerse yourself in the depth of classic style with CELINE\'s Tortoiseshell Logo Sunglasses. Featuring a rich, tortoiseshell acetate frame and adorned with the iconic CELINE logo in gold, these sunglasses are a testament to timeless elegance and luxury. Perfect for those who appreciate a sophisticated aesthetic, they offer optimal UV protection while ensuring you remain at the forefront of fashion.",
'tags': 'celine,accessories,in-stock,new,maxi,triomphe,acetate,round',
'price': 4500,
'url': 'https://www.lanecrawford.com.hk/product/celine/maxi-triomphe-acetate-round-sunglasses/_/EOJ128/product.lc?utm_medium=embed&utm_source=ai-recommended&utm_campaign=2025-christmas_lc_ai-recommended',
'batch_source': '2025_q4',
'Outdoor': 0,
'Ski / Snow / Mountain': 0,
'Festival / Concert': 0,
'Activewear': 0,
'Casual': 1,
'Cocktail / Semi-Formal': -1,
'Formal': -1,
'Party / Clubbing': 0,
'Evening': 0,
'Travel / Transit': 0,
'Beach / Swim': 0,
'Garden Party / Daytime Event': 1,
'Black Tie / White Tie': -1,
'Resort': 1,
'Athleisure': 0,
'Business / workwear': -1,
'Bridal / Wedding': -1,
"item_id": "EOJ128",
"category": "accessories",
"subcategory": "eyewear",
"gender": "unisex",
"modality": "image",
"brand": "CELINE",
"color": "BROWN",
"description": "Immerse yourself in the depth of classic style with CELINE's Tortoiseshell Logo Sunglasses. Featuring a rich, tortoiseshell acetate frame and adorned with the iconic CELINE logo in gold, these sunglasses are a testament to timeless elegance and luxury. Perfect for those who appreciate a sophisticated aesthetic, they offer optimal UV protection while ensuring you remain at the forefront of fashion.",
"tags": "celine,accessories,in-stock,new,maxi,triomphe,acetate,round",
"price": 4500,
"url": "https://www.lanecrawford.com.hk/product/celine/maxi-triomphe-acetate-round-sunglasses/_/EOJ128/product.lc?utm_medium=embed&utm_source=ai-recommended&utm_campaign=2025-christmas_lc_ai-recommended",
"batch_source": "2025_q4",
"Outdoor": 0,
"Ski / Snow / Mountain": 0,
"Festival / Concert": 0,
"Activewear": 0,
"Casual": 1,
"Cocktail / Semi-Formal": -1,
"Formal": -1,
"Party / Clubbing": 0,
"Evening": 0,
"Travel / Transit": 0,
"Beach / Swim": 0,
"Garden Party / Daytime Event": 1,
"Black Tie / White Tie": -1,
"Resort": 1,
"Athleisure": 0,
"Business / workwear": -1,
"Bridal / Wedding": -1
}
```

View File

@@ -5,7 +5,7 @@ from PIL import Image
import json
from tqdm import tqdm
from app.taxonomy import OCCASION, CATEGORY, ALL_CATEGORY
from app.taxonomy import OCCASION, FASHION_TAXONOMY, ALL_SUBCATEGORY_LIST
# data config
@@ -42,7 +42,7 @@ Description: Cut from cardinal-red virgin wool, Armarium's Loren skirt wields ta
Tags: armarium, clothing, in-stock, new, loren, wool, blend, tube
"""
EXAMPLE_1_JSON = json.dumps({
"category": "skirts",
"subcategory": "skirts",
"gender": "female",
"applicable_occasions": [
"Business/workwear", "Evening", "Cocktail / Semi-Formal", "Party / Clubbing", "Formal"
@@ -61,7 +61,7 @@ Description: Crafted from 18k yellow gold and rhodium-plated sterling silver, th
Tags: tateossian, accessories, in-stock, new, mayfair, yellow, gold, rhodium
"""
EXAMPLE_2_JSON = json.dumps({
"category": "jewelry",
"subcategory": "jewelry",
"gender": "female",
"applicable_occasions": [
"Formal", "Black Tie / White Tie", "Bridal / Wedding", "Business/workwear", "Cocktail / Semi-Formal"
@@ -94,20 +94,24 @@ def format_product_info(product):
return info
def generate_full_prompt(product_info, raw_category):
def raw_category_mapping(raw_category: str) -> str:
    """Map a raw catalogue category label to the category key used downstream.

    'Fine Jewellery And Watches' is folded into 'accessories'; every other
    label is returned lower-cased.
    """
    if raw_category == 'Fine Jewellery And Watches':
        return 'accessories'
    return raw_category.lower()
def generate_full_prompt(product_info, raw_category):
category = raw_category_mapping(raw_category)
subcategory_list = FASHION_TAXONOMY.get(category)
SYSTEM_PROMPT = f"""You are an expert fashion AI assistant. Your task is to analyze the provided product image and product details to:
1. determine the suitable occasions for wearing or using the item. You must choose occasions ONLY from the following strict list: {json.dumps(OCCASION, indent=4)}. Only relevant suitable or inappropriate occasions should be selected.
2. categorize it into suitable category in strict list: {json.dumps(subcategory_list)}.
2. categorize it into suitable subcategory in strict list: {json.dumps(subcategory_list)}.
3. categorize it into appropriate gender in ["female", "male", "unisex"]
Output Format:
Return ONLY a valid JSON object with four keys: "category", "gender", "applicable_occasions" and "inappropriate_occasions". Do not include any analysis or extra text outside of the final JSON object.
Return ONLY a valid JSON object with four keys: "subcategory", "gender", "applicable_occasions" and "inappropriate_occasions". Do not include any analysis or extra text outside of the final JSON object.
"""
# 组合对话序列
@@ -140,37 +144,36 @@ product_list = [
]
def validate_results():
if os.path.exists(OUTPUT_FILE):
with open(OUTPUT_FILE, 'r') as f:
final_results = json.load(f)
else:
final_results = {}
def validate_result(result_dict):
    """Return True when a parsed model record is usable.

    A record is accepted only if it is a dict with a non-empty "subcategory"
    that appears in ALL_SUBCATEGORY_LIST and a "gender" that is one of
    "female", "male" or "unisex".
    """
    # json.loads can yield a list, string or number; only dicts qualify.
    if not isinstance(result_dict, dict):
        return False

    subcategory = result_dict.get("subcategory")
    gender = result_dict.get("gender")
    if not subcategory or not gender:
        return False
    if gender not in ('female', 'male', 'unisex'):
        return False
    return subcategory in ALL_SUBCATEGORY_LIST
if os.path.exists(OUTPUT_FILE):
with open(OUTPUT_FILE, 'r') as f:
final_results = json.load(f)
else:
final_results = {}
if gender not in ['female', 'male', 'unisex']:
unfinished_ids.append(product)
return unfinished_ids, final_results
attemps = 0
while attemps < 3:
unfinished_products = [product for product in product_list if product.get('id') not in final_results.keys()]
attemps += 1
unfinished_products, final_results = validate_results()
completion_ratio = len(unfinished_products) / len(product_list)
if (completion_ratio > 0.95):
print("valid results surpass 95%. Finish Now.")
completion_ratio = len(final_results) / len(product_list)
if (completion_ratio > 0.85):
print("valid results surpass 85%. Finish Now.")
break
else:
print(f"Start {attemps} categorization process. Current ratio: {completion_ratio * 100}%")
@@ -252,11 +255,11 @@ while attemps < 3:
json_str = generated_text[start_idx:end_idx]
result_dict = json.loads(json_str)
final_results[product_id] = result_dict
if validate_result(result_dict):
final_results[product_id] = result_dict
except Exception as e:
print(f"ID {product_id}: FAILED to parse JSON. Raw Output: {generated_text.strip()}")
final_results[product_id] = {"error": str(e), "raw_output": generated_text.strip()}
# 显存清理(可选,但在长任务中推荐)
del inputs, outputs

View File

@@ -1,6 +1,3 @@
import chromadb
import os
import json
@@ -11,7 +8,7 @@ from tqdm import tqdm
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
from app.taxonomy import CATEGORY, ALL_CATEGORY, OCCASION
from app.taxonomy import ALL_SUBCATEGORY_LIST, OCCASION
BATCH_SOURCE = '2025_q4'
@@ -20,6 +17,7 @@ IMAGE_DIR = f'./data/{BATCH_SOURCE}/image_data'
RAW_DATA_PATH = f'{DATA_DIR}/products-all.json'
CATEGORIZED_METADATA_PATH = f'{DATA_DIR}/metadata_extraction.json'
ADD_TEXT_EMBEDDING = False
## Load data
with open(RAW_DATA_PATH, 'r', encoding='utf-8') as file:
@@ -36,11 +34,11 @@ collection = client.get_or_create_collection(
)
# if you wish to delete some item, uncomment following
# results = collection.delete(
# where={
# "batch_source": BATCH_SOURCE
# }
# )
results = collection.delete(
where={
"batch_source": BATCH_SOURCE
}
)
# Load model
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
@@ -63,9 +61,13 @@ def format_product_info(product):
)
return info
def raw_category_mapping(raw_category: str) -> str:
    """Translate a raw catalogue category label into a category key.

    The label 'Fine Jewellery And Watches' becomes 'accessories'; any other
    label is returned in lower case.
    """
    is_fine_jewellery = raw_category == 'Fine Jewellery And Watches'
    return 'accessories' if is_fine_jewellery else raw_category.lower()
# Combine all data together
new_category = {}
valid_count = 0
all_count = 0
for raw_item in tqdm(raw_data['products']):
@@ -91,18 +93,14 @@ for raw_item in tqdm(raw_data['products']):
print(f"{item_id} has not been categorized. It does not exist in {CATEGORIZED_METADATA_PATH}")
continue
category = processed_item.get("category")
category = raw_category_mapping(raw_category)
subcategory = processed_item.get("subcategory")
gender = processed_item.get("gender")
applicable_occasions = processed_item.get("applicable_occasions", [])
inappropriate_occasions = processed_item.get("inappropriate_occasions", [])
if category not in ALL_CATEGORY:
if subcategory not in ALL_SUBCATEGORY_LIST:
print(f"{item_id}'s category, {category}, does not valid.")
if category not in new_category:
new_category[category] = [item_id]
else:
new_category[category].append(item_id)
continue
if gender not in ['female', 'male', 'unisex']:
print(f"{item_id}'s gender is not valid in {['female', 'male', 'unisex']}")
@@ -129,6 +127,7 @@ for raw_item in tqdm(raw_data['products']):
item_img_metadata = {
"item_id": item_id,
"category": category,
"subcategory": subcategory,
"description": description,
"gender": gender,
'brand': raw_item.get('brand', ''),
@@ -146,10 +145,6 @@ for raw_item in tqdm(raw_data['products']):
for occasion in inappropriate_occasions:
item_img_metadata[occasion] = -1
item_txt_metadata = deepcopy(item_img_metadata)
item_txt_metadata["modality"] = "text"
# Get image feature
image = Image.open(image_path).convert("RGB")
inputs = processor(images=image, return_tensors="pt").to(device)
@@ -158,21 +153,30 @@ for raw_item in tqdm(raw_data['products']):
img_features = img_features / img_features.norm(p=2, dim=-1, keepdim=True)
img_embedding = img_features.cpu().numpy().flatten().tolist()
# Get text feature
inputs = processor(text=[description], return_tensors="pt", padding=True, truncation=True).to(device)
with torch.no_grad():
txt_features = model.get_text_features(**inputs)
txt_features = txt_features / txt_features.norm(p=2, dim=-1, keepdim=True)
txt_embedding = txt_features.cpu().numpy().flatten().tolist()
product_info = format_product_info(raw_item)
# Insert into ChromaDB
collection.add(
ids=[f'{item_id}_img', f'{item_id}_txt'],
documents=[product_info, product_info],
embeddings=[img_embedding, txt_embedding],
metadatas=[item_img_metadata, item_txt_metadata],
ids=[f'{item_id}_img'],
documents=[product_info],
embeddings=[img_embedding],
metadatas=[item_img_metadata],
)
print(f"Final valid ratio is {valid_count / all_count * 100}%. Total number is {all_count}, Valid number is {valid_count}")
print(f'Found new category for consideration: {new_category}')
# Optional second record per item: only when ADD_TEXT_EMBEDDING is enabled, also
# store an embedding of the item's description under the id '<item_id>_txt'.
if ADD_TEXT_EMBEDDING:
# Start from a copy of the image record's metadata and mark the copy as text.
item_txt_metadata = deepcopy(item_img_metadata)
item_txt_metadata["modality"] = "text"
# Get text feature
inputs = processor(text=[description], return_tensors="pt", padding=True, truncation=True).to(device)
# Inference only, so gradient tracking is disabled.
with torch.no_grad():
txt_features = model.get_text_features(**inputs)
# L2-normalise the feature vector, mirroring what is done for the image features.
txt_features = txt_features / txt_features.norm(p=2, dim=-1, keepdim=True)
# Move to CPU and flatten into a plain Python list for storage.
txt_embedding = txt_features.cpu().numpy().flatten().tolist()
# The text record reuses the same document string (product_info) as the image record.
collection.add(
ids=[f'{item_id}_txt'],
documents=[product_info],
embeddings=[txt_embedding],
metadatas=[item_txt_metadata],
)
print(f"Final valid ratio is {valid_count / all_count * 100}%. Total number is {all_count}, Valid number is {valid_count}")