From 9d11e995dd7230917b61f93d0d6ca8bac2929315 Mon Sep 17 00:00:00 2001 From: xupei Date: Tue, 29 Oct 2024 16:50:46 +0800 Subject: [PATCH 01/19] =?UTF-8?q?=E4=BB=8E=E5=90=91=E9=87=8F=E6=95=B0?= =?UTF-8?q?=E6=8D=AE=E5=BA=93=E4=B8=AD=E6=A3=80=E7=B4=A2=E5=9B=BE=E7=89=87?= =?UTF-8?q?=E5=B9=B6=E9=9B=86=E6=88=90=E5=88=B0chat-robot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/api/api_query_image.py | 36 ++++++++ app/api/api_route.py | 3 +- app/schemas/query_image.py | 6 ++ app/service/chat_robot/script/main.py | 2 +- app/service/chat_robot/script/prompt.py | 49 ++++++---- .../chat_robot/script/service/CallQWen.py | 57 ++++++++++-- app/service/search_image_with_text/service.py | 89 +++++++++++++++++++ 7 files changed, 217 insertions(+), 25 deletions(-) create mode 100644 app/api/api_query_image.py create mode 100644 app/schemas/query_image.py create mode 100644 app/service/search_image_with_text/service.py diff --git a/app/api/api_query_image.py b/app/api/api_query_image.py new file mode 100644 index 0000000..d27c67b --- /dev/null +++ b/app/api/api_query_image.py @@ -0,0 +1,36 @@ +import json +import logging +from http.client import HTTPException + +from fastapi import APIRouter + +from app.schemas.query_image import QueryImageModel +from app.schemas.response_template import ResponseModel +from app.service.search_image_with_text.service import query + +router = APIRouter() +logger = logging.getLogger() + + +@router.post("/query_image") +def query_image(request_data: QueryImageModel): + """ + 对话机器人 + 创建一个具有以下参数的请求体: + - **gender**: 性别 + - **content**: 用户输入的内容 + + 示例参数: + { + "gender": "male", + "content": "give me a long sleeve blouse", + } + """ + try: + logger.info(f"query_image request item is : @@@@@@:{json.dumps(request_data.dict())}") + data = query(request_data.gender, request_data.content) + logger.info(f"query_image response @@@@@@:{json.dumps(data)}") + except Exception as e: + logger.warning(f"query_image Run Exception @@@@@@:{e}") + raise HTTPException(status_code=404, detail=str(e)) + return ResponseModel(data=data) diff --git a/app/api/api_route.py b/app/api/api_route.py index 7ee774d..0da3a66 100644 --- a/app/api/api_route.py +++ b/app/api/api_route.py @@ -1,6 +1,6 @@ from fastapi import APIRouter -from app.api import api_attribute_retrieve +from app.api import api_attribute_retrieve, api_query_image from app.api import api_brighten from app.api import api_chat_robot from app.api import api_design @@ -23,3 +23,4 @@ router.include_router(api_prompt_generation.router, tags=['prompt_generation'], router.include_router(api_design_pre_processing.router, tags=['design_pre_processing'], prefix="/api") router.include_router(api_image2sketch.router, tags=['api_image2sketch'], prefix="/api") router.include_router(api_brighten.router, tags=['api_brighten'], prefix="/api") +router.include_router(api_query_image.router, tags=['api_query_image'], prefix="/api") \ No newline at end of file diff --git a/app/schemas/query_image.py b/app/schemas/query_image.py new file mode 100644 index 0000000..147603f --- /dev/null +++ b/app/schemas/query_image.py @@ -0,0 +1,6 @@ +from pydantic import BaseModel + + +class QueryImageModel(BaseModel): + gender: str + content: str diff --git a/app/service/chat_robot/script/main.py b/app/service/chat_robot/script/main.py index cabe372..3342a5c 100644 --- a/app/service/chat_robot/script/main.py +++ b/app/service/chat_robot/script/main.py @@ -100,7 +100,7 @@ def chat(post_data): # session_key=f"buffer:{user_id}:{session_id}", # ) - final_outputs = CallQWen.call_with_messages(input_message) + final_outputs = CallQWen.call_with_messages(input_message, gender) # api_response = { # 'user_id': user_id, # 'session_id': session_id, diff --git a/app/service/chat_robot/script/prompt.py b/app/service/chat_robot/script/prompt.py index a88044d..ad6ac9e 100644 --- a/app/service/chat_robot/script/prompt.py +++ b/app/service/chat_robot/script/prompt.py @@ -1,16 +1,31 @@ +# FASHION_CHAT_BOT_PREFIX = """ +# You are a helpful assistant for fashion designers. You can chat with the users or answer their query as much as you can. +# The most crucial aspect is to accurately determine whether the user's inquiry requires a internet search or querying the database. +# Remember your answer should be very precise and the final output answer should not exceed 20 words. +# +# You may encounter the following types of questions: +# 1) If the query related to clothing retrieval, you are an agent designed to interact with a SQL database. +# Given an input question, create a syntactically correct mysql query to run, always fetching random data from tables. +# Unless the user specifies a specific number of examples they wish to obtain,always limit your query to at most 4 results. +# Never query for all the columns from a specific table, only ask for the relevant columns given the question. +# You MUST double check your query before executing it. If you get an error while executing a query, rewrite the query and try again. +# DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the database. +# If the question does not seem related to the database, just return "I don't know" as the answer. +# +# 2) If the query related to current events, you should use internet_search to seek help from the internet. +# +# 3) If the query is just casual conversation, engage in the conversation as a fashion designer assistant. +# +# Be careful to use the tools, since you are actually a chat bot. Tools can only be used when essential. +# """ + FASHION_CHAT_BOT_PREFIX = """ You are a helpful assistant for fashion designers. You can chat with the users or answer their query as much as you can. The most crucial aspect is to accurately determine whether the user's inquiry requires a internet search or querying the database. Remember your answer should be very precise and the final output answer should not exceed 20 words. You may encounter the following types of questions: -1) If the query related to clothing retrieval, you are an agent designed to interact with a SQL database. -Given an input question, create a syntactically correct mysql query to run, always fetching random data from tables. -Unless the user specifies a specific number of examples they wish to obtain,always limit your query to at most 4 results. -Never query for all the columns from a specific table, only ask for the relevant columns given the question. -You MUST double check your query before executing it. If you get an error while executing a query, rewrite the query and try again. -DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the database. -If the question does not seem related to the database, just return "I don't know" as the answer. +1) If you need to query information related to clothing retrieval, please use the get_image_from_vector_db tool. 2) If the query related to current events, you should use internet_search to seek help from the internet. @@ -37,15 +52,19 @@ ANSWER_FORMAT_SUFFIX = """ My final answer are limited to 20 words and be as much precise as possible. """ +# TOOLS_FUNCTIONS_SUFFIX = ( +# "If the input involves clothing queries," +# "I should look at the tables in the database to see what I can query. Then I should query the schema of the most relevant tables." +# "All SQL statements must use 'ORDER BY RAND()', for example:" +# "Example Input 1: 'SELECT img_name FROM skirt WHERE opening_type = 'Button' ORDER BY RAND() LIMIT 1'" +# "Example Input 2: 'SELECT img_name FROM top WHERE sleeve_length = 'Long' AND type = 'Blouse' ORDER BY RAND() LIMIT 2'" +# "If the input does not involve clothing queries, " +# "I should engage in conversation as an assistant or search from internet with internet_search tool." +# "If the database query returns no results, please respond directly with: 'Apologies, I couldn't find any images that match your description. Could you please give me more details about the clothing you're searching for?'" +# "Upon mentioning words related to 'tutorial' in the input, I should use tutorial_tool " +# ) TOOLS_FUNCTIONS_SUFFIX = ( - "If the input involves clothing queries," - "I should look at the tables in the database to see what I can query. Then I should query the schema of the most relevant tables." - "All SQL statements must use 'ORDER BY RAND()', for example:" - "Example Input 1: 'SELECT img_name FROM skirt WHERE opening_type = 'Button' ORDER BY RAND() LIMIT 1'" - "Example Input 2: 'SELECT img_name FROM top WHERE sleeve_length = 'Long' AND type = 'Blouse' ORDER BY RAND() LIMIT 2'" - "If the input does not involve clothing queries, " - "I should engage in conversation as an assistant or search from internet with internet_search tool." - "If the database query returns no results, please respond directly with: 'Apologies, I couldn't find any images that match your description. Could you please give me more details about the clothing you're searching for?'" + "If the input involves clothing queries,please use the get_image_from_vector_db tool." "Upon mentioning words related to 'tutorial' in the input, I should use tutorial_tool " ) diff --git a/app/service/chat_robot/script/service/CallQWen.py b/app/service/chat_robot/script/service/CallQWen.py index d2e2c06..33dcd04 100644 --- a/app/service/chat_robot/script/service/CallQWen.py +++ b/app/service/chat_robot/script/service/CallQWen.py @@ -8,6 +8,7 @@ from app.core.config import * from app.service.chat_robot.script.callbacks.qwen_callback_handler import QWenCallbackHandler from app.service.chat_robot.script.database import CustomDatabase from app.service.chat_robot.script.prompt import FASHION_CHAT_BOT_PREFIX, TOOLS_FUNCTIONS_SUFFIX, TUTORIAL_TOOL_RETURN +from app.service.search_image_with_text.service import query get_database_table_description = "Input is an empty string, output is a comma separated list of tables in the database." @@ -32,6 +33,12 @@ query_database_description = ( "order by rand() LIMIT 2'" ) +query_vector_db_description = ( + "Use this tool to find the clothing images that users need. " + "If the user's input includes clothing types such as blouse, skirt, dress, outerwear, pants, or trousers, please use this tool. " + "The input for the tool is the string provided by the user." +) + tutorial_description = ("Utilize this tool to retrieve specific statements related to user guidance tutorials." "Input is an empty string") @@ -105,15 +112,37 @@ tools = [ "function": { "name": "tutorial_tool", "description": tutorial_description, + # "parameters": { + # "type": "object", + # "properties": { + # "sql_string": { + # "type": "string", + # "description": "由模型生成的sql语句" + # } + # } + # }, + } + }, + { + "type": "function", + "function": { + "name": "get_image_from_vector_db", + "description": query_vector_db_description, "parameters": { - "type": "object", - "properties": { - "sql_string": { - "type": "string", - "description": "由模型生成的sql语句" + "parameters": { + "type": "object", + "properties": { + "gender": { + "type": "string", + "description": "性别" + }, + "content": { + "type": "string", + "description": "用户描述" + } } - } - }, + }, + } } } ] @@ -150,6 +179,10 @@ def query_database(sql_string): return CustomDatabase.run(db, sql_string) +def get_image_from_vector_db(gender, content): + return query(gender, content) + + @retry(exceptions=NewConnectionError, tries=3, delay=1) def get_response(messages): response = Generation.call( @@ -164,7 +197,8 @@ def get_response(messages): return response -def call_with_messages(message): +def call_with_messages(message, gender): + user_input = message print('\n') # messages = [ # { @@ -235,6 +269,12 @@ def call_with_messages(message): tool_info = {"name": "tutorial_tool", "role": "tool", 'content': tutorial_tool()} flag = False result_content = tool_info['content'] + elif assistant_output.tool_calls[0]['function']['name'] == 'get_image_from_vector_db': + tool_info = {"name": "get_image_from_vector_db", "role": "tool", + 'content': get_image_from_vector_db(gender, user_input)} + flag = False + result_content = tool_info['content'] + response_type = "image" print(f"工具输出信息:{tool_info['content']}\n") messages.append(tool_info) @@ -257,5 +297,6 @@ def call_with_messages(message): def tutorial_tool(): return TUTORIAL_TOOL_RETURN + if __name__ == '__main__': call_with_messages() diff --git a/app/service/search_image_with_text/service.py b/app/service/search_image_with_text/service.py new file mode 100644 index 0000000..98f6ac4 --- /dev/null +++ b/app/service/search_image_with_text/service.py @@ -0,0 +1,89 @@ +import chromadb +import hashlib + +import pandas as pd +from chromadb.config import Settings +from chromadb.utils.embedding_functions.ollama_embedding_function import OllamaEmbeddingFunction +from tqdm import tqdm + +# 读取 csv 文件 +csv_file_path = r'D:/Files/csv/output/output.csv' +image_path = r'D:/images-clean' + +df = pd.read_csv(csv_file_path, encoding='Windows-1252') + +# 创建 Chroma 客户端 +client = chromadb.Client(Settings(is_persistent=True, persist_directory="/vector_db")) +# client = chromadb.Client(Settings(is_persistent=True, persist_directory="./service/search_image_with_text/vector_db")) +# client = chromadb.Client(Settings(is_persistent=True, persist_directory="D:/workspace/AiDLab/vector_db")) +# 创建集合 +embedding_fn = OllamaEmbeddingFunction(url="http://localhost:11434/api/embeddings", model_name="mxbai-embed-large") + + +def create_collection(): + collection = client.get_or_create_collection("sub_sketches_description", embedding_function=embedding_fn) + + # 存储数据,包括自定义属性 + images_description = [] + images_metadata = [] + ids = [] + batch_size = 41666 # 最大批量大小 + for index, row in tqdm(df.iterrows()): + # 将图片的md5作为id + with open(image_path + row['path'], 'rb') as f: + image_data = f.read() + md5_value = hashlib.md5(image_data).hexdigest() + ids.append(md5_value) + images_description.append(row['description']) + images_metadata.append({ + "gender": row['gender'], + "path": row['path'] + }) + + # 将数据添加到集合 + # 每达到 batch_size 就执行一次 upsert + if len(ids) >= batch_size: + collection.upsert( + ids=list(ids), + documents=images_description, + metadatas=images_metadata # 添加自定义属性 + ) + # 清空列表以准备下一批数据 + ids.clear() + images_description.clear() + images_metadata.clear() + + if ids: + collection.upsert( + ids=list(ids), + documents=images_description, + metadatas=images_metadata # 添加自定义属性 + ) + + print("Data successfully stored in the vector database.") + + +def query(gender, content): + collection = client.get_collection("sub_sketches_description", embedding_function=embedding_fn) + # 6. 查询相似内容 + user_gender = gender # 用户输入的性别 + user_content = content # 用户输入的内容 + + results = collection.query( + query_texts=user_content, + n_results=5, # 返回前 5 个结果 + where={"gender": user_gender} # 根据性别过滤 + ) + + # 输出结果 + resp = [] + for document, result in zip(results['documents'][0], results['metadatas'][0]): + # print("Path:", result['path']) + # print("Content:", document) + resp.append(result['path']) + return resp + + +if __name__ == '__main__': + # create_collection() + query("female", "I need a long sleeve dress") From 6b8b24de896e236617bda6f989fb5ee85458f906 Mon Sep 17 00:00:00 2001 From: zhouchengrong Date: Mon, 2 Dec 2024 22:42:03 +0800 Subject: [PATCH 02/19] =?UTF-8?q?feat=20=20OLLAMA=5FURL=20=E5=88=87?= =?UTF-8?q?=E6=8D=A2=E5=88=B0A6000=20fix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/core/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/core/config.py b/app/core/config.py index d369ff2..7629429 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -138,7 +138,7 @@ SEGMENTATION = { "output": "seg_output__0", } # ollama config -OLLAMA_URL = "http://10.1.1.243:11434/api/embeddings" +OLLAMA_URL = "http://10.1.1.240:11434/api/embeddings" # DESIGN config DESIGN_MODEL_URL = '10.1.1.240:10000' AIDA_CLOTHING = "aida-clothing" From 5812c3eaaba61e8c1f4514a968c9c294b3b1867e Mon Sep 17 00:00:00 2001 From: zhouchengrong Date: Tue, 29 Oct 2024 17:44:10 +0800 Subject: [PATCH 03/19] feat fix 1 --- app/service/search_image_with_text/service.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/service/search_image_with_text/service.py b/app/service/search_image_with_text/service.py index 98f6ac4..5ac9cef 100644 --- a/app/service/search_image_with_text/service.py +++ b/app/service/search_image_with_text/service.py @@ -17,7 +17,8 @@ client = chromadb.Client(Settings(is_persistent=True, persist_directory="/vector # client = chromadb.Client(Settings(is_persistent=True, persist_directory="./service/search_image_with_text/vector_db")) # client = chromadb.Client(Settings(is_persistent=True, persist_directory="D:/workspace/AiDLab/vector_db")) # 创建集合 -embedding_fn = OllamaEmbeddingFunction(url="http://localhost:11434/api/embeddings", model_name="mxbai-embed-large") +# embedding_fn = OllamaEmbeddingFunction(url="http://localhost:11434/api/embeddings", model_name="mxbai-embed-large") +embedding_fn = OllamaEmbeddingFunction(url="http://10.1.1.240:11434/api/embeddings", model_name="mxbai-embed-large") def create_collection(): From 6a62d0844694e4cee921fccc5526e4cd77a8ca85 Mon Sep 17 00:00:00 2001 From: xupei Date: Tue, 29 Oct 2024 16:50:46 +0800 Subject: [PATCH 04/19] =?UTF-8?q?=E4=BB=8E=E5=90=91=E9=87=8F=E6=95=B0?= =?UTF-8?q?=E6=8D=AE=E5=BA=93=E4=B8=AD=E6=A3=80=E7=B4=A2=E5=9B=BE=E7=89=87?= =?UTF-8?q?=E5=B9=B6=E9=9B=86=E6=88=90=E5=88=B0chat-robot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/service/search_image_with_text/service.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/app/service/search_image_with_text/service.py b/app/service/search_image_with_text/service.py index 5ac9cef..98f6ac4 100644 --- a/app/service/search_image_with_text/service.py +++ b/app/service/search_image_with_text/service.py @@ -17,8 +17,7 @@ client = chromadb.Client(Settings(is_persistent=True, persist_directory="/vector # client = chromadb.Client(Settings(is_persistent=True, persist_directory="./service/search_image_with_text/vector_db")) # client = chromadb.Client(Settings(is_persistent=True, persist_directory="D:/workspace/AiDLab/vector_db")) # 创建集合 -# embedding_fn = OllamaEmbeddingFunction(url="http://localhost:11434/api/embeddings", model_name="mxbai-embed-large") -embedding_fn = OllamaEmbeddingFunction(url="http://10.1.1.240:11434/api/embeddings", model_name="mxbai-embed-large") +embedding_fn = OllamaEmbeddingFunction(url="http://localhost:11434/api/embeddings", model_name="mxbai-embed-large") def create_collection(): From 31d7f55402d789a884bc7d0094fc31631869f6a2 Mon Sep 17 00:00:00 2001 From: zhouchengrong Date: Tue, 29 Oct 2024 16:58:37 +0800 Subject: [PATCH 05/19] =?UTF-8?q?feat=20=20=20dockerfile=20=E4=BF=AE?= =?UTF-8?q?=E6=94=B9=20fix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- requirements.txt | Bin 1828 -> 1860 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/requirements.txt b/requirements.txt index 6c9e38f1ded86de71e2126d5c357903ea0d08a05..73507145f0a1adf6986bae737597a22a911f640e 100644 GIT binary patch delta 44 ycmZ3&cZ6@lELQnsh75)xhJ1!xhD3%Gh9rhM23rOL20aE-AU0$$-8_@En-Ku{m Date: Tue, 29 Oct 2024 17:17:30 +0800 Subject: [PATCH 06/19] =?UTF-8?q?feat=20=20=20dockerfile=20=E4=BF=AE?= =?UTF-8?q?=E6=94=B9=20fix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/service/search_image_with_text/service.py | 88 +++++++++---------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/app/service/search_image_with_text/service.py b/app/service/search_image_with_text/service.py index 98f6ac4..47a9dde 100644 --- a/app/service/search_image_with_text/service.py +++ b/app/service/search_image_with_text/service.py @@ -7,10 +7,10 @@ from chromadb.utils.embedding_functions.ollama_embedding_function import OllamaE from tqdm import tqdm # 读取 csv 文件 -csv_file_path = r'D:/Files/csv/output/output.csv' -image_path = r'D:/images-clean' +# csv_file_path = r'D:/Files/csv/output/output.csv' +# image_path = r'D:/images-clean' -df = pd.read_csv(csv_file_path, encoding='Windows-1252') +# df = pd.read_csv(csv_file_path, encoding='Windows-1252') # 创建 Chroma 客户端 client = chromadb.Client(Settings(is_persistent=True, persist_directory="/vector_db")) @@ -20,47 +20,47 @@ client = chromadb.Client(Settings(is_persistent=True, persist_directory="/vector embedding_fn = OllamaEmbeddingFunction(url="http://localhost:11434/api/embeddings", model_name="mxbai-embed-large") -def create_collection(): - collection = client.get_or_create_collection("sub_sketches_description", embedding_function=embedding_fn) - - # 存储数据,包括自定义属性 - images_description = [] - images_metadata = [] - ids = [] - batch_size = 41666 # 最大批量大小 - for index, row in tqdm(df.iterrows()): - # 将图片的md5作为id - with open(image_path + row['path'], 'rb') as f: - image_data = f.read() - md5_value = hashlib.md5(image_data).hexdigest() - ids.append(md5_value) - images_description.append(row['description']) - images_metadata.append({ - "gender": row['gender'], - "path": row['path'] - }) - - # 将数据添加到集合 - # 每达到 batch_size 就执行一次 upsert - if len(ids) >= batch_size: - collection.upsert( - ids=list(ids), - documents=images_description, - metadatas=images_metadata # 添加自定义属性 - ) - # 清空列表以准备下一批数据 - ids.clear() - images_description.clear() - images_metadata.clear() - - if ids: - collection.upsert( - ids=list(ids), - documents=images_description, - metadatas=images_metadata # 添加自定义属性 - ) - - print("Data successfully stored in the vector database.") +# def create_collection(): +# collection = client.get_or_create_collection("sub_sketches_description", embedding_function=embedding_fn) +# +# # 存储数据,包括自定义属性 +# images_description = [] +# images_metadata = [] +# ids = [] +# batch_size = 41666 # 最大批量大小 +# for index, row in tqdm(df.iterrows()): +# # 将图片的md5作为id +# with open(image_path + row['path'], 'rb') as f: +# image_data = f.read() +# md5_value = hashlib.md5(image_data).hexdigest() +# ids.append(md5_value) +# images_description.append(row['description']) +# images_metadata.append({ +# "gender": row['gender'], +# "path": row['path'] +# }) +# +# # 将数据添加到集合 +# # 每达到 batch_size 就执行一次 upsert +# if len(ids) >= batch_size: +# collection.upsert( +# ids=list(ids), +# documents=images_description, +# metadatas=images_metadata # 添加自定义属性 +# ) +# # 清空列表以准备下一批数据 +# ids.clear() +# images_description.clear() +# images_metadata.clear() +# +# if ids: +# collection.upsert( +# ids=list(ids), +# documents=images_description, +# metadatas=images_metadata # 添加自定义属性 +# ) +# +# print("Data successfully stored in the vector database.") def query(gender, content): From 55cd1b27bdb6d1e39b46a882243eb5c65cc0c299 Mon Sep 17 00:00:00 2001 From: zhouchengrong Date: Tue, 29 Oct 2024 17:27:05 +0800 Subject: [PATCH 07/19] feat fix 1 --- app/api/api_query_image.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/app/api/api_query_image.py b/app/api/api_query_image.py index d27c67b..ca0dbe6 100644 --- a/app/api/api_query_image.py +++ b/app/api/api_query_image.py @@ -1,8 +1,7 @@ import json import logging -from http.client import HTTPException -from fastapi import APIRouter +from fastapi import APIRouter, HTTPException from app.schemas.query_image import QueryImageModel from app.schemas.response_template import ResponseModel From be7e12103395166b80eb5f56977b3283e9673846 Mon Sep 17 00:00:00 2001 From: zhouchengrong Date: Tue, 29 Oct 2024 17:44:10 +0800 Subject: [PATCH 08/19] feat fix 1 --- app/service/search_image_with_text/service.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/service/search_image_with_text/service.py b/app/service/search_image_with_text/service.py index 47a9dde..36a86a8 100644 --- a/app/service/search_image_with_text/service.py +++ b/app/service/search_image_with_text/service.py @@ -17,7 +17,8 @@ client = chromadb.Client(Settings(is_persistent=True, persist_directory="/vector # client = chromadb.Client(Settings(is_persistent=True, persist_directory="./service/search_image_with_text/vector_db")) # client = chromadb.Client(Settings(is_persistent=True, persist_directory="D:/workspace/AiDLab/vector_db")) # 创建集合 -embedding_fn = OllamaEmbeddingFunction(url="http://localhost:11434/api/embeddings", model_name="mxbai-embed-large") +# embedding_fn = OllamaEmbeddingFunction(url="http://localhost:11434/api/embeddings", model_name="mxbai-embed-large") +embedding_fn = OllamaEmbeddingFunction(url="http://10.1.1.240:11434/api/embeddings", model_name="mxbai-embed-large") # def create_collection(): From aa0db5006d7dab9be273f3db6943d1fc39846053 Mon Sep 17 00:00:00 2001 From: zhouchengrong Date: Fri, 8 Nov 2024 14:35:23 +0800 Subject: [PATCH 09/19] =?UTF-8?q?feat=20=20OLLAMA=5FURL=20=E5=88=87?= =?UTF-8?q?=E6=8D=A2=E5=88=B0A6000=20fix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/service/search_image_with_text/service.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/app/service/search_image_with_text/service.py b/app/service/search_image_with_text/service.py index 36a86a8..edd4d93 100644 --- a/app/service/search_image_with_text/service.py +++ b/app/service/search_image_with_text/service.py @@ -6,6 +6,8 @@ from chromadb.config import Settings from chromadb.utils.embedding_functions.ollama_embedding_function import OllamaEmbeddingFunction from tqdm import tqdm +from app.core.config import OLLAMA_URL + # 读取 csv 文件 # csv_file_path = r'D:/Files/csv/output/output.csv' # image_path = r'D:/images-clean' @@ -18,7 +20,7 @@ client = chromadb.Client(Settings(is_persistent=True, persist_directory="/vector # client = chromadb.Client(Settings(is_persistent=True, persist_directory="D:/workspace/AiDLab/vector_db")) # 创建集合 # embedding_fn = OllamaEmbeddingFunction(url="http://localhost:11434/api/embeddings", model_name="mxbai-embed-large") -embedding_fn = OllamaEmbeddingFunction(url="http://10.1.1.240:11434/api/embeddings", model_name="mxbai-embed-large") +embedding_fn = OllamaEmbeddingFunction(url=OLLAMA_URL, model_name="mxbai-embed-large") # def create_collection(): @@ -67,7 +69,7 @@ embedding_fn = OllamaEmbeddingFunction(url="http://10.1.1.240:11434/api/embeddin def query(gender, content): collection = client.get_collection("sub_sketches_description", embedding_function=embedding_fn) # 6. 查询相似内容 - user_gender = gender # 用户输入的性别 + user_gender = gender.lower() # 用户输入的性别 user_content = content # 用户输入的内容 results = collection.query( From 1a12ed2201eaeebed4d8fdf8190bb8deee7ec7c9 Mon Sep 17 00:00:00 2001 From: zhouchengrong Date: Mon, 2 Dec 2024 23:17:59 +0800 Subject: [PATCH 10/19] =?UTF-8?q?feat=20=20rabbitt=20env=20=E5=88=87?= =?UTF-8?q?=E6=8D=A2=E5=88=B0=E7=94=9F=E4=BA=A7=20fix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/core/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/core/config.py b/app/core/config.py index 7629429..2575c97 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -30,8 +30,8 @@ else: CATEGORY_PATH = "app/service/attribute/config/descriptor/category/category_dis.csv" SEG_CACHE_PATH = "/seg_cache/" -# RABBITMQ_ENV = "" # 生产环境 -RABBITMQ_ENV = "-dev" # 开发环境 +RABBITMQ_ENV = "" # 生产环境 +# RABBITMQ_ENV = "-dev" # 开发环境 # RABBITMQ_ENV = "-local" # 本地测试环境 settings = Settings() From f6c166b24d02747f2a19f092d9a6869affe898b8 Mon Sep 17 00:00:00 2001 From: zhouchengrong Date: Mon, 2 Dec 2024 23:21:22 +0800 Subject: [PATCH 11/19] =?UTF-8?q?Revert=20"feat=20=20rabbitt=20env=20?= =?UTF-8?q?=E5=88=87=E6=8D=A2=E5=88=B0=E7=94=9F=E4=BA=A7"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 1a12ed2201eaeebed4d8fdf8190bb8deee7ec7c9. --- app/core/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/core/config.py b/app/core/config.py index 2575c97..7629429 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -30,8 +30,8 @@ else: CATEGORY_PATH = "app/service/attribute/config/descriptor/category/category_dis.csv" SEG_CACHE_PATH = "/seg_cache/" -RABBITMQ_ENV = "" # 生产环境 -# RABBITMQ_ENV = "-dev" # 开发环境 +# RABBITMQ_ENV = "" # 生产环境 +RABBITMQ_ENV = "-dev" # 开发环境 # RABBITMQ_ENV = "-local" # 本地测试环境 settings = Settings() From 7b7dad636c39feb137e687332ce16c2d4964a919 Mon Sep 17 00:00:00 2001 From: zhouchengrong Date: Mon, 2 Dec 2024 23:21:30 +0800 Subject: [PATCH 12/19] =?UTF-8?q?Revert=20"feat=20=20OLLAMA=5FURL=20?= =?UTF-8?q?=E5=88=87=E6=8D=A2=E5=88=B0A6000"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit aa0db5006d7dab9be273f3db6943d1fc39846053. --- app/service/search_image_with_text/service.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/app/service/search_image_with_text/service.py b/app/service/search_image_with_text/service.py index edd4d93..36a86a8 100644 --- a/app/service/search_image_with_text/service.py +++ b/app/service/search_image_with_text/service.py @@ -6,8 +6,6 @@ from chromadb.config import Settings from chromadb.utils.embedding_functions.ollama_embedding_function import OllamaEmbeddingFunction from tqdm import tqdm -from app.core.config import OLLAMA_URL - # 读取 csv 文件 # csv_file_path = r'D:/Files/csv/output/output.csv' # image_path = r'D:/images-clean' @@ -20,7 +18,7 @@ client = chromadb.Client(Settings(is_persistent=True, persist_directory="/vector # client = chromadb.Client(Settings(is_persistent=True, persist_directory="D:/workspace/AiDLab/vector_db")) # 创建集合 # embedding_fn = OllamaEmbeddingFunction(url="http://localhost:11434/api/embeddings", model_name="mxbai-embed-large") -embedding_fn = OllamaEmbeddingFunction(url=OLLAMA_URL, model_name="mxbai-embed-large") +embedding_fn = OllamaEmbeddingFunction(url="http://10.1.1.240:11434/api/embeddings", model_name="mxbai-embed-large") # def create_collection(): @@ -69,7 +67,7 @@ embedding_fn = OllamaEmbeddingFunction(url=OLLAMA_URL, model_name="mxbai-embed-l def query(gender, content): collection = client.get_collection("sub_sketches_description", embedding_function=embedding_fn) # 6. 查询相似内容 - user_gender = gender.lower() # 用户输入的性别 + user_gender = gender # 用户输入的性别 user_content = content # 用户输入的内容 results = collection.query( From bb0ac9046b8bf228114b0fc94d91ef5a43c7a456 Mon Sep 17 00:00:00 2001 From: zhouchengrong Date: Mon, 2 Dec 2024 23:21:45 +0800 Subject: [PATCH 13/19] Revert "feat" This reverts commit be7e12103395166b80eb5f56977b3283e9673846. --- app/service/search_image_with_text/service.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/app/service/search_image_with_text/service.py b/app/service/search_image_with_text/service.py index 36a86a8..47a9dde 100644 --- a/app/service/search_image_with_text/service.py +++ b/app/service/search_image_with_text/service.py @@ -17,8 +17,7 @@ client = chromadb.Client(Settings(is_persistent=True, persist_directory="/vector # client = chromadb.Client(Settings(is_persistent=True, persist_directory="./service/search_image_with_text/vector_db")) # client = chromadb.Client(Settings(is_persistent=True, persist_directory="D:/workspace/AiDLab/vector_db")) # 创建集合 -# embedding_fn = OllamaEmbeddingFunction(url="http://localhost:11434/api/embeddings", model_name="mxbai-embed-large") -embedding_fn = OllamaEmbeddingFunction(url="http://10.1.1.240:11434/api/embeddings", model_name="mxbai-embed-large") +embedding_fn = OllamaEmbeddingFunction(url="http://localhost:11434/api/embeddings", model_name="mxbai-embed-large") # def create_collection(): From 85145ba2c9a54cc4f8899aaedfb853f5978d2249 Mon Sep 17 00:00:00 2001 From: zhouchengrong Date: Mon, 2 Dec 2024 23:21:46 +0800 Subject: [PATCH 14/19] Revert "feat" This reverts commit 55cd1b27bdb6d1e39b46a882243eb5c65cc0c299. --- app/api/api_query_image.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/api/api_query_image.py b/app/api/api_query_image.py index ca0dbe6..d27c67b 100644 --- a/app/api/api_query_image.py +++ b/app/api/api_query_image.py @@ -1,7 +1,8 @@ import json import logging +from http.client import HTTPException -from fastapi import APIRouter, HTTPException +from fastapi import APIRouter from app.schemas.query_image import QueryImageModel from app.schemas.response_template import ResponseModel From e134453976d519b9cc443865e8b1cdc9a3200816 Mon Sep 17 00:00:00 2001 From: zhouchengrong Date: Mon, 2 Dec 2024 23:21:48 +0800 Subject: [PATCH 15/19] =?UTF-8?q?Revert=20"feat=20=20=20dockerfile=20?= =?UTF-8?q?=E4=BF=AE=E6=94=B9"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 1ba67d0bf72c400a405437ee67591ab5750a4d9a. --- app/service/search_image_with_text/service.py | 88 +++++++++---------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/app/service/search_image_with_text/service.py b/app/service/search_image_with_text/service.py index 47a9dde..98f6ac4 100644 --- a/app/service/search_image_with_text/service.py +++ b/app/service/search_image_with_text/service.py @@ -7,10 +7,10 @@ from chromadb.utils.embedding_functions.ollama_embedding_function import OllamaE from tqdm import tqdm # 读取 csv 文件 -# csv_file_path = r'D:/Files/csv/output/output.csv' -# image_path = r'D:/images-clean' +csv_file_path = r'D:/Files/csv/output/output.csv' +image_path = r'D:/images-clean' -# df = pd.read_csv(csv_file_path, encoding='Windows-1252') +df = pd.read_csv(csv_file_path, encoding='Windows-1252') # 创建 Chroma 客户端 client = chromadb.Client(Settings(is_persistent=True, persist_directory="/vector_db")) @@ -20,47 +20,47 @@ client = chromadb.Client(Settings(is_persistent=True, persist_directory="/vector embedding_fn = OllamaEmbeddingFunction(url="http://localhost:11434/api/embeddings", model_name="mxbai-embed-large") -# def create_collection(): -# collection = client.get_or_create_collection("sub_sketches_description", embedding_function=embedding_fn) -# -# # 存储数据,包括自定义属性 -# images_description = [] -# images_metadata = [] -# ids = [] -# batch_size = 41666 # 最大批量大小 -# for index, row in tqdm(df.iterrows()): -# # 将图片的md5作为id -# with open(image_path + row['path'], 'rb') as f: -# image_data = f.read() -# md5_value = hashlib.md5(image_data).hexdigest() -# ids.append(md5_value) -# images_description.append(row['description']) -# images_metadata.append({ -# "gender": row['gender'], -# "path": row['path'] -# }) -# -# # 将数据添加到集合 -# # 每达到 batch_size 就执行一次 upsert -# if len(ids) >= batch_size: -# collection.upsert( -# ids=list(ids), -# documents=images_description, -# metadatas=images_metadata # 添加自定义属性 -# ) -# # 清空列表以准备下一批数据 -# ids.clear() -# images_description.clear() -# images_metadata.clear() -# -# if ids: -# collection.upsert( -# ids=list(ids), -# documents=images_description, -# metadatas=images_metadata # 添加自定义属性 -# ) -# -# print("Data successfully stored in the vector database.") +def create_collection(): + collection = client.get_or_create_collection("sub_sketches_description", embedding_function=embedding_fn) + + # 存储数据,包括自定义属性 + images_description = [] + images_metadata = [] + ids = [] + batch_size = 41666 # 最大批量大小 + for index, row in tqdm(df.iterrows()): + # 将图片的md5作为id + with open(image_path + row['path'], 'rb') as f: + image_data = f.read() + md5_value = hashlib.md5(image_data).hexdigest() + ids.append(md5_value) + images_description.append(row['description']) + images_metadata.append({ + "gender": row['gender'], + "path": row['path'] + }) + + # 将数据添加到集合 + # 每达到 batch_size 就执行一次 upsert + if len(ids) >= batch_size: + collection.upsert( + ids=list(ids), + documents=images_description, + metadatas=images_metadata # 添加自定义属性 + ) + # 清空列表以准备下一批数据 + ids.clear() + images_description.clear() + images_metadata.clear() + + if ids: + collection.upsert( + ids=list(ids), + documents=images_description, + metadatas=images_metadata # 添加自定义属性 + ) + + print("Data successfully stored in the vector database.") def query(gender, content): From e097e485b82771f6948b8e36abb072c8b81b2b09 Mon Sep 17 00:00:00 2001 From: zhouchengrong Date: Mon, 2 Dec 2024 23:21:49 +0800 Subject: [PATCH 16/19] =?UTF-8?q?Revert=20"feat=20=20=20dockerfile=20?= =?UTF-8?q?=E4=BF=AE=E6=94=B9"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 31d7f55402d789a884bc7d0094fc31631869f6a2. --- requirements.txt | Bin 1860 -> 1828 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/requirements.txt b/requirements.txt index 73507145f0a1adf6986bae737597a22a911f640e..6c9e38f1ded86de71e2126d5c357903ea0d08a05 100644 GIT binary patch delta 12 TcmX@Yw}fxQEY{7tSi2YjA{zwJ delta 44 ycmZ3&cZ6@lELQnsh75)xhJ1!xhD3%Gh9rhM23rOL20aE-AU0$$-8_@En-Ku{m Date: Mon, 2 Dec 2024 23:21:51 +0800 Subject: [PATCH 17/19] =?UTF-8?q?Revert=20"=E4=BB=8E=E5=90=91=E9=87=8F?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E5=BA=93=E4=B8=AD=E6=A3=80=E7=B4=A2=E5=9B=BE?= =?UTF-8?q?=E7=89=87=E5=B9=B6=E9=9B=86=E6=88=90=E5=88=B0chat-robot"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 6a62d0844694e4cee921fccc5526e4cd77a8ca85. --- app/service/search_image_with_text/service.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/service/search_image_with_text/service.py b/app/service/search_image_with_text/service.py index 98f6ac4..5ac9cef 100644 --- a/app/service/search_image_with_text/service.py +++ b/app/service/search_image_with_text/service.py @@ -17,7 +17,8 @@ client = chromadb.Client(Settings(is_persistent=True, persist_directory="/vector # client = chromadb.Client(Settings(is_persistent=True, persist_directory="./service/search_image_with_text/vector_db")) # client = chromadb.Client(Settings(is_persistent=True, persist_directory="D:/workspace/AiDLab/vector_db")) # 创建集合 -embedding_fn = OllamaEmbeddingFunction(url="http://localhost:11434/api/embeddings", model_name="mxbai-embed-large") +# embedding_fn = OllamaEmbeddingFunction(url="http://localhost:11434/api/embeddings", model_name="mxbai-embed-large") +embedding_fn = OllamaEmbeddingFunction(url="http://10.1.1.240:11434/api/embeddings", model_name="mxbai-embed-large") def create_collection(): From 68c95eec0c0480415982f77e59310ee578354bd2 Mon Sep 17 00:00:00 2001 From: zhouchengrong Date: Mon, 2 Dec 2024 23:21:53 +0800 Subject: [PATCH 18/19] Revert "feat" This reverts commit 5812c3eaaba61e8c1f4514a968c9c294b3b1867e. --- app/service/search_image_with_text/service.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/app/service/search_image_with_text/service.py b/app/service/search_image_with_text/service.py index 5ac9cef..98f6ac4 100644 --- a/app/service/search_image_with_text/service.py +++ b/app/service/search_image_with_text/service.py @@ -17,8 +17,7 @@ client = chromadb.Client(Settings(is_persistent=True, persist_directory="/vector # client = chromadb.Client(Settings(is_persistent=True, persist_directory="./service/search_image_with_text/vector_db")) # client = chromadb.Client(Settings(is_persistent=True, persist_directory="D:/workspace/AiDLab/vector_db")) # 创建集合 -# embedding_fn = OllamaEmbeddingFunction(url="http://localhost:11434/api/embeddings", model_name="mxbai-embed-large") -embedding_fn = OllamaEmbeddingFunction(url="http://10.1.1.240:11434/api/embeddings", model_name="mxbai-embed-large") +embedding_fn = OllamaEmbeddingFunction(url="http://localhost:11434/api/embeddings", model_name="mxbai-embed-large") def create_collection(): From 2102b712300f89bb4b2820ae415b08da0aeecc20 Mon Sep 17 00:00:00 2001 From: zhouchengrong Date: Mon, 2 Dec 2024 23:26:19 +0800 Subject: [PATCH 19/19] =?UTF-8?q?feat=20=20=E4=BF=AE=E5=A4=8Dchatroboot=20?= =?UTF-8?q?fix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/service/search_image_with_text/service.py | 90 +++++++++---------- 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/app/service/search_image_with_text/service.py b/app/service/search_image_with_text/service.py index 98f6ac4..712050f 100644 --- a/app/service/search_image_with_text/service.py +++ b/app/service/search_image_with_text/service.py @@ -7,60 +7,60 @@ from chromadb.utils.embedding_functions.ollama_embedding_function import OllamaE from tqdm import tqdm # 读取 csv 文件 -csv_file_path = r'D:/Files/csv/output/output.csv' -image_path = r'D:/images-clean' +# csv_file_path = r'D:/Files/csv/output/output.csv' +# image_path = r'D:/images-clean' -df = pd.read_csv(csv_file_path, encoding='Windows-1252') +# df = pd.read_csv(csv_file_path, encoding='Windows-1252') # 创建 Chroma 客户端 client = chromadb.Client(Settings(is_persistent=True, persist_directory="/vector_db")) # client = chromadb.Client(Settings(is_persistent=True, persist_directory="./service/search_image_with_text/vector_db")) # client = chromadb.Client(Settings(is_persistent=True, persist_directory="D:/workspace/AiDLab/vector_db")) # 创建集合 -embedding_fn = OllamaEmbeddingFunction(url="http://localhost:11434/api/embeddings", model_name="mxbai-embed-large") +embedding_fn = OllamaEmbeddingFunction(url="http://10.1.1.240:11434/api/embeddings", model_name="mxbai-embed-large") -def create_collection(): - collection = client.get_or_create_collection("sub_sketches_description", embedding_function=embedding_fn) - - # 存储数据,包括自定义属性 - images_description = [] - images_metadata = [] - ids = [] - batch_size = 41666 # 最大批量大小 - for index, row in tqdm(df.iterrows()): - # 将图片的md5作为id - with open(image_path + row['path'], 'rb') as f: - image_data = f.read() - md5_value = hashlib.md5(image_data).hexdigest() - ids.append(md5_value) - images_description.append(row['description']) - images_metadata.append({ - "gender": row['gender'], - "path": row['path'] - }) - - # 将数据添加到集合 - # 每达到 batch_size 就执行一次 upsert - if len(ids) >= batch_size: - collection.upsert( - ids=list(ids), - documents=images_description, - metadatas=images_metadata # 添加自定义属性 - ) - # 清空列表以准备下一批数据 - ids.clear() - images_description.clear() - images_metadata.clear() - - if ids: - collection.upsert( - ids=list(ids), - documents=images_description, - metadatas=images_metadata # 添加自定义属性 - ) - - print("Data successfully stored in the vector database.") +# def create_collection(): +# collection = client.get_or_create_collection("sub_sketches_description", embedding_function=embedding_fn) +# +# # 存储数据,包括自定义属性 +# images_description = [] +# images_metadata = [] +# ids = [] +# batch_size = 41666 # 最大批量大小 +# for index, row in tqdm(df.iterrows()): +# # 将图片的md5作为id +# with open(image_path + row['path'], 'rb') as f: +# image_data = f.read() +# md5_value = hashlib.md5(image_data).hexdigest() +# ids.append(md5_value) +# images_description.append(row['description']) +# images_metadata.append({ +# "gender": row['gender'], +# "path": row['path'] +# }) +# +# # 将数据添加到集合 +# # 每达到 batch_size 就执行一次 upsert +# if len(ids) >= batch_size: +# collection.upsert( +# ids=list(ids), +# documents=images_description, +# metadatas=images_metadata # 添加自定义属性 +# ) +# # 清空列表以准备下一批数据 +# ids.clear() +# images_description.clear() +# images_metadata.clear() +# +# if ids: +# collection.upsert( +# ids=list(ids), +# documents=images_description, +# metadatas=images_metadata # 添加自定义属性 +# ) +# +# print("Data successfully stored in the vector database.") def query(gender, content):