Files
sora_python/app/api/api_process_lookbooks.py
2024-10-21 11:01:28 +08:00

56 lines
2.0 KiB
Python

import logging
import os
import shutil
from typing import List
import tqdm
from fastapi import UploadFile, File, APIRouter
from app.service.lookbooks.service import create_image_batch_requests
# Module-scoped logger: records carry this module's dotted name, so they can be
# filtered and attributed per module instead of all landing on the root logger.
logger = logging.getLogger(__name__)

# Router that collects this module's endpoints; the route below registers on it.
router = APIRouter()
def _safe_upload_name(filename):
    """Return the bare file name of an upload, or ``None`` if it is unusable.

    The name comes from the client, so any directory components are stripped
    before it is used to build a path on disk.

    Args:
        filename: The client-supplied file name; may be ``None`` or empty.

    Returns:
        The final path component of ``filename``, or ``None`` when nothing
        usable remains (missing name, empty name, ``.`` or ``..``).
    """
    if not filename:
        return None
    # Clients may send backslash-separated paths; normalise them so that
    # basename() strips those directory parts on POSIX hosts as well.
    name = os.path.basename(filename.replace("\\", "/"))
    if name in ("", ".", ".."):
        return None
    return name


@router.post("/process_lookbooks/")
async def process_lookbooks(files: List[UploadFile] = File(...)):
    """Store uploaded lookbooks, extract their images and request descriptions.

    Each upload is copied into a temporary directory. For every stored
    lookbook, ``partition_pdf`` is run only when the lookbook's image
    directory is still empty; the files found in that directory afterwards
    are collected and handed to ``create_image_batch_requests``. The
    temporary upload directory is removed before returning, including when
    processing fails.

    Args:
        files: The uploaded lookbook files (multipart form field ``files``).

    Returns:
        A dict with a ``message`` key, plus ``result_file`` when
        ``create_image_batch_requests`` returned a truthy value.
    """
    lookbook_dir = "service/lookbooks/temp_lookbooks"
    os.makedirs(lookbook_dir, exist_ok=True)

    try:
        lookbook_list = []
        for file in files:
            # Never trust the client-supplied name as a path: keep only its
            # final component so the write cannot escape lookbook_dir.
            safe_name = _safe_upload_name(file.filename)
            if safe_name is None:
                logger.warning(
                    "Skipping upload with unusable filename: %r", file.filename
                )
                continue
            file_path = os.path.join(lookbook_dir, safe_name)
            with open(file_path, "wb") as f:
                shutil.copyfileobj(file.file, f)
            lookbook_list.append(file_path)

        image_list = []
        for look_book_path in tqdm.tqdm(lookbook_list):
            lookbook_name = os.path.splitext(os.path.basename(look_book_path))[0]
            output_dir = os.path.join(
                "app/service/lookbooks/fashion_documents/lookbook/images",
                lookbook_name,
            )
            os.makedirs(output_dir, exist_ok=True)

            # An empty directory means this lookbook has not been extracted yet.
            if not os.listdir(output_dir):
                # Imported lazily so the dependency is only loaded when an
                # extraction is actually required.
                from unstructured.partition.pdf import partition_pdf

                partition_pdf(
                    filename=look_book_path,
                    extract_images_in_pdf=True,
                    infer_table_structure=False,
                    chunking_strategy="by_title",
                    max_characters=4000,
                    new_after_n_chars=3800,
                    combine_text_under_n_chars=2000,
                    extract_image_block_output_dir=output_dir,
                )

            # List the directory after the extraction check so that freshly
            # extracted images are included, not only pre-existing ones.
            image_list.extend(
                os.path.join(output_dir, x) for x in os.listdir(output_dir)
            )

        image_description_results_file = create_image_batch_requests(
            image_list, "app/service/lookbooks/fashion_documents/lookbook/results"
        )
    finally:
        # Remove the uploaded copies even if extraction or batching failed.
        shutil.rmtree(lookbook_dir)

    if image_description_results_file:
        return {
            "message": "Lookbooks processed successfully",
            "result_file": image_description_results_file,
        }
    return {"message": "No new images to process"}