Files
AiDA_Python/app/service/generate_image/utils/image_processing.py
2024-04-29 10:49:38 +08:00

348 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import logging
import mmcv
import numpy as np
import torch
import tritonclient.http as httpclient
import torch.nn.functional as F
from app.core.config import *
import cv2
logger = logging.getLogger()
def seg_preprocess(img_path):
    """Load an image and prepare it as NCHW float input for the segmentation model.

    Args:
        img_path: Image path or array, anything ``mmcv.imread`` accepts.

    Returns:
        Tuple of (preprocessed image with shape (1, 3, 224, 224),
        original (height, width) of the input image).
    """
    img = mmcv.imread(img_path)
    ori_shape = img.shape[:2]  # (height, width) before resizing
    # Resize to the fixed network input size. Fix: the original collected the
    # returned scale factors into a dead `scale_factor` list that was never used.
    img, _, _ = mmcv.imresize(img, (224, 224), return_scale=True)
    # ImageNet mean/std normalization, converting BGR (mmcv default) to RGB.
    img = mmcv.imnormalize(
        img,
        mean=np.array([123.675, 116.28, 103.53]),
        std=np.array([58.395, 57.12, 57.375]),
        to_rgb=True,
    )
    # HWC -> CHW, then add the batch dimension.
    preprocessed_img = np.expand_dims(img.transpose(2, 0, 1), axis=0)
    return preprocessed_img, ori_shape
def get_mask(image_obj):
    """Build a foreground mask for *image_obj* from its largest outer contour.

    A grayscale input is promoted to three channels. A four-channel input
    contributes its alpha plane, which is ANDed into the final mask.

    Returns:
        Tuple of (3-channel image, uint8 mask with foreground set to 255).
    """
    alpha_mask = None
    if len(image_obj.shape) == 2:
        image_obj = cv2.cvtColor(image_obj, cv2.COLOR_GRAY2RGB)
    if image_obj.shape[2] == 4:  # four-channel image: keep the alpha plane
        alpha_mask = image_obj[:, :, 3]
        image_obj = image_obj[:, :, :3]

    contour_mask = np.zeros(image_obj.shape[:2], np.uint8)
    contours = get_contours(image_obj)
    if contours:
        # Fill a slightly simplified polygon of the largest contour.
        largest = contours[0]
        epsilon = 0.001 * cv2.arcLength(largest, True)
        approx = cv2.approxPolyDP(largest, epsilon, True)
        cv2.drawContours(contour_mask, [approx], -1, 255, -1)
    else:
        # No contour found: treat the whole image as foreground.
        contour_mask = np.ones(image_obj.shape[:2], np.uint8) * 255

    if alpha_mask is None:
        mask = contour_mask
    else:
        mask = cv2.bitwise_and(contour_mask, alpha_mask)
    return image_obj, mask
def get_contours(image):
    """Return external contours of *image*, sorted largest-area first.

    Edges are detected with Canny, then closed (dilate followed by erode
    with a 5x5 kernel) so broken outlines still form complete contours.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 10, 150)
    kernel = np.ones((5, 5), np.uint8)
    # Morphological closing: dilate to bridge gaps, then erode back.
    edges = cv2.dilate(edges, kernel=kernel, iterations=1)
    edges = cv2.erode(edges, kernel=kernel, iterations=1)
    found, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return sorted(found, key=cv2.contourArea, reverse=True)
def seg_infer_image(image_obj):
    """Run the Triton segmentation model on an image and return the class map.

    Args:
        image_obj: Image path or array, forwarded to ``seg_preprocess``.

    Returns:
        Per-pixel class-index array as produced by ``seg_postprocess``.
    """
    preprocessed, ori_shape = seg_preprocess(image_obj)
    batch = preprocessed.astype(np.float32)
    client = httpclient.InferenceServerClient(url=f"{SEG_MODEL_URL}")
    # Bind input / output tensors by the names configured for this model.
    infer_input = httpclient.InferInput(SEGMENTATION['input'], batch.shape, datatype="FP32")
    infer_input.set_data_from_numpy(batch, binary_data=True)
    requested = httpclient.InferRequestedOutput(SEGMENTATION['output'], binary_data=True)
    # Run inference and fetch the result tensor.
    results = client.infer(model_name=SEGMENTATION['name'], inputs=[infer_input], outputs=[requested])
    raw_output = torch.from_numpy(results.as_numpy(SEGMENTATION['output']))
    return seg_postprocess(raw_output, ori_shape)
def seg_postprocess(output, ori_shape):
    """Upsample model logits to the source size and pick the best class per pixel.

    Args:
        output: NCHW logits tensor from the segmentation model.
        ori_shape: Target (height, width) of the original image.

    Returns:
        numpy array of shape (N, height, width) holding class indices.
    """
    upsampled = F.interpolate(
        output, size=ori_shape, scale_factor=None, mode='bilinear', align_corners=False
    )
    # Softmax is monotonic per pixel, so it does not change the argmax;
    # kept to mirror the training-time head exactly.
    probabilities = F.softmax(upsampled, dim=1)
    return probabilities.argmax(dim=1).cpu().numpy()
def remove_background(image):
    """Replace everything outside segmentation classes 1 and 2 with pure white.

    Combines the contour/alpha mask from ``get_mask`` with the model's
    per-pixel prediction, then paints non-foreground pixels white.

    Args:
        image: BGR (or BGRA / grayscale) image array.

    Returns:
        The image with its background replaced by white.
    """
    image_obj, mask = get_mask(image)
    seg_result = seg_infer_image(image_obj)

    def _class_mask(class_id):
        # Keep mask pixels only where the model predicted this class.
        selected = mask * (seg_result == class_id).astype(np.uint8)
        # seg_result is batched (1, H, W); drop the batch axis if present.
        return selected[0] if len(selected.shape) > 2 else selected

    # Classes 1 and 2 are both treated as foreground.
    result_mask = _class_mask(1) + _class_mask(2)
    white_background = np.ones_like(image_obj) * 255
    return np.where(result_mask[:, :, None].astype(bool), image_obj, white_background)
def bounding_box(image):
    """Crop *image* to the rectangle enclosing all detected edge contours.

    Args:
        image: uint8 image array.

    Returns:
        The cropped region covering every contour's bounding rect, or the
        original image unchanged when no contour is found.
    """
    edges = cv2.Canny(image, 50, 150)
    # Find outer contours on the edge map.
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # Bug fix: with no contours the trackers stayed at +/-inf and the float
    # slice indices below raised TypeError; fall back to the whole image.
    if not contours:
        return image
    # Track the rectangle that covers every contour's bounding rect.
    x_min, y_min, x_max, y_max = float('inf'), float('inf'), -1, -1
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        x_min = min(x_min, x)
        y_min = min(y_min, y)
        x_max = max(x_max, x + w)
        y_max = max(y_max, y + h)
    # Crop the original image to the enclosing rectangle.
    return image[y_min:y_max, x_min:x_max]
def stain_detection(image, spot_size=100):
    """Validate a white-background product image.

    Checks that the two top corner patches are pure white (no stains) and
    that the central patch contains no contiguous pure-white (washed-out)
    region.

    Args:
        image: BGR uint8 image, assumed to have a white background.
        spot_size: Side length in pixels of each corner patch to inspect.

    Returns:
        (True, possibly-annotated image) when the image passes all checks;
        (False, None) when a stain or a washed-out center is found.
    """
    height, width, _ = image.shape
    corners = [
        image[0:spot_size, 0:spot_size],  # top left
        image[0:spot_size, width - spot_size:width],  # top right
        # image[height - spot_size:height, 0:spot_size],  # bottom left
        # image[height - spot_size:height, width - spot_size:width]  # bottom right
    ]
    for index, corner in enumerate(corners):
        # Every pixel of a clean corner must be exactly (255, 255, 255).
        num_white_pixels = (corner == [255, 255, 255]).all(axis=2).sum()
        if num_white_pixels != spot_size * spot_size:
            logger.info(f"{index + 1}发现了污点")
            return False, None
    # Center-region check.
    # Convert the image to grayscale.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Coordinates of the image center.
    center_x, center_y = image.shape[1] // 2, image.shape[0] // 2
    # Size of the central patch to inspect.
    patch_size = 100
    half_patch = patch_size // 2
    # Extract the central patch.
    center_patch = gray[center_y - half_patch:center_y + half_patch, center_x - half_patch:center_x + half_patch]
    # Threshold to isolate pure-white pixels (gray value > 254).
    _, thresh = cv2.threshold(center_patch, 254, 255, cv2.THRESH_BINARY)
    # Find white blobs in the thresholded patch.
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # Filter out non-contiguous specks; area >= 50 px is treated as a contiguous region.
    filtered_contours = [cnt for cnt in contours if cv2.contourArea(cnt) >= 50]
    # A contiguous pure-white region in the center means the image is washed out.
    if filtered_contours:
        # Debug aid: paint the white regions gray and show the result.
        if DEBUG:
            for cnt in filtered_contours:
                x, y, w, h = cv2.boundingRect(cnt)
                # Replace the matching pixels in the original image.
                image[y + center_y - half_patch:y + center_y - half_patch + h, x + center_x - half_patch:x + center_x - half_patch + w][thresh[y:y + h, x:x + w] == 255] = (128, 128, 128)
            # Show the annotated image.
            cv2.imshow('Marked Image', image)
            cv2.waitKey(0)
        logger.info("中心区域存在连续的纯白区域")
        is_pure_white = True
    else:
        logger.info("中心区域不存在连续的纯白区域")
        is_pure_white = False
    if is_pure_white:
        return False, None
    if DEBUG:
        # Draw the inspected corner and center regions for visual verification.
        # NOTE(review): corner_coords here look like (y, x) pairs while
        # cv2.rectangle expects (x, y) — debug-only, but verify against usage.
        for corner_coords in [
            (0, 0),
            # (0, width - spot_size),
            (height - spot_size, 0),
            # (height - spot_size, width - spot_size)
            # center point
        ]:
            cv2.rectangle(image, corner_coords, (corner_coords[0] + spot_size, corner_coords[1] + spot_size), (0, 0, 255), 2)
        cv2.rectangle(image, (center_x - spot_size // 2, center_y - spot_size // 2), (center_x + spot_size // 2, center_y + spot_size // 2), (0, 255, 0), 2)  # draw the rectangle on the original image
    return True, image
def generate_category_recognition(image, gender):
    """Classify a garment image into a coarse category via the Triton attribute model.

    Args:
        image: Image path or array accepted by ``mmcv.imread``.
        gender: ``"Male"`` remaps the raw label onto the male catalog's
            coarse buckets (Bottoms / Tops / Outwear); any other value
            keeps the raw label.

    Returns:
        Tuple of (category name, raw score array, the original image argument).
    """

    def preprocess(img):
        # Resize to 224x224, ImageNet-normalize (BGR -> RGB), HWC -> NCHW.
        # Fix: dropped the dead `scale_factor` list the original built here.
        img = mmcv.imread(img)
        img, _, _ = mmcv.imresize(img, (224, 224), return_scale=True)
        img = mmcv.imnormalize(
            img,
            mean=np.array([123.675, 116.28, 103.53]),
            std=np.array([58.395, 57.12, 57.375]),
            to_rgb=True,
        )
        return np.expand_dims(img.transpose(2, 0, 1), axis=0)

    preprocessed_img = preprocess(image)
    triton_client = httpclient.InferenceServerClient(url=ATT_TRITON_URL)
    inputs = [
        httpclient.InferInput("input__0", preprocessed_img.shape, datatype="FP32")
    ]
    inputs[0].set_data_from_numpy(preprocessed_img, binary_data=True)
    results = triton_client.infer(model_name="attr_retrieve_category", inputs=inputs)
    # Fix: 'output__0' was a pointless f-string with no placeholders.
    inference_output = torch.from_numpy(results.as_numpy('output__0'))
    scores = inference_output.detach().numpy()
    import pandas as pd
    attr_type = pd.read_csv(CATEGORY_PATH)
    colattr = list(attr_type['labelName'])
    # Pick the best-scoring label among the first five classes.
    # (Fix: removed the unused `task` local read from the CSV.)
    maxsc = np.max(scores[0][:5])
    indexs = np.argwhere(scores == maxsc)[:, 1]
    category = colattr[indexs[0]]
    if gender == "Male":
        # Map female-catalog labels onto the male catalog's coarse buckets.
        if category == 'Trousers' or category == 'Skirt':
            category = 'Bottoms'
        elif category == 'Blouse' or category == 'Dress':
            category = 'Tops'
        else:
            category = 'Outwear'
    return category, scores, image
def autoLevels(img, cutoff=0.1):
    """Photoshop-style automatic levels adjustment.

    Per channel: linearly stretches the [cutoff%, 100-cutoff%] percentile
    range to full scale, applies a gamma correction that maps the channel's
    median toward mid-gray, then compresses the output into [5, 250].

    Args:
        img: HxWx3 uint8 image.
        cutoff: Percentile (in percent) clipped at each end of the histogram.

    Returns:
        The adjusted image, same shape and dtype as *img*.
    """
    channels = img.shape[2]  # h, w, ch
    table = np.zeros((1, 256, 3), np.uint8)  # per-channel lookup table for cv2.LUT
    for ch in range(channels):
        # cutoff=0.1 -> use the 0.1% and 99.9% percentile gray values.
        low = np.percentile(img[:, :, ch], q=cutoff)  # gray value at the cutoff percentile
        high = np.percentile(img[:, :, ch], q=100 - cutoff)  # gray value at 100-cutoff; [0, high] covers that fraction
        # Input linear stretch.
        Sin = min(max(low, 0), high - 2)  # Sin: input black point, 0 <= Sin < Hin
        Hin = min(high, 255)  # Hin: input white point, Sin < Hin <= 255
        difIn = Hin - Sin
        V1 = np.array([(min(max(255 * (i - Sin) / difIn, 0), 255)) for i in range(256)])
        # Mid-tone gamma adjustment.
        gradMed = np.median(img[:, :, ch])  # channel median before stretching
        Mt = V1[int(gradMed)] / 128.  # where the median lands after stretching, relative to mid-gray
        V2 = 255 * np.power(V1 / 255, 1 / Mt)  # gamma correction
        # Output linear stretch.
        Sout, Hout = 5, 250  # Sout: output black point, Hout: output white point
        difOut = Hout - Sout
        table[0, :, ch] = np.array([(min(max(Sout + difOut * V2[i] / 255, 0), 255)) for i in range(256)])
    return cv2.LUT(img, table)
def luminance_adjust(alpha, img):
    """Adjust image brightness by a signed strength factor.

    A positive *alpha* blends the image toward pure white; a non-positive
    *alpha* scales pixel values toward black (alpha == 0 is a no-op).

    Args:
        alpha: Strength, typically in [-1, 1].
        img: Numeric image array.

    Returns:
        The adjusted image as a uint8 numpy array.
    """
    # Both directions shrink the dynamic range by |alpha|; only the bright
    # direction also mixes in a white offset.
    scale = 1 - abs(alpha)
    offset = alpha * 255.0 if alpha > 0 else 0.0
    return np.array(img * scale + offset, dtype='uint8')
# 14.14 Photoshop 自动色阶调整算法
def face_detect_pic(image):
    """Count faces in *image* with a Haar cascade classifier.

    Args:
        image: RGB image array.

    Returns:
        Number of face rectangles detected.
    """
    # 1. Haar cascades operate on grayscale input.
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    # 2. Load the pre-trained face cascade.
    detector = cv2.CascadeClassifier(FACE_CLASSIFIER)
    # 3. Detect faces on the grayscale image; returns one rect per face.
    detections = detector.detectMultiScale(gray, 1.05, 3)
    if DEBUG:
        preview = image.copy()
        for x, y, w, h in detections:
            cv2.rectangle(preview, (x, y), (x + w, y + h), (0, 0, 255), 3)  # draw the box
        cv2.imshow("", preview)
        cv2.waitKey(0)
    return len(detections)
if __name__ == '__main__':
    # Demo: Photoshop-style automatic levels adjustment pipeline.
    img = cv2.imread("2.png", flags=1)  # read as a color image
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # convert to grayscale
    # Bug fix: the original printed gray.min() twice; maxG now reports the max.
    print("cutoff={}, minG={}, maxG={}".format(0.0, gray.min(), gray.max()))
    # Manual levels adjustment, kept for reference:
    # equManual = levelsAdjust(img, 63, 205, 0.8, 10, 245)
    # Automatic levels adjustment.
    cutoff = 1.0  # clipping ratio, recommended range [0.0, 1.0]
    equAuto = autoLevels(img, cutoff)
    # Brighten slightly, then strip the background.
    luminance = luminance_adjust(0.3, equAuto)
    remove_bg_img = remove_background(luminance)
    print(1)
    # Show every pipeline stage side by side.
    cv2.imshow("source", img)
    cv2.imshow("levels", equAuto)
    cv2.imshow("luminance", luminance)
    cv2.imshow("remove_bg_img", remove_bg_img)
    cv2.waitKey(0)
    image = cv2.imread("1.png")
    remove_background(image)