Added command line arguments for --share_gradio #7. Implemented multithreaded parallel processing and CoreML optimization, still pending CUDA optimization #5.

This commit is contained in:
Xavi Vinaixa
2023-06-06 07:49:07 +02:00
committed by xaviviro
parent b4146fa26d
commit cd1f6cd2df
6 changed files with 109 additions and 33 deletions

View File

@@ -22,13 +22,13 @@ Refacer has been thoroughly tested on the following operating systems:
| Operating System | CPU Support | GPU Support | | Operating System | CPU Support | GPU Support |
| ---------------- | ----------- | ----------- | | ---------------- | ----------- | ----------- |
| MacOSX | ✅ | | | MacOSX | ✅ | :warning: |
| Windows | ✅ | ✅ | | Windows | ✅ | ✅ |
| Linux | ✅ | ✅ | | Linux | ✅ | ✅ |
The application is compatible with both CPU and GPU (Nvidia CUDA) environments, with the exception of MacOSX which does not currently support GPU (CoreML) usage. The application is compatible with both CPU and GPU (Nvidia CUDA) environments, and with MacOSX (CoreML).
Please note, we do not recommend using `onnxruntime-silicon` on MacOSX due to an apparent issue with memory management. If you manage to compile `onnxruntime` for Silicon, the program is prepared to use CoreML. :warning: Please note, we do not recommend using `onnxruntime-silicon` on MacOSX due to an apparent issue with memory management. If you manage to compile `onnxruntime` for Silicon, the program is prepared to use CoreML.
## Installation ## Installation
@@ -59,6 +59,11 @@ Follow these steps to install Refacer:
* For GPU (compatible with Windows and Linux only, requires a NVIDIA GPU with CUDA and its libraries): * For GPU (compatible with Windows and Linux only, requires a NVIDIA GPU with CUDA and its libraries):
```bash ```bash
pip install -r requirements-GPU.txt pip install -r requirements-GPU.txt
```
* For CoreML (compatible with MacOSX, requires Silicon architecture):
```bash
pip install -r requirements-COREML.txt
``` ```
For more information on installing the CUDA necessary to use `onnxruntime-gpu`, please refer directly to the official [ONNX Runtime repository](https://github.com/microsoft/onnxruntime/). For more information on installing the CUDA necessary to use `onnxruntime-gpu`, please refer directly to the official [ONNX Runtime repository](https://github.com/microsoft/onnxruntime/).

23
app.py
View File

@@ -1,20 +1,25 @@
import gradio as gr import gradio as gr
from refacer import Refacer from refacer import Refacer
import argparse
MAX_NUM_OF_FACES=8 parser = argparse.ArgumentParser(description='Refacer')
# Command-line options.
# --max_num_faces needs type=int: without it argparse returns a string when
# the flag is given, and the value is later used in range() and in slice
# arithmetic, both of which require an integer.
parser.add_argument("--max_num_faces", type=int, help="Max number of faces on UI", default=5)
parser.add_argument("--force_cpu", help="Force CPU mode", default=False, action="store_true")
parser.add_argument("--share_gradio", help="Share Gradio", default=False, action="store_true")
args = parser.parse_args()
refacer = Refacer() refacer = Refacer(force_cpu=args.force_cpu)
n=MAX_NUM_OF_FACES num_faces=args.max_num_faces
def run(*vars): def run(*vars):
video_path=vars[0] video_path=vars[0]
origins=vars[1:(n+1)] origins=vars[1:(num_faces+1)]
destinations=vars[(n+1):(n*2)+1] destinations=vars[(num_faces+1):(num_faces*2)+1]
thresholds=vars[(n*2)+1:] thresholds=vars[(num_faces*2)+1:]
faces = [] faces = []
for k in range(0,n): for k in range(0,num_faces):
if origins[k] is not None and destinations[k] is not None: if origins[k] is not None and destinations[k] is not None:
faces.append({ faces.append({
'origin':origins[k], 'origin':origins[k],
@@ -35,7 +40,7 @@ with gr.Blocks() as demo:
video=gr.Video(label="Original video") video=gr.Video(label="Original video")
video2=gr.Video(label="Refaced video",interactive=False) video2=gr.Video(label="Refaced video",interactive=False)
for i in range(0,MAX_NUM_OF_FACES): for i in range(0,num_faces):
with gr.Tab(f"Face #{i+1}"): with gr.Tab(f"Face #{i+1}"):
with gr.Row(): with gr.Row():
origin.append(gr.Image(label="Face to replace")) origin.append(gr.Image(label="Face to replace"))
@@ -48,4 +53,4 @@ with gr.Blocks() as demo:
button.click(fn=run,inputs=[video]+origin+destination+thresholds,outputs=[video2]) button.click(fn=run,inputs=[video]+origin+destination+thresholds,outputs=[video2])
#demo.launch(share=True,server_name="0.0.0.0", show_error=True) #demo.launch(share=True,server_name="0.0.0.0", show_error=True)
demo.queue().launch(show_error=True,share=True) demo.queue().launch(show_error=True,share=args.share_gradio)

View File

@@ -1,6 +1,5 @@
import cv2 import cv2
import insightface import onnxruntime as rt
import onnxruntime
import sys import sys
from insightface.app import FaceAnalysis from insightface.app import FaceAnalysis
sys.path.insert(1, './recognition') sys.path.insert(1, './recognition')
@@ -14,25 +13,63 @@ import ffmpeg
import random import random
import multiprocessing as mp import multiprocessing as mp
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from insightface.model_zoo.inswapper import INSwapper
import psutil
from enum import Enum
from insightface.app.common import Face
from insightface.utils.storage import ensure_available
class RefacerMode(Enum):
    """Processing mode, one member per supported execution backend."""

    CPU = 1
    CUDA = 2
    COREML = 3
class Refacer: class Refacer:
def __init__(self, force_cpu=False):
    """Set up a Refacer instance.

    force_cpu: when true, only the CPU execution provider is used instead
    of every provider ONNX Runtime reports as available.
    """
    self.force_cpu = force_cpu
    # Provider selection runs first: it creates the provider list and the
    # session options that model loading reads.
    self.__check_providers()
    memory_info = psutil.virtual_memory()
    self.total_mem = memory_info.total
    self.__init_apps()
def __init__(self): def __check_providers(self):
onnxruntime.set_default_logger_severity(4) if self.force_cpu :
self.providers = ['CPUExecutionProvider']
else:
self.providers = rt.get_available_providers()
rt.set_default_logger_severity(4)
self.sess_options = rt.SessionOptions()
self.sess_options.execution_mode = rt.ExecutionMode.ORT_SEQUENTIAL
self.sess_options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_ALL
self.face_app = FaceAnalysis(name='buffalo_l') if len(self.providers) == 1 and 'CPUExecutionProvider' in self.providers:
self.face_app.prepare(ctx_id=0, det_size=(640, 640)) self.mode = RefacerMode.CPU
self.use_num_cpus = mp.cpu_count()-1
assets_dir = osp.expanduser('~/.insightface/models/buffalo_l') self.sess_options.intra_op_num_threads = int(self.use_num_cpus/2)
print(f"CPU mode with providers {self.providers}")
elif 'CoreMLExecutionProvider' in self.providers:
self.mode = RefacerMode.COREML
self.use_num_cpus = mp.cpu_count()-1
print(f"CoreML mode with providers {self.providers}")
self.sess_options.intra_op_num_threads = int(self.use_num_cpus/2)
elif 'CUDAExecutionProvider' in self.providers:
self.mode = RefacerMode.CUDA
self.use_num_cpus = 1
self.sess_options.intra_op_num_threads = 1
print(f"CUDA mode with providers {self.providers}")
def __init_apps(self):
assets_dir = ensure_available('models', 'buffalo_l', root='~/.insightface')
self.face_detector = SCRFD(os.path.join(assets_dir, 'det_10g.onnx')) model_path = os.path.join(assets_dir, 'det_10g.onnx')
self.face_detector.prepare(0) sess_face = rt.InferenceSession(model_path, self.sess_options, providers=self.providers)
self.face_detector = SCRFD(model_path,sess_face)
self.face_detector.prepare(0,input_size=(640, 640))
model_path = os.path.join(assets_dir, 'w600k_r50.onnx') model_path = os.path.join(assets_dir , 'w600k_r50.onnx')
self.rec_app = ArcFaceONNX(model_path) sess_rec = rt.InferenceSession(model_path, self.sess_options, providers=self.providers)
self.rec_app = ArcFaceONNX(model_path,sess_rec)
self.rec_app.prepare(0) self.rec_app.prepare(0)
self.face_swapper = insightface.model_zoo.get_model('inswapper_128.onnx', download=True, download_zip=True, providers=['CoreMLExecutionProvider','CUDAExecutionProvider']) model_path = 'inswapper_128.onnx'
sess_swap = rt.InferenceSession(model_path, self.sess_options, providers=self.providers)
self.face_swapper = INSwapper(model_path,sess_swap)
def __prepare_faces(self, faces): def __prepare_faces(self, faces):
replacements=[] replacements=[]
@@ -43,7 +80,7 @@ class Refacer:
raise Exception('No face detected on "Face to replace" image') raise Exception('No face detected on "Face to replace" image')
feat_original = self.rec_app.get(face['origin'], kpss1[0]) feat_original = self.rec_app.get(face['origin'], kpss1[0])
#image2 = cv2.imread(face.destination) #image2 = cv2.imread(face.destination)
_faces = self.face_app.get(face['destination'],max_num=1) _faces = self.__get_faces(face['destination'],max_num=1)
if len(_faces)<1: if len(_faces)<1:
raise Exception('No face detected on "Destination face" image') raise Exception('No face detected on "Destination face" image')
replacements.append((feat_original,_faces[0],face['threshold'])) replacements.append((feat_original,_faces[0],face['threshold']))
@@ -57,9 +94,26 @@ class Refacer:
out = ffmpeg.output(in1.video, in2.audio, new_path,vcodec="libx264") out = ffmpeg.output(in1.video, in2.audio, new_path,vcodec="libx264")
out.run() out.run()
return new_path return new_path
def __get_faces(self, frame, max_num=0):
    """Detect faces in ``frame`` and attach a recognition embedding to each.

    Runs ``self.face_detector.detect`` on the frame, wraps every detection
    in a ``Face`` (bounding box, keypoints, detection score) and stores the
    result of ``self.rec_app.get(frame, kps)`` as ``face.embedding``.

    ``max_num`` is forwarded unchanged to the detector.
    Returns a list of ``Face`` objects; the list is empty when the detector
    reports no boxes.
    """
    boxes, landmarks = self.face_detector.detect(frame, max_num=max_num, metric='default')
    detected = []
    for idx in range(boxes.shape[0]):
        # Columns 0-3 of a detection row are the box, column 4 is the score.
        points = landmarks[idx] if landmarks is not None else None
        found = Face(bbox=boxes[idx, 0:4], kps=points, det_score=boxes[idx, 4])
        found.embedding = self.rec_app.get(frame, points)
        detected.append(found)
    return detected
def __process_faces(self,frame): def __process_faces(self,frame):
faces = self.face_app.get(frame) faces = self.__get_faces(frame)
for face in faces: for face in faces:
for rep_face in self.replacement_faces: for rep_face in self.replacement_faces:
sim = self.rec_app.compute_sim(rep_face[0], face.embedding) sim = self.rec_app.compute_sim(rep_face[0], face.embedding)
@@ -67,7 +121,7 @@ class Refacer:
frame = self.face_swapper.get(frame, face, rep_face[1], paste_back=True) frame = self.face_swapper.get(frame, face, rep_face[1], paste_back=True)
return frame return frame
def reface(self, video_path, faces): def reface(self, video_path, faces):
output_video_path = os.path.join('out',Path(video_path).name) output_video_path = os.path.join('out',Path(video_path).name)
self.replacement_faces=self.__prepare_faces(faces) self.replacement_faces=self.__prepare_faces(faces)
@@ -87,6 +141,7 @@ class Refacer:
output = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height)) output = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
frames=[] frames=[]
self.k = 1
with tqdm(total=total_frames,desc="Extracting frames") as pbar: with tqdm(total=total_frames,desc="Extracting frames") as pbar:
while cap.isOpened(): while cap.isOpened():
flag, frame = cap.read() flag, frame = cap.read()
@@ -98,12 +153,10 @@ class Refacer:
cap.release() cap.release()
pbar.close() pbar.close()
with ThreadPoolExecutor(max_workers = mp.cpu_count()-1) as executor: with ThreadPoolExecutor(max_workers = self.use_num_cpus) as executor:
results = list(tqdm(executor.map(self.__process_faces, frames), total=len(frames),desc="Processing frames")) results = list(tqdm(executor.map(self.__process_faces, frames), total=len(frames),desc="Processing frames"))
for result in results: for result in results:
output.write(result) output.write(result)
output.release() output.release()
return self.__convert_video(video_path,output_video_path) return self.__convert_video(video_path,output_video_path)

11
requirements-COREML.txt Normal file
View File

@@ -0,0 +1,11 @@
ffmpeg_python==0.2.0
gradio==3.33.1
insightface==0.7.3
numpy==1.24.3
onnx==1.14.0
onnxruntime-silicon
opencv_python==4.7.0.72
opencv_python_headless==4.7.0.72
scikit-image==0.20.0
tqdm
psutil

View File

@@ -7,4 +7,5 @@ onnxruntime_gpu==1.15.0
opencv_python==4.7.0.72 opencv_python==4.7.0.72
opencv_python_headless==4.7.0.72 opencv_python_headless==4.7.0.72
scikit-image==0.20.0 scikit-image==0.20.0
tqdm tqdm
psutil

View File

@@ -8,3 +8,4 @@ opencv_python==4.7.0.72
opencv_python_headless==4.7.0.72 opencv_python_headless==4.7.0.72
scikit-image==0.20.0 scikit-image==0.20.0
tqdm tqdm
psutil