diff --git a/README.md b/README.md index b9cf8df..c85dafc 100644 --- a/README.md +++ b/README.md @@ -22,13 +22,13 @@ Refacer has been thoroughly tested on the following operating systems: | Operating System | CPU Support | GPU Support | | ---------------- | ----------- | ----------- | -| MacOSX | ✅ | ❌ | +| MacOSX | ✅ | :warning: | | Windows | ✅ | ✅ | | Linux | ✅ | ✅ | -The application is compatible with both CPU and GPU (Nvidia CUDA) environments, with the exception of MacOSX which does not currently support GPU (CoreML) usage. +The application is compatible with both CPU and GPU (Nvidia CUDA) environments, and MacOSX (CoreML). -Please note, we do not recommend using `onnxruntime-silicon` on MacOSX due to an apparent issue with memory management. If you manage to compile `onnxruntime` for Silicon, the program is prepared to use CoreML. +:warning: Please note, we do not recommend using `onnxruntime-silicon` on MacOSX due to an apparent issue with memory management. If you manage to compile `onnxruntime` for Silicon, the program is prepared to use CoreML. ## Installation @@ -59,6 +59,11 @@ Follow these steps to install Refacer: * For GPU (compatible with Windows and Linux only, requires a NVIDIA GPU with CUDA and its libraries): ```bash pip install -r requirements-GPU.txt + ``` + + * For CoreML (compatible with MacOSX, requires Silicon architecture): + ```bash + pip install -r requirements-COREML.txt ``` For more information on installing the CUDA necessary to use `onnxruntime-gpu`, please refer directly to the official [ONNX Runtime repository](https://github.com/microsoft/onnxruntime/). 
diff --git a/app.py b/app.py index 845558a..692532d 100644 --- a/app.py +++ b/app.py @@ -1,20 +1,25 @@ import gradio as gr from refacer import Refacer +import argparse -MAX_NUM_OF_FACES=8 +parser = argparse.ArgumentParser(description='Refacer') +parser.add_argument("--max_num_faces", help="Max number of faces on UI", default=5) +parser.add_argument("--force_cpu", help="Force CPU mode", default=False,action="store_true") +parser.add_argument("--share_gradio", help="Share Gradio", default=False,action="store_true") +args = parser.parse_args() -refacer = Refacer() +refacer = Refacer(force_cpu=args.force_cpu) -n=MAX_NUM_OF_FACES +num_faces=args.max_num_faces def run(*vars): video_path=vars[0] - origins=vars[1:(n+1)] - destinations=vars[(n+1):(n*2)+1] - thresholds=vars[(n*2)+1:] + origins=vars[1:(num_faces+1)] + destinations=vars[(num_faces+1):(num_faces*2)+1] + thresholds=vars[(num_faces*2)+1:] faces = [] - for k in range(0,n): + for k in range(0,num_faces): if origins[k] is not None and destinations[k] is not None: faces.append({ 'origin':origins[k], @@ -35,7 +40,7 @@ with gr.Blocks() as demo: video=gr.Video(label="Original video") video2=gr.Video(label="Refaced video",interactive=False) - for i in range(0,MAX_NUM_OF_FACES): + for i in range(0,num_faces): with gr.Tab(f"Face #{i+1}"): with gr.Row(): origin.append(gr.Image(label="Face to replace")) @@ -48,4 +53,4 @@ with gr.Blocks() as demo: button.click(fn=run,inputs=[video]+origin+destination+thresholds,outputs=[video2]) #demo.launch(share=True,server_name="0.0.0.0", show_error=True) -demo.queue().launch(show_error=True,share=True) \ No newline at end of file +demo.queue().launch(show_error=True,share=args.share_gradio) \ No newline at end of file diff --git a/refacer.py b/refacer.py index 5079324..df855f1 100644 --- a/refacer.py +++ b/refacer.py @@ -1,6 +1,5 @@ import cv2 -import insightface -import onnxruntime +import onnxruntime as rt import sys from insightface.app import FaceAnalysis sys.path.insert(1, 
'./recognition') @@ -14,25 +13,63 @@ import ffmpeg import random import multiprocessing as mp from concurrent.futures import ThreadPoolExecutor +from insightface.model_zoo.inswapper import INSwapper +import psutil +from enum import Enum +from insightface.app.common import Face +from insightface.utils.storage import ensure_available + +class RefacerMode(Enum): + CPU, CUDA, COREML = range(1, 4) class Refacer: + def __init__(self,force_cpu=False): + self.force_cpu = force_cpu + self.__check_providers() + self.total_mem = psutil.virtual_memory().total + self.__init_apps() - def __init__(self): - onnxruntime.set_default_logger_severity(4) + def __check_providers(self): + if self.force_cpu : + self.providers = ['CPUExecutionProvider'] + else: + self.providers = rt.get_available_providers() + rt.set_default_logger_severity(4) + self.sess_options = rt.SessionOptions() + self.sess_options.execution_mode = rt.ExecutionMode.ORT_SEQUENTIAL + self.sess_options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_ALL - self.face_app = FaceAnalysis(name='buffalo_l') - self.face_app.prepare(ctx_id=0, det_size=(640, 640)) - - assets_dir = osp.expanduser('~/.insightface/models/buffalo_l') + if len(self.providers) == 1 and 'CPUExecutionProvider' in self.providers: + self.mode = RefacerMode.CPU + self.use_num_cpus = mp.cpu_count()-1 + self.sess_options.intra_op_num_threads = int(self.use_num_cpus/2) + print(f"CPU mode with providers {self.providers}") + elif 'CoreMLExecutionProvider' in self.providers: + self.mode = RefacerMode.COREML + self.use_num_cpus = mp.cpu_count()-1 + print(f"CoreML mode with providers {self.providers}") + self.sess_options.intra_op_num_threads = int(self.use_num_cpus/2) + elif 'CUDAExecutionProvider' in self.providers: + self.mode = RefacerMode.CUDA + self.use_num_cpus = 1 + self.sess_options.intra_op_num_threads = 1 + print(f"CUDA mode with providers {self.providers}") + def __init_apps(self): + assets_dir = ensure_available('models', 'buffalo_l', 
root='~/.insightface') - self.face_detector = SCRFD(os.path.join(assets_dir, 'det_10g.onnx')) - self.face_detector.prepare(0) + model_path = os.path.join(assets_dir, 'det_10g.onnx') + sess_face = rt.InferenceSession(model_path, self.sess_options, providers=self.providers) + self.face_detector = SCRFD(model_path,sess_face) + self.face_detector.prepare(0,input_size=(640, 640)) - model_path = os.path.join(assets_dir, 'w600k_r50.onnx') - self.rec_app = ArcFaceONNX(model_path) + model_path = os.path.join(assets_dir , 'w600k_r50.onnx') + sess_rec = rt.InferenceSession(model_path, self.sess_options, providers=self.providers) + self.rec_app = ArcFaceONNX(model_path,sess_rec) self.rec_app.prepare(0) - self.face_swapper = insightface.model_zoo.get_model('inswapper_128.onnx', download=True, download_zip=True, providers=['CoreMLExecutionProvider','CUDAExecutionProvider']) + model_path = 'inswapper_128.onnx' + sess_swap = rt.InferenceSession(model_path, self.sess_options, providers=self.providers) + self.face_swapper = INSwapper(model_path,sess_swap) def __prepare_faces(self, faces): replacements=[] @@ -43,7 +80,7 @@ class Refacer: raise Exception('No face detected on "Face to replace" image') feat_original = self.rec_app.get(face['origin'], kpss1[0]) #image2 = cv2.imread(face.destination) - _faces = self.face_app.get(face['destination'],max_num=1) + _faces = self.__get_faces(face['destination'],max_num=1) if len(_faces)<1: raise Exception('No face detected on "Destination face" image') replacements.append((feat_original,_faces[0],face['threshold'])) @@ -57,9 +94,26 @@ class Refacer: out = ffmpeg.output(in1.video, in2.audio, new_path,vcodec="libx264") out.run() return new_path + + def __get_faces(self,frame,max_num=0): + bboxes, kpss = self.face_detector.detect(frame,max_num=max_num,metric='default') + + if bboxes.shape[0] == 0: + return [] + ret = [] + for i in range(bboxes.shape[0]): + bbox = bboxes[i, 0:4] + det_score = bboxes[i, 4] + kps = None + if kpss is not None: + kps 
= kpss[i] + face = Face(bbox=bbox, kps=kps, det_score=det_score) + face.embedding = self.rec_app.get(frame, kps) + ret.append(face) + return ret def __process_faces(self,frame): - faces = self.face_app.get(frame) + faces = self.__get_faces(frame) for face in faces: for rep_face in self.replacement_faces: sim = self.rec_app.compute_sim(rep_face[0], face.embedding) @@ -67,7 +121,7 @@ class Refacer: frame = self.face_swapper.get(frame, face, rep_face[1], paste_back=True) return frame - def reface(self, video_path, faces): + def reface(self, video_path, faces): output_video_path = os.path.join('out',Path(video_path).name) self.replacement_faces=self.__prepare_faces(faces) @@ -87,6 +141,7 @@ class Refacer: output = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height)) frames=[] + self.k = 1 with tqdm(total=total_frames,desc="Extracting frames") as pbar: while cap.isOpened(): flag, frame = cap.read() @@ -98,12 +153,10 @@ class Refacer: cap.release() pbar.close() - with ThreadPoolExecutor(max_workers = mp.cpu_count()-1) as executor: + with ThreadPoolExecutor(max_workers = self.use_num_cpus) as executor: results = list(tqdm(executor.map(self.__process_faces, frames), total=len(frames),desc="Processing frames")) for result in results: output.write(result) output.release() - return self.__convert_video(video_path,output_video_path) - - \ No newline at end of file + return self.__convert_video(video_path,output_video_path) \ No newline at end of file diff --git a/requirements-COREML.txt b/requirements-COREML.txt new file mode 100644 index 0000000..c21e541 --- /dev/null +++ b/requirements-COREML.txt @@ -0,0 +1,11 @@ +ffmpeg_python==0.2.0 +gradio==3.33.1 +insightface==0.7.3 +numpy==1.24.3 +onnx==1.14.0 +onnxruntime-silicon +opencv_python==4.7.0.72 +opencv_python_headless==4.7.0.72 +scikit-image==0.20.0 +tqdm +psutil \ No newline at end of file diff --git a/requirements-GPU.txt b/requirements-GPU.txt index 34bca8f..88c8e34 100644 --- 
a/requirements-GPU.txt +++ b/requirements-GPU.txt @@ -7,4 +7,5 @@ onnxruntime_gpu==1.15.0 opencv_python==4.7.0.72 opencv_python_headless==4.7.0.72 scikit-image==0.20.0 -tqdm \ No newline at end of file +tqdm +psutil \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 1cee65d..13c543b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,3 +8,4 @@ opencv_python==4.7.0.72 opencv_python_headless==4.7.0.72 scikit-image==0.20.0 tqdm +psutil \ No newline at end of file