Fix GPU detection on Apple Silicon

This commit is contained in:
Felipe Daragon
2025-05-14 02:05:34 +01:00
parent a33d858916
commit 59b6233882
3 changed files with 33 additions and 40 deletions

View File

@@ -64,13 +64,11 @@ NeoRefacer has been tested on the following operating systems:
| Operating System | CPU Support | GPU Support |
| ---------------- | ----------- | ----------- |
| MacOSX | ✅ | :warning: |
| MacOSX | ✅ | |
| Windows | ✅ | ✅ |
| Linux | ✅ | ✅ |
The application is compatible with both CPU and GPU (Nvidia CUDA) environments, and MacOSX(CoreML)
:warning: Please note, we do not recommend using `onnxruntime-silicon` on MacOSX due to an apparent issue with memory management. If you manage to compile `onnxruntime` for Silicon, the program is prepared to use CoreML.
The application is compatible with both CPU and GPU (Nvidia CUDA) environments, and MacOSX (CoreML)
## Installation
@@ -82,9 +80,12 @@ Follow these steps to install Refacer and its dependencies:
```bash
# Check if ffmpeg is available (if not, you might need to download it and add it to your PATH)
ffmpeg
# Windows: download ffmpeg-git-essentials.7z from https://www.gyan.dev/ffmpeg/builds/
# Other systems: see a tutorial https://www.hostinger.com/tutorials/how-to-install-ffmpeg
ffmpeg
# MacOS: if you have brew installed:
# brew install ffmpeg
# Other systems: see a tutorial https://www.hostinger.com/tutorials/how-to-install-ffmpeg
# Clone the repository
git clone https://github.com/MechasAI/NeoRefacer.git

View File

@@ -112,56 +112,48 @@ class Refacer:
raise Exception("ERROR, something went wrong downloading the model!")
def __check_providers(self):
available_providers = rt.get_available_providers()
if self.force_cpu:
self.providers = ['CPUExecutionProvider']
else:
self.providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
# Prefer faster execution providers in order
self.providers = []
for p in ['CoreMLExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']:
if p in available_providers:
self.providers.append(p)
rt.set_default_logger_severity(4)
self.sess_options = rt.SessionOptions()
self.sess_options.execution_mode = rt.ExecutionMode.ORT_PARALLEL # Better parallelism
self.sess_options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_ALL
def __check_providers(self):
if self.force_cpu:
self.providers = ['CPUExecutionProvider']
else:
self.providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
rt.set_default_logger_severity(4)
self.sess_options = rt.SessionOptions()
self.sess_options.execution_mode = rt.ExecutionMode.ORT_PARALLEL # Better parallelism
self.sess_options.execution_mode = rt.ExecutionMode.ORT_PARALLEL
self.sess_options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_ALL
# Use a temporary model session to detect the actual active provider
test_model = os.path.expanduser("~/.insightface/models/buffalo_l/det_10g.onnx")
try:
test_session = rt.InferenceSession(test_model, self.sess_options, providers=self.providers)
active_provider = test_session.get_providers()[0] # First provider used
test_session = rt.InferenceSession(test_model, self.sess_options, providers=self.providers)
active_provider = test_session.get_providers()[0]
except Exception as e:
print(f"[ERROR] Failed to create test session: {e}")
active_provider = 'CPUExecutionProvider' # Safe fallback
print(f"[ERROR] Failed to create test session: {e}")
active_provider = 'CPUExecutionProvider'
# Set mode based on actual provider
if active_provider == 'CUDAExecutionProvider':
self.mode = RefacerMode.CUDA
self.use_num_cpus = 2
self.sess_options.intra_op_num_threads = 1
self.mode = RefacerMode.CUDA
self.use_num_cpus = 2
self.sess_options.intra_op_num_threads = 1
elif active_provider == 'CoreMLExecutionProvider':
self.mode = RefacerMode.COREML
self.use_num_cpus = max(mp.cpu_count() - 1, 1)
self.sess_options.intra_op_num_threads = int(self.use_num_cpus / 2)
# elif active_provider == 'TensorrtExecutionProvider':
self.mode = RefacerMode.COREML
self.use_num_cpus = max(mp.cpu_count() - 1, 1)
self.sess_options.intra_op_num_threads = int(self.use_num_cpus / 2)
elif self.colab_performance:
self.mode = RefacerMode.TENSORRT
self.use_num_cpus = max(mp.cpu_count() - 1, 1)
self.sess_options.intra_op_num_threads = int(self.use_num_cpus / 2)
self.mode = RefacerMode.TENSORRT
self.use_num_cpus = max(mp.cpu_count() - 1, 1)
self.sess_options.intra_op_num_threads = int(self.use_num_cpus / 2)
else:
self.mode = RefacerMode.CPU
self.use_num_cpus = max(mp.cpu_count() - 1, 1)
self.sess_options.intra_op_num_threads = int(self.use_num_cpus / 2)
self.mode = RefacerMode.CPU
self.use_num_cpus = max(mp.cpu_count() - 1, 1)
self.sess_options.intra_op_num_threads = int(self.use_num_cpus / 2)
print(f"Available providers: {available_providers}")
print(f"Using providers: {self.providers}")
print(f"Active provider: {active_provider}")
print(f"Mode: {self.mode}")

View File

@@ -5,7 +5,7 @@ gradio==5.22.0
insightface==0.7.3
numpy==1.24.3
onnx==1.14.0
onnxruntime-silicon
onnxruntime-silicon==1.16.3
opencv_python==4.7.0.72
opencv_python_headless==4.7.0.72
scikit-image==0.20.0