Fix GPU detection on Apple Silicon
This commit is contained in:
13
README.md
13
README.md
@@ -64,13 +64,11 @@ NeoRefacer has been tested on the following operating systems:
|
|||||||
|
|
||||||
| Operating System | CPU Support | GPU Support |
|
| Operating System | CPU Support | GPU Support |
|
||||||
| ---------------- | ----------- | ----------- |
|
| ---------------- | ----------- | ----------- |
|
||||||
| MacOSX | ✅ | :warning: |
|
| MacOSX | ✅ | ✅ |
|
||||||
| Windows | ✅ | ✅ |
|
| Windows | ✅ | ✅ |
|
||||||
| Linux | ✅ | ✅ |
|
| Linux | ✅ | ✅ |
|
||||||
|
|
||||||
The application is compatible with both CPU and GPU (Nvidia CUDA) environments, and MacOSX (CoreML)
|
The application is compatible with both CPU and GPU (Nvidia CUDA) environments, and MacOSX (CoreML)
|
||||||
|
|
||||||
:warning: Please note, we do not recommend using `onnxruntime-silicon` on MacOSX due to an apparent issue with memory management. If you manage to compile `onnxruntime` for Silicon, the program is prepared to use CoreML.
|
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
@@ -82,10 +80,13 @@ Follow these steps to install Refacer and its dependencies:
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Check if ffmpeg is available (if not, you might need to download it and add it to your PATH)
|
# Check if ffmpeg is available (if not, you might need to download it and add it to your PATH)
|
||||||
# Windows: download ffmpeg-git-essentials.7z from https://www.gyan.dev/ffmpeg/builds/
|
|
||||||
# Other systems: see a tutorial https://www.hostinger.com/tutorials/how-to-install-ffmpeg
|
|
||||||
ffmpeg
|
ffmpeg
|
||||||
|
|
||||||
|
# Windows: download ffmpeg-git-essentials.7z from https://www.gyan.dev/ffmpeg/builds/
|
||||||
|
# MacOS: if you have brew installed:
|
||||||
|
# brew install ffmpeg
|
||||||
|
# Other systems: see a tutorial https://www.hostinger.com/tutorials/how-to-install-ffmpeg
|
||||||
|
|
||||||
# Clone the repository
|
# Clone the repository
|
||||||
git clone https://github.com/MechasAI/NeoRefacer.git
|
git clone https://github.com/MechasAI/NeoRefacer.git
|
||||||
cd NeoRefacer
|
cd NeoRefacer
|
||||||
|
|||||||
58
refacer.py
58
refacer.py
@@ -112,56 +112,48 @@ class Refacer:
|
|||||||
raise Exception("ERROR, something went wrong downloading the model!")
|
raise Exception("ERROR, something went wrong downloading the model!")
|
||||||
|
|
||||||
def __check_providers(self):
    """Select ONNX Runtime execution providers and detect the active one.

    Builds ``self.providers`` from what onnxruntime actually reports as
    available (CoreML > CUDA > CPU preference order), configures
    ``self.sess_options``, then probes a small detection model to learn
    which provider onnxruntime really activates. Based on that, sets:

    - ``self.mode``: a ``RefacerMode`` value (CUDA / COREML / TENSORRT / CPU)
    - ``self.use_num_cpus``: worker-process count for frame processing
    - ``self.sess_options.intra_op_num_threads``: per-session thread count

    Respects ``self.force_cpu`` (CPU-only override) and
    ``self.colab_performance`` (treated as TensorRT-capable).
    """
    available_providers = rt.get_available_providers()

    if self.force_cpu:
        self.providers = ['CPUExecutionProvider']
    else:
        # Prefer faster execution providers in order; only keep the ones
        # this onnxruntime build actually exposes, so requesting e.g.
        # CUDA on Apple Silicon no longer masks CoreML.
        preferred_order = [
            'CoreMLExecutionProvider',
            'CUDAExecutionProvider',
            'CPUExecutionProvider',
        ]
        self.providers = [
            provider for provider in preferred_order
            if provider in available_providers
        ]

    rt.set_default_logger_severity(4)
    self.sess_options = rt.SessionOptions()
    self.sess_options.execution_mode = rt.ExecutionMode.ORT_PARALLEL
    self.sess_options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_ALL

    # Use a temporary model session to detect the actual active provider:
    # onnxruntime may silently fall back (e.g. CUDA requested but driver
    # missing), so the requested list alone is not trustworthy.
    # NOTE(review): assumes insightface has already downloaded buffalo_l
    # to the default cache path — confirm against the model-download step.
    test_model = os.path.expanduser("~/.insightface/models/buffalo_l/det_10g.onnx")
    try:
        test_session = rt.InferenceSession(test_model, self.sess_options, providers=self.providers)
        # First entry of get_providers() is the provider actually in use.
        active_provider = test_session.get_providers()[0]
    except Exception as e:
        print(f"[ERROR] Failed to create test session: {e}")
        active_provider = 'CPUExecutionProvider'  # Safe fallback

    # Set mode based on the provider that is actually active.
    if active_provider == 'CUDAExecutionProvider':
        self.mode = RefacerMode.CUDA
        # GPU does the heavy lifting; keep CPU-side parallelism minimal.
        self.use_num_cpus = 2
        self.sess_options.intra_op_num_threads = 1
    elif active_provider == 'CoreMLExecutionProvider':
        self.mode = RefacerMode.COREML
        self.use_num_cpus = max(mp.cpu_count() - 1, 1)
        self.sess_options.intra_op_num_threads = int(self.use_num_cpus / 2)
    elif self.colab_performance:
        self.mode = RefacerMode.TENSORRT
        self.use_num_cpus = max(mp.cpu_count() - 1, 1)
        self.sess_options.intra_op_num_threads = int(self.use_num_cpus / 2)
    else:
        self.mode = RefacerMode.CPU
        self.use_num_cpus = max(mp.cpu_count() - 1, 1)
        self.sess_options.intra_op_num_threads = int(self.use_num_cpus / 2)

    print(f"Available providers: {available_providers}")
    print(f"Using providers: {self.providers}")
    print(f"Active provider: {active_provider}")
    print(f"Mode: {self.mode}")
||||||
|
|||||||
@@ -5,7 +5,7 @@ gradio==5.22.0
|
|||||||
insightface==0.7.3
|
insightface==0.7.3
|
||||||
numpy==1.24.3
|
numpy==1.24.3
|
||||||
onnx==1.14.0
|
onnx==1.14.0
|
||||||
onnxruntime-silicon
|
onnxruntime-silicon==1.16.3
|
||||||
opencv_python==4.7.0.72
|
opencv_python==4.7.0.72
|
||||||
opencv_python_headless==4.7.0.72
|
opencv_python_headless==4.7.0.72
|
||||||
scikit-image==0.20.0
|
scikit-image==0.20.0
|
||||||
|
|||||||
Reference in New Issue
Block a user