Fix GPU detection on Apple Silicon

This commit is contained in:
Felipe Daragon
2025-05-14 02:05:34 +01:00
parent a33d858916
commit 59b6233882
3 changed files with 33 additions and 40 deletions

View File

@@ -64,13 +64,11 @@ NeoRefacer has been tested on the following operating systems:
| Operating System | CPU Support | GPU Support |
| ---------------- | ----------- | ----------- |
| MacOSX | ✅ | :warning: |
| MacOSX | ✅ | |
| Windows | ✅ | ✅ |
| Linux | ✅ | ✅ |
The application is compatible with both CPU and GPU (Nvidia CUDA) environments, and MacOSX(CoreML)
:warning: Please note, we do not recommend using `onnxruntime-silicon` on MacOSX due to an apparent issue with memory management. If you manage to compile `onnxruntime` for Silicon, the program is prepared to use CoreML.
The application is compatible with both CPU and GPU (Nvidia CUDA) environments, and MacOSX (CoreML)
## Installation
@@ -82,9 +80,12 @@ Follow these steps to install Refacer and its dependencies:
```bash
# Check if ffmpeg is available (if not, you might need to download it and add it to your PATH)
ffmpeg
# Windows: download ffmpeg-git-essentials.7z from https://www.gyan.dev/ffmpeg/builds/
# Other systems: see a tutorial https://www.hostinger.com/tutorials/how-to-install-ffmpeg
ffmpeg
# MacOS: if you have brew installed:
# brew install ffmpeg
# Other systems: see a tutorial https://www.hostinger.com/tutorials/how-to-install-ffmpeg
# Clone the repository
git clone https://github.com/MechasAI/NeoRefacer.git

View File

@@ -112,56 +112,48 @@ class Refacer:
raise Exception("ERROR, something went wrong downloading the model!")
def __check_providers(self):
available_providers = rt.get_available_providers()
if self.force_cpu:
self.providers = ['CPUExecutionProvider']
else:
self.providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
# Prefer faster execution providers in order
self.providers = []
for p in ['CoreMLExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']:
if p in available_providers:
self.providers.append(p)
rt.set_default_logger_severity(4)
self.sess_options = rt.SessionOptions()
self.sess_options.execution_mode = rt.ExecutionMode.ORT_PARALLEL # Better parallelism
self.sess_options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_ALL
def __check_providers(self):
if self.force_cpu:
self.providers = ['CPUExecutionProvider']
else:
self.providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
rt.set_default_logger_severity(4)
self.sess_options = rt.SessionOptions()
self.sess_options.execution_mode = rt.ExecutionMode.ORT_PARALLEL # Better parallelism
self.sess_options.execution_mode = rt.ExecutionMode.ORT_PARALLEL
self.sess_options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_ALL
# Use a temporary model session to detect the actual active provider
test_model = os.path.expanduser("~/.insightface/models/buffalo_l/det_10g.onnx")
try:
test_session = rt.InferenceSession(test_model, self.sess_options, providers=self.providers)
active_provider = test_session.get_providers()[0] # First provider used
test_session = rt.InferenceSession(test_model, self.sess_options, providers=self.providers)
active_provider = test_session.get_providers()[0]
except Exception as e:
print(f"[ERROR] Failed to create test session: {e}")
active_provider = 'CPUExecutionProvider' # Safe fallback
print(f"[ERROR] Failed to create test session: {e}")
active_provider = 'CPUExecutionProvider'
# Set mode based on actual provider
if active_provider == 'CUDAExecutionProvider':
self.mode = RefacerMode.CUDA
self.use_num_cpus = 2
self.sess_options.intra_op_num_threads = 1
self.mode = RefacerMode.CUDA
self.use_num_cpus = 2
self.sess_options.intra_op_num_threads = 1
elif active_provider == 'CoreMLExecutionProvider':
self.mode = RefacerMode.COREML
self.use_num_cpus = max(mp.cpu_count() - 1, 1)
self.sess_options.intra_op_num_threads = int(self.use_num_cpus / 2)
# elif active_provider == 'TensorrtExecutionProvider':
self.mode = RefacerMode.COREML
self.use_num_cpus = max(mp.cpu_count() - 1, 1)
self.sess_options.intra_op_num_threads = int(self.use_num_cpus / 2)
elif self.colab_performance:
self.mode = RefacerMode.TENSORRT
self.use_num_cpus = max(mp.cpu_count() - 1, 1)
self.sess_options.intra_op_num_threads = int(self.use_num_cpus / 2)
self.mode = RefacerMode.TENSORRT
self.use_num_cpus = max(mp.cpu_count() - 1, 1)
self.sess_options.intra_op_num_threads = int(self.use_num_cpus / 2)
else:
self.mode = RefacerMode.CPU
self.use_num_cpus = max(mp.cpu_count() - 1, 1)
self.sess_options.intra_op_num_threads = int(self.use_num_cpus / 2)
self.mode = RefacerMode.CPU
self.use_num_cpus = max(mp.cpu_count() - 1, 1)
self.sess_options.intra_op_num_threads = int(self.use_num_cpus / 2)
print(f"Available providers: {available_providers}")
print(f"Using providers: {self.providers}")
print(f"Active provider: {active_provider}")
print(f"Mode: {self.mode}")

View File

@@ -5,7 +5,7 @@ gradio==5.22.0
insightface==0.7.3
numpy==1.24.3
onnx==1.14.0
onnxruntime-silicon
onnxruntime-silicon==1.16.3
opencv_python==4.7.0.72
opencv_python_headless==4.7.0.72
scikit-image==0.20.0