@@ -40,6 +40,7 @@ class BackendSpec:
4040 model_suffix : str # file extension/dir to look for cached model
4141 half : bool = True # use FP16
4242 extra_export_args : dict = field (default_factory = dict )
43+ compute_units : Optional [str ] = None # CoreML compute units: "cpu_and_ne", "all", etc.
4344
4445
4546BACKEND_SPECS = {
@@ -61,6 +62,7 @@ class BackendSpec:
6162 model_suffix = ".mlpackage" ,
6263 half = True ,
6364 extra_export_args = {"nms" : False },
65+ compute_units = "cpu_and_ne" , # Route to Neural Engine, leave GPU free for LLM/VLM
6466 ),
6567 "intel" : BackendSpec (
6668 name = "intel" ,
@@ -86,6 +88,7 @@ class HardwareEnv:
8688 backend : str = "cpu" # "cuda" | "rocm" | "mps" | "intel" | "cpu"
8789 device : str = "cpu" # torch device string
8890 export_format : str = "onnx" # optimal export format
91+ compute_units : str = "all" # CoreML compute units (Apple only)
8992 gpu_name : str = "" # human-readable GPU name
9093 gpu_memory_mb : int = 0 # GPU memory in MB
9194 driver_version : str = "" # GPU driver version
@@ -113,9 +116,11 @@ def detect() -> "HardwareEnv":
113116 else :
114117 env ._fallback_cpu ()
115118
116- # Set export format from backend spec
119+ # Set export format and compute units from backend spec
117120 spec = BACKEND_SPECS .get (env .backend , BACKEND_SPECS ["cpu" ])
118121 env .export_format = spec .export_format
122+ if spec .compute_units :
123+ env .compute_units = spec .compute_units
119124
120125 # Check if optimized runtime is available
121126 env .framework_ok = env ._check_framework ()
@@ -439,6 +444,58 @@ def export_model(self, model, model_name: str) -> Optional[Path]:
439444
440445 return None
441446
def _load_coreml_with_compute_units(self, model_path: str):
    """
    Load a CoreML model via YOLO with specific compute_units.

    ultralytics doesn't expose CoreML's ``compute_units`` parameter, so
    ``coremltools.models.MLModel`` is temporarily replaced by a subclass
    that defaults ``compute_units`` to the configured value. The patch is
    installed only for the duration of the ``YOLO(model_path)`` call and
    is always restored afterwards, even if loading raises.

    Falls back to a plain ``YOLO(model_path)`` when ``self.compute_units``
    is not a recognised name, when coremltools cannot be imported, or when
    the installed coremltools lacks the requested ``ComputeUnit`` member.

    Args:
        model_path: Path of the CoreML model to load.

    Returns:
        The object returned by ``YOLO(model_path)``.
    """
    from ultralytics import YOLO

    # Map string config -> coremltools.ComputeUnit member name
    unit_names = {
        "all": "ALL",
        "cpu_only": "CPU_ONLY",
        "cpu_and_gpu": "CPU_AND_GPU",
        "cpu_and_ne": "CPU_AND_NE",
    }

    # Accept any casing / stray whitespace ("CPU_AND_NE", " cpu_and_ne ").
    requested = self.compute_units
    lookup_key = (
        requested.strip().lower() if isinstance(requested, str) else requested
    )
    ct_enum_name = unit_names.get(lookup_key)
    if not ct_enum_name:
        _log(f"Unknown compute_units '{self.compute_units}', using default")
        return YOLO(model_path)

    # Only the coremltools import is guarded: an ImportError raised while
    # YOLO loads the model is a different failure and must not be reported
    # as "coremltools not available" nor trigger a silent second load.
    try:
        import coremltools as ct
    except ImportError:
        _log("coremltools not available, loading without compute_units")
        return YOLO(model_path)

    target_units = getattr(ct.ComputeUnit, ct_enum_name, None)
    if target_units is None:
        _log(f"coremltools.ComputeUnit.{ct_enum_name} not available")
        return YOLO(model_path)

    original_mlmodel = ct.models.MLModel

    class _PatchedMLModel(original_mlmodel):
        def __init__(self, *args, **kwargs):
            # setdefault: an explicit compute_units from the caller wins.
            kwargs.setdefault("compute_units", target_units)
            super().__init__(*args, **kwargs)

    # NOTE(review): the patch only affects MLModel objects constructed
    # while YOLO(model_path) is running. If ultralytics defers building
    # the CoreML backend until the first inference call, compute_units
    # is never applied -- confirm against the installed ultralytics.
    ct.models.MLModel = _PatchedMLModel
    try:
        model = YOLO(model_path)
    finally:
        ct.models.MLModel = original_mlmodel  # Always restore

    # Only mention the Neural Engine when it was actually requested.
    ne_note = " (Neural Engine preferred)" if ct_enum_name == "CPU_AND_NE" else ""
    _log(f"CoreML model loaded with compute_units={ct_enum_name}{ne_note}")
    return model
498+
442499 def load_optimized (self , model_name : str , use_optimized : bool = True ):
443500 """
444501 Load the best available model for this hardware.
@@ -455,7 +512,12 @@ def load_optimized(self, model_name: str, use_optimized: bool = True):
455512 optimized_path = self .get_optimized_path (model_name )
456513 if optimized_path .exists ():
457514 try :
458- model = YOLO (str (optimized_path ))
515+ # On Apple Silicon: route CoreML to Neural Engine
516+ if self .backend == "mps" and self .compute_units != "all" :
517+ model = self ._load_coreml_with_compute_units (
518+ str (optimized_path ))
519+ else :
520+ model = YOLO (str (optimized_path ))
459521 self .load_ms = (time .perf_counter () - t0 ) * 1000
460522 _log (f"Loaded { self .export_format } model ({ self .load_ms :.0f} ms)" )
461523 return model , self .export_format
@@ -467,7 +529,12 @@ def load_optimized(self, model_name: str, use_optimized: bool = True):
467529 exported = self .export_model (pt_model , model_name )
468530 if exported :
469531 try :
470- model = YOLO (str (exported ))
532+ # On Apple Silicon: route CoreML to Neural Engine
533+ if self .backend == "mps" and self .compute_units != "all" :
534+ model = self ._load_coreml_with_compute_units (
535+ str (exported ))
536+ else :
537+ model = YOLO (str (exported ))
471538 self .load_ms = (time .perf_counter () - t0 ) * 1000
472539 _log (f"Loaded freshly exported { self .export_format } model ({ self .load_ms :.0f} ms)" )
473540 return model , self .export_format
@@ -508,7 +575,7 @@ def load_optimized(self, model_name: str, use_optimized: bool = True):
508575
509576 def to_dict (self ) -> dict :
510577 """Serialize environment info for JSON output."""
511- return {
578+ d = {
512579 "backend" : self .backend ,
513580 "device" : self .device ,
514581 "export_format" : self .export_format ,
@@ -519,6 +586,9 @@ def to_dict(self) -> dict:
519586 "export_ms" : round (self .export_ms , 1 ),
520587 "load_ms" : round (self .load_ms , 1 ),
521588 }
589+ if self .backend == "mps" :
590+ d ["compute_units" ] = self .compute_units
591+ return d
522592
523593
524594# ─── CLI: run standalone for diagnostics ─────────────────────────────────────
0 commit comments