Skip to content

Commit 79fe3a3

Browse files
authored
Add coreml_compute_plan.py: report which CoreML ops dispatch to ANE / GPU / CPU (#19252)
### Summary

CoreML decides at compile/load time which device each MIL operation will execute on, and coremltools 9.0+ exposes that through `MLComputePlan`. The recurring question on the issue tracker is *"why isn't my model running fully on the ANE?"* — for example:

- #4091 — `llama model is not fully lowered to ANE`
- #11541 — `CoreML model is crashing on iPhone GPU, but not on iPhone CPU or macOS GPU`
- #8439 — `ANE compile OOMs on certain input shapes`
- #8445 — `CPU Overhead After ANE Execution`

Today the only way for an ExecuTorch user to answer it is to break out Swift / Xcode. This PR adds a Python wrapper around `MLComputePlan` so the answer is one shell command:

```
$ python coreml_compute_plan.py --model_path my_model.mlpackage \
    --compute_units cpu_and_ne --show_non_ane

=== my_model.mlpackage ===
 ANE: 412 / 480 ( 85.8%)
 CPU: 68 / 480 ( 14.2%)

 Non-ANE op types:
 32 ios17.cast
 18 ios17.gather
 12 ios17.reshape
 6 ios17.constexpr_blockwise_shift_scale
```

Inputs supported:

| Input | Behavior |
|---|---|
| `.pte` | Extract every Core ML partition into a tempdir, then analyze each. |
| `.mlpackage` | Compile to `.mlmodelc` in a tempdir, then analyze. |
| `.mlmodelc` | Analyze directly. |

The PTE path reuses the same JSON/named-data extraction logic that `extract_coreml_models.py` uses, and is inlined into the script so it can be run against a plain CoreML model without depending on the executorch package.

### Test plan

Added `test_coreml_compute_plan.py` covering:

- `_device_name(...)` for `None` and a stub `MLNeuralEngineComputeDevice`.
- `_COMPUTE_UNIT_CHOICES` mapping (`cpu_and_ne` / `all`).
- `analyze_one(...)` end-to-end on a tiny `relu(x @ x.T) + x.sum()` mlpackage built with `coremltools.convert(...)`: returns rows for every dispatched op, with a `main` function and the expected MIL op types (`matmul`, `relu`, `add`, `reduce_sum`).

```
$ python -m pytest examples/apple/coreml/scripts/test_coreml_compute_plan.py -v
============================== 7 passed in 3.68s ===============================
```

I also ran the script against a few hand-built `.mlpackage` and `.mlmodelc` files on macOS 26 with coremltools 9.0 and verified the output matches what `MLComputePlan` returns directly.

Authored with Claude.

cc @kimishpatel @YifanShenSZ @cymbalrush @metascroy
1 parent 034b044 commit 79fe3a3

4 files changed

Lines changed: 422 additions & 3 deletions

File tree

examples/apple/coreml/scripts/BUCK

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,19 @@ fbcode_target(_kind = python_binary,
1616
],
1717
)
1818

19+
# Python binary target for the compute-plan report script; its entry point is
# the `main` function of coreml_compute_plan.py.
fbcode_target(_kind = python_binary,
    name = "coreml_compute_plan",
    srcs = [
        "coreml_compute_plan.py",
    ],
    main_function = "executorch.examples.apple.coreml.scripts.coreml_compute_plan.main",
    deps = [
        "//executorch/backends/apple/coreml:executorchcoreml",
        "//executorch/exir:schema",
        "//executorch/exir/_serialize:lib",
    ],
)
31+
1932
fbcode_target(_kind = python_binary,
2033
name = "export",
2134
srcs = [
Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
# Copyright © 2026 Apple Inc. All rights reserved.
2+
#
3+
# Please refer to the license found in the LICENSE file in the root directory of the source tree.
4+
5+
"""Report which CoreML operations would dispatch to ANE / GPU / CPU.
6+
7+
The CoreML runtime decides at compile/load time which compute device each
8+
MIL operation will run on; that decision is exposed by ``MLComputePlan``
9+
in coremltools 9.0+. This script wraps that API so users can answer
10+
"why isn't my model running on the ANE?" without writing Swift.
11+
12+
Usage::
13+
14+
# Analyze a CoreML model directly (mlpackage or compiled mlmodelc).
15+
python coreml_compute_plan.py --model_path path/to/model.mlpackage
16+
17+
# Analyze every Core ML partition embedded in an ExecuTorch .pte.
18+
python coreml_compute_plan.py --model_path path/to/program.pte
19+
20+
# Show ops that fell off the ANE, grouped by op type.
21+
python coreml_compute_plan.py --model_path model.mlpackage --show_non_ane
22+
23+
# Pick which devices the runtime is allowed to consider.
24+
python coreml_compute_plan.py --model_path model.mlpackage \\
25+
--compute_units cpu_and_ne
26+
"""
27+
28+
import argparse
29+
import os
30+
import sys
31+
import tempfile
32+
from collections import Counter
33+
from typing import Iterable, List, Tuple
34+
35+
import coremltools as ct
36+
from coremltools.models.compute_device import (
37+
MLCPUComputeDevice,
38+
MLGPUComputeDevice,
39+
MLNeuralEngineComputeDevice,
40+
)
41+
from coremltools.models.compute_plan import MLComputePlan
42+
43+
from executorch.examples.apple.coreml.scripts.extract_coreml_models import (
44+
extract_coreml_models,
45+
)
46+
47+
48+
# Ordered (device class, short label) pairs. `_device_name` scans this list
# with isinstance() and returns the label of the first class that matches.
_DEVICE_NAMES: List[Tuple[type, str]] = [
    (MLNeuralEngineComputeDevice, "ANE"),
    (MLGPUComputeDevice, "GPU"),
    (MLCPUComputeDevice, "CPU"),
]
53+
54+
# Maps each accepted `--compute_units` command-line value to the coremltools
# ComputeUnit that is handed to MLComputePlan.load_from_path.
_COMPUTE_UNIT_CHOICES = {
    "all": ct.ComputeUnit.ALL,
    "cpu_and_ne": ct.ComputeUnit.CPU_AND_NE,
    "cpu_and_gpu": ct.ComputeUnit.CPU_AND_GPU,
    "cpu_only": ct.ComputeUnit.CPU_ONLY,
}
60+
61+
62+
def _device_name(device) -> str:
    """Translate a compute-device object into a short display label.

    Returns ``"unknown"`` when ``device`` is ``None``, the label registered in
    ``_DEVICE_NAMES`` for the first class ``device`` is an instance of, and
    otherwise the device's own class name.
    """
    if device is None:
        return "unknown"
    matching_labels = (
        label for device_cls, label in _DEVICE_NAMES if isinstance(device, device_cls)
    )
    # Fall back to the raw class name for device types not listed in the table.
    return next(matching_labels, type(device).__name__)
69+
70+
71+
def _iter_operations(block) -> Iterable:
    """Yield every operation in ``block``, descending into nested blocks.

    Operations are produced in pre-order: an operation is yielded first, then
    the operations of each block listed in its ``blocks`` attribute (if it has
    one and it is non-empty), before moving on to the next sibling.
    """
    # Explicit stack of iterators; the top entry is the level being walked.
    pending = [iter(block.operations)]
    while pending:
        try:
            operation = next(pending[-1])
        except StopIteration:
            pending.pop()
            continue
        yield operation
        children = getattr(operation, "blocks", None) or []
        if children:
            # Walk all child blocks, in order, before resuming the parent level.
            pending.append(
                child_op for child in children for child_op in child.operations
            )
76+
77+
78+
def _ensure_compiled(model_path: str, tmpdir: str) -> str:
    """Return a `.mlmodelc` path; compile from `.mlpackage` if needed.

    Args:
        model_path: Path to a ``.mlmodelc`` or ``.mlpackage`` bundle. Trailing
            ``/`` characters (as added by shell completion on directories)
            are ignored.
        tmpdir: Directory that receives the compiled ``.mlmodelc`` when
            ``model_path`` is a ``.mlpackage``.

    Returns:
        The (slash-stripped) input path for a ``.mlmodelc``; otherwise the
        path returned by ``ct.models.utils.compile_model``.

    Raises:
        ValueError: If the path has neither supported suffix.
    """
    # Model bundles are directories, so "model.mlpackage/" is a common
    # spelling; strip the separator before checking the suffix.
    normalized = model_path.rstrip("/") or model_path
    if normalized.endswith(".mlmodelc"):
        return normalized
    if normalized.endswith(".mlpackage"):
        # Swap only the trailing suffix; str.replace would also rewrite any
        # earlier ".mlpackage" occurrence inside the name.
        stem = os.path.basename(normalized)[: -len(".mlpackage")]
        dest = os.path.join(tmpdir, stem + ".mlmodelc")
        return str(ct.models.utils.compile_model(normalized, destination_path=dest))
    raise ValueError(f"Expected a .mlpackage or .mlmodelc path, got: {model_path}")
88+
89+
90+
def analyze_one(
    model_path: str, compute_units: ct.ComputeUnit
) -> List[Tuple[str, str, str]]:
    """Return [(function, operator_name, device)] for every op that has a plan.

    coremltools 9.0's ``MLComputePlan.load_from_path`` only exposes usage for
    the default function of a multifunction package, so a multifunction
    .mlpackage is analyzed function-by-function by projecting each function
    as the ``main`` of a temp single-function copy.

    Args:
        model_path: Path to a ``.mlpackage`` or ``.mlmodelc``. Trailing ``/``
            characters are ignored so ``model.mlpackage/`` is accepted.
        compute_units: Devices the runtime may consider when planning.

    Returns:
        One ``(function, operator_name, device)`` tuple per dispatched op.
    """
    # The helpers below pick a code path by file suffix; a trailing separator
    # (common for bundle directories) would otherwise defeat those checks and,
    # for multifunction packages, skip the per-function analysis entirely.
    model_path = model_path.rstrip("/") or model_path

    function_names = _mlpackage_function_names(model_path)
    if len(function_names) <= 1:
        return _analyze_compiled(model_path, compute_units)

    rows: List[Tuple[str, str, str]] = []
    with tempfile.TemporaryDirectory() as tmpdir:
        for fname in function_names:
            projected = _project_to_single(model_path, fname, tmpdir)
            # The projected copy names every function "main"; report the
            # original function name instead.
            rows.extend(
                (fname, op_name, device)
                for _, op_name, device in _analyze_compiled(projected, compute_units)
            )
    return rows
110+
111+
112+
def _analyze_compiled(
    model_path: str, compute_units: ct.ComputeUnit
) -> List[Tuple[str, str, str]]:
    """Load the compute plan for one model and list its dispatched ops.

    The model is compiled into a temporary directory when necessary, its
    ``MLComputePlan`` is loaded for ``compute_units``, and every operation of
    every function in the program is looked up in the plan.

    Returns:
        ``(function_name, operator_name, device_label)`` tuples for each
        operation the plan reports usage for.

    Raises:
        RuntimeError: If the loaded model structure has no MLProgram.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        compiled_path = _ensure_compiled(model_path, scratch_dir)
        compute_plan = MLComputePlan.load_from_path(
            compiled_path, compute_units=compute_units
        )
        ml_program = compute_plan.model_structure.program
        if ml_program is None:
            raise RuntimeError(
                f"{model_path} is not an MLProgram model; this tool only supports "
                "the MLProgram backend (the CoreML backend executorch produces today)."
            )

        dispatched: List[Tuple[str, str, str]] = []
        for function_name, function in ml_program.functions.items():
            for operation in _iter_operations(function.block):
                device_usage = (
                    compute_plan.get_compute_device_usage_for_mlprogram_operation(
                        operation
                    )
                )
                # Constants and similar non-dispatched ops don't have a plan.
                if device_usage is not None:
                    dispatched.append(
                        (
                            function_name,
                            operation.operator_name,
                            _device_name(device_usage.preferred_compute_device),
                        )
                    )
        return dispatched
140+
141+
142+
def _mlpackage_function_names(model_path: str) -> List[str]:
    """List the MLProgram function names stored in an ``.mlpackage``.

    An empty list is returned when ``model_path`` does not end in
    ``.mlpackage`` or when the model spec's type is not ``mlProgram``.
    """
    if model_path.endswith(".mlpackage"):
        # skip_model_load=True: only the spec is read here.
        model_spec = ct.models.MLModel(model_path, skip_model_load=True).get_spec()
        if model_spec.WhichOneof("Type") == "mlProgram":
            return list(model_spec.mlProgram.functions.keys())
    return []
150+
151+
152+
def _project_to_single(src_mlpackage: str, function_name: str, tmpdir: str) -> str:
    """Write a copy of ``src_mlpackage`` whose only function is ``function_name``.

    The selected function is renamed to ``main`` and made the default function
    of the copy, which is saved as ``<function_name>.mlpackage`` inside
    ``tmpdir``. Returns the path of the saved package.
    """
    from coremltools.models.utils import MultiFunctionDescriptor, save_multifunction

    descriptor = MultiFunctionDescriptor()
    descriptor.add_function(
        src_mlpackage,
        src_function_name=function_name,
        target_function_name="main",
    )
    descriptor.default_function_name = "main"

    out_path = os.path.join(tmpdir, f"{function_name}.mlpackage")
    save_multifunction(descriptor, out_path)
    return out_path
166+
167+
168+
def _print_report(
    label: str, rows: List[Tuple[str, str, str]], show_non_ane: bool
) -> None:
    """Print a per-device dispatch summary for one model.

    Args:
        label: Heading printed above the summary.
        rows: ``(function, operator_name, device)`` tuples to summarize.
        show_non_ane: When true, also list the operator names whose device is
            not ``"ANE"``, most frequent first.
    """
    print(f"\n=== {label} ===")
    if not rows:
        print(" (no dispatched operations found)")
        return

    by_device = Counter(device for _, _, device in rows)
    total = len(rows)

    # Well-known labels come first in a fixed order. Any other label (a device
    # class name that is not one of the well-known labels) is appended so that
    # every counted op is reported and the percentages add up to 100%.
    known_devices = ("ANE", "GPU", "CPU", "unknown")
    other_devices = sorted(dev for dev in by_device if dev not in known_devices)
    for device in (*known_devices, *other_devices):
        count = by_device.get(device, 0)
        if count == 0:
            continue
        pct = 100.0 * count / total
        print(f" {device}: {count:5d} / {total} ({pct:5.1f}%)")

    if show_non_ane:
        non_ane_ops = Counter(op_name for _, op_name, dev in rows if dev != "ANE")
        if non_ane_ops:
            print("\n Non-ANE op types:")
            for op_name, count in non_ane_ops.most_common():
                print(f" {count:5d} {op_name}")
190+
191+
192+
def main() -> int:
    """Parse the command line, analyze the requested model, and print reports.

    A ``.pte`` input is read, its Core ML partitions are extracted into a
    temporary directory, and each extracted model is analyzed and reported.
    Any other input is passed to ``analyze_one`` directly.

    Returns:
        ``0`` on success, or ``1`` when a ``.pte`` yields no extracted models.
    """
    cli = argparse.ArgumentParser(description=__doc__.splitlines()[0])
    cli.add_argument(
        "--model_path",
        required=True,
        help="Path to a .pte, .mlpackage, or .mlmodelc.",
    )
    cli.add_argument(
        "--compute_units",
        default="cpu_and_ne",
        choices=sorted(_COMPUTE_UNIT_CHOICES),
        help="Which devices the runtime may use when planning dispatch.",
    )
    cli.add_argument(
        "--show_non_ane",
        action="store_true",
        help="List op types that did not get assigned to the ANE.",
    )
    options = cli.parse_args()

    units = _COMPUTE_UNIT_CHOICES[options.compute_units]
    target = options.model_path

    # Plain Core ML model: a single report, labelled with the bundle name.
    if not target.endswith(".pte"):
        report_rows = analyze_one(target, units)
        _print_report(
            os.path.basename(target.rstrip("/")), report_rows, options.show_non_ane
        )
        return 0

    # ExecuTorch program: one report per extracted model.
    with open(target, "rb") as pte_file:
        pte_bytes = pte_file.read()
    with tempfile.TemporaryDirectory() as scratch_dir:
        partitions = extract_coreml_models(pte_bytes, out_dir=scratch_dir)
        if not partitions:
            print(
                f"{target} does not contain any CoreML delegate partitions.",
                file=sys.stderr,
            )
            return 1
        for partition in partitions:
            report_rows = analyze_one(str(partition), units)
            _print_report(partition.name, report_rows, options.show_non_ane)
    return 0
233+
234+
235+
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())

examples/apple/coreml/scripts/extract_coreml_models.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import shutil
1010
from pathlib import Path
1111

12-
from typing import Dict, List, Optional
12+
from typing import Dict, List, Optional, Union
1313

1414
from executorch.backends.apple.coreml import executorchcoreml
1515
from executorch.exir._serialize._program import deserialize_pte_binary
@@ -22,7 +22,12 @@
2222
COREML_BACKEND_ID = "CoreMLBackend"
2323

2424

25-
def extract_coreml_models(pte_data: bytes):
25+
def extract_coreml_models(
26+
pte_data: bytes,
27+
out_dir: Optional[Union[str, Path]] = None,
28+
) -> List[Path]:
29+
out_root = Path(out_dir) if out_dir is not None else Path("extracted_coreml_models")
30+
2631
pte_file = deserialize_pte_binary(pte_data)
2732
program = pte_file.program
2833

@@ -44,6 +49,7 @@ def extract_coreml_models(pte_data: bytes):
4449
]
4550

4651
# Track extracted models to avoid duplicates (multifunction models share partitions)
52+
extracted_paths: List[Path] = []
4753
extracted_keys: set = set()
4854
model_index: int = 1
4955

@@ -95,7 +101,7 @@ def extract_coreml_models(pte_data: bytes):
95101
if model_name is None:
96102
model_name = f"model_{model_index}"
97103

98-
model_path: Path = Path() / "extracted_coreml_models" / model_name
104+
model_path: Path = out_root / model_name
99105
if model_path.exists():
100106
shutil.rmtree(model_path.absolute())
101107
os.makedirs(model_path.absolute())
@@ -104,11 +110,14 @@ def extract_coreml_models(pte_data: bytes):
104110
coreml_processed_bytes, str(model_path.absolute())
105111
):
106112
print(f"Core ML models are extracted and saved to path = {model_path}")
113+
extracted_paths.append(model_path)
107114
model_index += 1
108115

109116
if len(coreml_delegates) == 0:
110117
print("The model isn't delegated to Core ML.")
111118

119+
return extracted_paths
120+
112121

113122
def main() -> None:
114123
"""

0 commit comments

Comments
 (0)