Skip to content

Commit 8c6764c

Browse files
committed
feat: add CoreML depth estimation models and benchmark
- Add models.json with CoreML model definitions for depth estimation - Add benchmark_coreml.py script for CoreML inference performance testing
1 parent 86cf4ad commit 8c6764c

File tree

2 files changed

+324
-0
lines changed

2 files changed

+324
-0
lines changed
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
{
2+
"studio": {
3+
"title": "3D Depth Vision Studio",
4+
"subtitle": "Convert 2D video to 3D depth maps • Privacy-first scene understanding",
5+
"icon": "layers"
6+
},
7+
"models_dir": "~/.aegis-ai/models/feature-extraction",
8+
"models": {
9+
"depth-anything-v2-small": {
10+
"name": "Depth Anything V2 Small",
11+
"type": "depth_estimation",
12+
"description": "Real-time monocular depth estimation — Apple Neural Engine optimized",
13+
"input_size": [518, 392],
14+
"platforms": {
15+
"darwin": {
16+
"repository": "apple/coreml-depth-anything-v2-small",
17+
"format": "mlpackage",
18+
"variants": {
19+
"DepthAnythingV2SmallF16": {
20+
"precision": "float16",
21+
"size_mb": 49.8,
22+
"description": "Float16 — optimized for Neural Engine"
23+
},
24+
"DepthAnythingV2SmallF16INT8": {
25+
"precision": "float16_int8",
26+
"size_mb": 25.0,
27+
"description": "Float16 + INT8 quantization — smallest"
28+
},
29+
"DepthAnythingV2SmallF32": {
30+
"precision": "float32",
31+
"size_mb": 99.2,
32+
"description": "Float32 — highest precision"
33+
},
34+
"DepthAnythingV2SmallF16P6": {
35+
"precision": "float16_p6",
36+
"size_mb": 18.0,
37+
"description": "Float16 palettized 6-bit"
38+
},
39+
"DepthAnythingV2SmallF16P8": {
40+
"precision": "float16_p8",
41+
"size_mb": 24.0,
42+
"description": "Float16 palettized 8-bit"
43+
},
44+
"DepthAnythingV2SmallF32INT8": {
45+
"precision": "float32_int8",
46+
"size_mb": 24.0,
47+
"description": "Float32 + INT8 quantization"
48+
},
49+
"DepthAnythingV2SmallF32P6": {
50+
"precision": "float32_p6",
51+
"size_mb": 18.0,
52+
"description": "Float32 palettized 6-bit"
53+
},
54+
"DepthAnythingV2SmallF32P8": {
55+
"precision": "float32_p8",
56+
"size_mb": 24.0,
57+
"description": "Float32 palettized 8-bit"
58+
}
59+
}
60+
},
61+
"linux": {
62+
"repository": "depth-anything/Depth-Anything-V2-Small",
63+
"format": "pth",
64+
"variants": {
65+
"depth_anything_v2_vits": {
66+
"precision": "float32",
67+
"size_mb": 99.0,
68+
"description": "PyTorch ViT-S — CUDA/CPU"
69+
}
70+
}
71+
},
72+
"win32": {
73+
"repository": "depth-anything/Depth-Anything-V2-Small",
74+
"format": "pth",
75+
"variants": {
76+
"depth_anything_v2_vits": {
77+
"precision": "float32",
78+
"size_mb": 99.0,
79+
"description": "PyTorch ViT-S — CUDA/CPU"
80+
}
81+
}
82+
}
83+
}
84+
},
85+
"depth-anything-v2-base": {
86+
"name": "Depth Anything V2 Base",
87+
"type": "depth_estimation",
88+
"description": "Higher accuracy depth estimation — larger model",
89+
"input_size": [518, 392],
90+
"platforms": {
91+
"linux": {
92+
"repository": "depth-anything/Depth-Anything-V2-Base",
93+
"format": "pth",
94+
"variants": {
95+
"depth_anything_v2_vitb": {
96+
"precision": "float32",
97+
"size_mb": 390.0,
98+
"description": "PyTorch ViT-B — CUDA/CPU"
99+
}
100+
}
101+
},
102+
"win32": {
103+
"repository": "depth-anything/Depth-Anything-V2-Base",
104+
"format": "pth",
105+
"variants": {
106+
"depth_anything_v2_vitb": {
107+
"precision": "float32",
108+
"size_mb": 390.0,
109+
"description": "PyTorch ViT-B — CUDA/CPU"
110+
}
111+
}
112+
}
113+
}
114+
},
115+
"depth-anything-v2-large": {
116+
"name": "Depth Anything V2 Large",
117+
"type": "depth_estimation",
118+
"description": "Highest accuracy depth estimation — largest model",
119+
"input_size": [518, 392],
120+
"platforms": {
121+
"linux": {
122+
"repository": "depth-anything/Depth-Anything-V2-Large",
123+
"format": "pth",
124+
"variants": {
125+
"depth_anything_v2_vitl": {
126+
"precision": "float32",
127+
"size_mb": 1280.0,
128+
"description": "PyTorch ViT-L — CUDA recommended"
129+
}
130+
}
131+
},
132+
"win32": {
133+
"repository": "depth-anything/Depth-Anything-V2-Large",
134+
"format": "pth",
135+
"variants": {
136+
"depth_anything_v2_vitl": {
137+
"precision": "float32",
138+
"size_mb": 1280.0,
139+
"description": "PyTorch ViT-L — CUDA recommended"
140+
}
141+
}
142+
}
143+
}
144+
}
145+
}
146+
}
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Standalone CoreML depth benchmark — spawned by Aegis IPC handler.
4+
5+
Usage:
6+
python3 benchmark_coreml.py --variant DepthAnythingV2SmallF16 --runs 10 --colormap viridis
7+
8+
Outputs a single JSON line to stdout with benchmark results.
9+
"""
10+
11+
import sys
12+
import json
13+
import time
14+
import os
15+
import argparse
16+
import platform
17+
from pathlib import Path
18+
19+
20+
# Shared model cache directory — must stay in sync with "models_dir" in models.json.
MODELS_DIR = Path.home() / ".aegis-ai" / "models" / "feature-extraction"
# Fixed input resolution expected by the CoreML Depth Anything V2 packages.
COREML_INPUT_SIZE = (518, 392)  # width, height

# Maps user-facing colormap names to OpenCV COLORMAP_* integer IDs used by
# cv2.applyColorMap (e.g. 16 is passed when --colormap viridis is selected).
# NOTE(review): values assumed to match the installed OpenCV build's
# COLORMAP_* constants — confirm against cv2.
COLORMAP_MAP = {
    "inferno": 1, "viridis": 16, "plasma": 13, "magma": 12,
    "jet": 2, "turbo": 18, "hot": 11, "cool": 8,
}

# Maps CLI --compute-units values to coremltools ComputeUnit attribute names.
# "npu" maps to "ALL" — presumably because coremltools exposes no NE-only
# unit, so ALL is the closest option; verify against the coremltools API.
COMPUTE_UNIT_MAP = {
    "all": "ALL",
    "cpu": "CPU_ONLY",
    "gpu": "CPU_AND_GPU",
    "cpu_npu": "CPU_AND_NE",
    "npu": "ALL",
}
35+
36+
37+
def _log(msg):
38+
print(f"[DepthBenchmark] {msg}", file=sys.stderr, flush=True)
39+
40+
41+
def download_test_image(url):
    """Download a test image from *url* and return it as a BGR uint8 numpy array.

    Falls back to a deterministic synthetic 640x480 horizontal gradient when
    the download fails or the file cannot be decoded, so the benchmark still
    runs (and produces reproducible fallback timings) while offline.
    """
    import cv2
    import numpy as np
    import urllib.request

    _log(f"Downloading test image: {url}")
    tmp_path = "/tmp/aegis_depth_bench_test.jpg"

    try:
        urllib.request.urlretrieve(url, tmp_path)
        img = cv2.imread(tmp_path)
        if img is not None:
            return img
        # imread returns None for corrupt/unsupported files — say so instead
        # of silently falling through.
        _log("Downloaded file could not be decoded as an image")
    except Exception as e:
        _log(f"Download failed: {e}")

    # Fallback: an actual gradient. (The previous code returned random noise,
    # which contradicted this log message and made fallback runs
    # non-reproducible across invocations.)
    _log("Using synthetic test image (640x480 gradient)")
    row = np.linspace(0, 255, 640, dtype=np.uint8)
    gray = np.tile(row, (480, 1))
    return np.dstack([gray, gray, gray])
61+
62+
63+
def run_benchmark(args):
    """Benchmark one CoreML depth-estimation variant and print a JSON result line.

    Loads the ``.mlpackage`` named by ``args.variant`` from MODELS_DIR, runs one
    warm-up inference plus ``args.runs`` timed inferences on a test image, and
    writes exactly one JSON object to stdout (the IPC contract with the parent
    Aegis process; all human-readable logging goes to stderr via _log).
    Exits with status 1 and a JSON ``{"error": ...}`` line if the model is missing.
    """
    # Heavy third-party deps are imported lazily inside the function.
    import cv2
    import numpy as np
    import coremltools as ct
    from PIL import Image

    variant_id = args.variant
    model_path = MODELS_DIR / f"{variant_id}.mlpackage"

    if not model_path.exists():
        # Error goes to stdout as JSON so the parent process can parse it.
        print(json.dumps({"error": f"Model not found: {model_path}"}))
        sys.exit(1)

    # Load model with the requested compute units; unknown CLI values and
    # unknown ComputeUnit attribute names both fall back to ALL.
    _log(f"Loading CoreML model: {variant_id}")
    compute_unit_key = COMPUTE_UNIT_MAP.get(args.compute_units, "ALL")
    compute_unit = getattr(ct.ComputeUnit, compute_unit_key, ct.ComputeUnit.ALL)

    t0 = time.perf_counter()
    model = ct.models.MLModel(str(model_path), compute_units=compute_unit)
    load_time_ms = (time.perf_counter() - t0) * 1000
    _log(f"Model loaded in {load_time_ms:.0f}ms (compute_units={compute_unit_key})")

    # Get test image (BGR, arbitrary size); keep original size for the preview resize.
    test_image = download_test_image(args.test_image_url)
    original_h, original_w = test_image.shape[:2]
    input_w, input_h = COREML_INPUT_SIZE

    # Prepare input: BGR -> RGB, resize to the model's fixed input size,
    # wrap as a PIL image (the form model.predict expects for image inputs).
    rgb = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(rgb, (input_w, input_h), interpolation=cv2.INTER_LINEAR)
    pil_image = Image.fromarray(resized, mode="RGB")

    colormap_id = COLORMAP_MAP.get(args.colormap, 16)  # 16 == the "viridis" entry

    # Warm-up run — not timed, so one-time startup costs stay out of the stats.
    _log("Warm-up inference...")
    model.predict({"image": pil_image})

    # Benchmark runs
    _log(f"Running {args.runs} benchmark iterations...")
    times = []
    last_depth_colored = None

    for i in range(args.runs):
        t0 = time.perf_counter()
        prediction = model.predict({"image": pil_image})
        elapsed_ms = (time.perf_counter() - t0) * 1000
        times.append(elapsed_ms)

        if i == 0:
            # Process first result for extraction preview. Runs after the timer
            # stops, so it does not pollute the timing stats.
            # NOTE(review): assumes the first output key holds the depth map —
            # confirm against the .mlpackage output spec.
            output_key = list(prediction.keys())[0]
            depth_map = np.array(prediction[output_key])
            if depth_map.ndim > 2:
                # Drop singleton batch/channel dims down to a 2-D map.
                depth_map = np.squeeze(depth_map)
            # Normalize to [0, 1]; epsilon guards division for a constant map.
            depth_norm = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min() + 1e-8)
            depth_uint8 = (depth_norm * 255).astype(np.uint8)
            last_depth_colored = cv2.applyColorMap(depth_uint8, colormap_id)
            last_depth_colored = cv2.resize(last_depth_colored, (original_w, original_h))

    # Compute stats
    import statistics
    times_sorted = sorted(times)
    avg_ms = statistics.mean(times)
    std_ms = statistics.stdev(times) if len(times) > 1 else 0  # stdev needs >= 2 samples

    result = {
        "model_id": "depth-anything-v2-small",
        "variant_id": variant_id,
        "num_runs": args.runs,
        "successful_runs": len(times),
        "avg_time_ms": round(avg_ms, 2),
        "min_time_ms": round(times_sorted[0], 2),
        "max_time_ms": round(times_sorted[-1], 2),
        "std_time_ms": round(std_ms, 2),
        "fps": round(1000.0 / avg_ms, 2) if avg_ms > 0 else 0,
        "model_load_ms": round(load_time_ms, 2),
        "compute_units": args.compute_units,
    }

    # Encode extraction result as base64 JPEG so the preview travels inside
    # the single JSON line.
    if last_depth_colored is not None:
        import base64
        _, buf = cv2.imencode(".jpg", last_depth_colored, [cv2.IMWRITE_JPEG_QUALITY, 85])
        result["extraction_result"] = {
            "success": True,
            "feature_type": "depth_estimation",
            "feature_data": base64.b64encode(buf).decode("ascii"),
            "processing_time": round(times[0], 2),
            "metadata": {
                "model": variant_id,
                "colormap": args.colormap,
                "compute_units": args.compute_units,
                "input_size": list(COREML_INPUT_SIZE),
            },
        }

    _log(f"Benchmark complete: {avg_ms:.1f}ms avg ({result['fps']:.1f} FPS)")
    # Exactly one JSON line on stdout — the contract with the IPC handler.
    print(json.dumps(result), flush=True)
163+
164+
165+
if __name__ == "__main__":
    # Guard clause: CoreML only exists on macOS; report a parseable JSON error.
    if platform.system() != "Darwin":
        print(json.dumps({"error": "CoreML benchmark requires macOS"}))
        sys.exit(1)

    arg_parser = argparse.ArgumentParser()
    for flag, options in (
        ("--variant", {"default": "DepthAnythingV2SmallF16"}),
        ("--runs", {"type": int, "default": 10}),
        ("--colormap", {"default": "viridis"}),
        ("--compute-units", {"default": "all"}),
        ("--test-image-url", {"default": "https://ultralytics.com/images/bus.jpg"}),
    ):
        arg_parser.add_argument(flag, **options)

    run_benchmark(arg_parser.parse_args())

0 commit comments

Comments
 (0)