Commit 772473d

feat(depth-estimation): refactor to TransformSkillBase + privacy-first defaults
Refactors the depth-estimation skill to subclass TransformSkillBase, reducing transform.py from ~160 lines of boilerplate to ~100 lines of pure skill logic. Key changes:

- Default blend_mode changed to 'depth_only' for privacy anonymization
- Version bumped to 1.1.0, category set to 'privacy'
- SKILL.md documents the TransformSkillBase interface for new skill authors
- Protocol updated: frame_id tracking, config-update command, base64 output
- Adds on_config_update() for live parameter changes from Aegis
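For context on the boilerplate being deleted below, here is a minimal sketch of the stdin/stdout dispatch loop a base class like `TransformSkillBase` presumably provides, inferred from the protocol documented in SKILL.md. `transform_base.py` itself is not part of this commit, so apart from the `load_model`, `transform_frame`, and `on_config_update` hooks named in the diff, every detail here is an assumption:

```python
# Hypothetical sketch only: transform_base.py is not in this diff, and the
# real base class may differ. Inferred from the SKILL.md protocol below.
import base64
import json
import sys

import cv2


class TransformSkillSketch:
    device = "cpu"  # the real base class presumably auto-selects cuda/mps/cpu

    def load_model(self, config: dict) -> dict:
        raise NotImplementedError  # subclass loads its model, returns ready-info

    def transform_frame(self, image, metadata: dict):
        raise NotImplementedError  # subclass returns a transformed BGR image

    def on_config_update(self, config: dict):
        pass  # optional hook for live parameter changes

    def run(self, config: dict | None = None) -> None:
        info = self.load_model(config or {})
        print(json.dumps({"event": "ready", **info}), flush=True)

        for line in sys.stdin:
            if not line.strip():
                continue
            msg = json.loads(line)
            if msg.get("command") == "stop":
                break
            if msg.get("command") == "config-update":
                self.on_config_update(msg.get("config", {}))
            elif msg.get("event") == "frame":
                image = cv2.imread(msg["frame_path"])  # BGR frame from Aegis
                output = self.transform_frame(image, msg)
                _, jpeg = cv2.imencode(".jpg", output)
                print(json.dumps({
                    "event": "transform",
                    "frame_id": msg.get("frame_id"),
                    "camera_id": msg.get("camera_id"),
                    "transform_data": base64.b64encode(jpeg.tobytes()).decode("ascii"),
                }), flush=True)
```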
1 parent e98df9d commit 772473d

2 files changed: 133 additions & 133 deletions

skills/transformation/depth-estimation/SKILL.md

Lines changed: 31 additions & 7 deletions
````diff
@@ -1,7 +1,8 @@
 ---
 name: depth-estimation
-description: "Real-time depth map estimation using Depth Anything v2"
-version: 1.0.0
+description: "Real-time depth map estimation for privacy transforms using Depth Anything v2"
+version: 1.1.0
+category: privacy
 
 parameters:
   - name: model
@@ -14,8 +15,8 @@ parameters:
   - name: blend_mode
     label: "Display Mode"
     type: select
-    options: ["overlay", "side_by_side", "depth_only"]
-    default: "overlay"
+    options: ["depth_only", "overlay", "side_by_side"]
+    default: "depth_only"
     group: Display
 
   - name: opacity
@@ -46,27 +47,50 @@ capabilities:
   description: "Real-time depth estimation overlay on live feed"
 ---
 
-# Depth Estimation
+# Depth Estimation (Privacy)
 
 Real-time monocular depth estimation using Depth Anything v2. Transforms camera feeds with colorized depth maps — near objects appear warm, far objects appear cool.
 
+When used for **privacy mode**, the `depth_only` blend mode fully anonymizes the scene while preserving spatial layout and activity, enabling security monitoring without revealing identities.
+
 ## What You Get
 
+- **Privacy anonymization** — depth-only mode hides all visual identity
 - **Depth overlays** on live camera feeds
 - **Distance estimation** — approximate distance to detected objects
 - **3D scene understanding** — spatial layout of the scene
 
+## Interface: TransformSkillBase
+
+This skill implements the `TransformSkillBase` interface. Any new privacy skill can be created by subclassing `TransformSkillBase` and implementing two methods:
+
+```python
+from transform_base import TransformSkillBase
+
+class MyPrivacySkill(TransformSkillBase):
+    def load_model(self, config):
+        # Load your model, return {"model": "...", "device": "..."}
+        ...
+
+    def transform_frame(self, image, metadata):
+        # Transform BGR image, return BGR image
+        ...
+```
+
 ## Protocol
 
 ### Aegis → Skill (stdin)
 ```jsonl
-{"event": "frame", "camera_id": "front_door", "frame_path": "/tmp/frame.jpg", "timestamp": "..."}
+{"event": "frame", "frame_id": "cam1_1710001", "camera_id": "front_door", "frame_path": "/tmp/frame.jpg", "timestamp": "..."}
+{"command": "config-update", "config": {"opacity": 0.8, "blend_mode": "overlay"}}
+{"command": "stop"}
 ```
 
 ### Skill → Aegis (stdout)
 ```jsonl
 {"event": "ready", "model": "depth-anything-v2-small", "device": "mps"}
-{"event": "transformed_frame", "camera_id": "front_door", "frame_path": "/tmp/depth_001.jpg", "metadata": {"min_depth": 0.2, "max_depth": 15.0}}
+{"event": "transform", "frame_id": "cam1_1710001", "camera_id": "front_door", "transform_data": "<base64 JPEG>"}
+{"event": "perf_stats", "total_frames": 50, "timings_ms": {"transform": {"avg": 45.2, ...}}}
 ```
 
 ## Setup
````
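Since transformed frames now arrive as inline base64 rather than temp-file paths, the consuming side reduces to a decode step. A minimal sketch of what an Aegis-side handler for the new `transform` event might look like (the function name and surrounding plumbing are hypothetical, not part of this commit):

```python
# Hypothetical consumer of the new protocol; only the decode step is
# prescribed by the commit, the rest is illustrative scaffolding.
import base64

import cv2
import numpy as np


def handle_transform_event(msg: dict):
    """Decode a {"event": "transform", ...} message into a BGR frame."""
    jpeg = base64.b64decode(msg["transform_data"])
    frame = cv2.imdecode(np.frombuffer(jpeg, np.uint8), cv2.IMREAD_COLOR)
    # frame_id lets Aegis pair this output with the exact frame it sent.
    return msg["frame_id"], msg["camera_id"], frame
```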
skills/transformation/depth-estimation/transform.py

Lines changed: 102 additions & 126 deletions
````diff
@@ -1,56 +1,25 @@
 #!/usr/bin/env python3
 """
-Depth Estimation Skill — Real-time monocular depth maps.
+Depth Estimation Privacy Skill — Monocular depth maps via Depth Anything v2.
 
-Transforms camera frames with Depth Anything v2 colorized depth overlays.
+Implements the TransformSkillBase interface to provide real-time depth map
+overlays on camera feeds. When used as a privacy skill, the depth-only mode
+anonymizes the scene while preserving spatial layout and activity recognition.
+
+Usage:
+    python transform.py --model depth-anything-v2-small --device auto
+    python transform.py --config config.json
 """
 
 import sys
-import json
 import argparse
-import signal
-import tempfile
 from pathlib import Path
 
+# Import the base class from the same directory
+_script_dir = Path(__file__).resolve().parent
+sys.path.insert(0, str(_script_dir))
 
-def parse_args():
-    parser = argparse.ArgumentParser(description="Depth Estimation Skill")
-    parser.add_argument("--config", type=str)
-    parser.add_argument("--model", type=str, default="depth-anything-v2-small")
-    parser.add_argument("--colormap", type=str, default="inferno")
-    parser.add_argument("--blend-mode", type=str, default="overlay")
-    parser.add_argument("--opacity", type=float, default=0.5)
-    parser.add_argument("--device", type=str, default="auto")
-    return parser.parse_args()
-
-
-def load_config(args):
-    if args.config and Path(args.config).exists():
-        with open(args.config) as f:
-            return json.load(f)
-    return {
-        "model": args.model,
-        "colormap": args.colormap,
-        "blend_mode": args.blend_mode,
-        "opacity": args.opacity,
-        "device": args.device,
-    }
-
-
-def select_device(pref):
-    if pref != "auto":
-        return pref
-    try:
-        import torch
-        if torch.cuda.is_available(): return "cuda"
-        if hasattr(torch.backends, "mps") and torch.backends.mps.is_available(): return "mps"
-    except ImportError:
-        pass
-    return "cpu"
-
-
-def emit(event):
-    print(json.dumps(event), flush=True)
+from transform_base import TransformSkillBase, _log  # noqa: E402
 
 
 COLORMAP_MAP = {
@@ -62,94 +31,101 @@ def emit(event):
 }
 
 
-def main():
-    args = parse_args()
-    config = load_config(args)
-    device = select_device(config.get("device", "auto"))
+class DepthEstimationSkill(TransformSkillBase):
+    """
+    Depth estimation using Depth Anything v2.
+
+    Produces colorized depth maps that can be blended with the original frame
+    (overlay mode), shown side-by-side, or displayed as depth-only anonymized view.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self._tag = "DepthEstimation"
+        self.model = None
+        self.colormap_id = 1
+        self.opacity = 0.5
+        self.blend_mode = "depth_only"  # Default for privacy: depth_only anonymizes
+
+    def parse_extra_args(self, parser: argparse.ArgumentParser):
+        parser.add_argument("--model", type=str, default="depth-anything-v2-small",
+                            choices=["depth-anything-v2-small", "depth-anything-v2-base",
+                                     "depth-anything-v2-large", "midas-small"])
+        parser.add_argument("--colormap", type=str, default="inferno",
+                            choices=list(COLORMAP_MAP.keys()))
+        parser.add_argument("--blend-mode", type=str, default="depth_only",
+                            choices=["overlay", "side_by_side", "depth_only"])
+        parser.add_argument("--opacity", type=float, default=0.5)
+
+    def load_model(self, config: dict) -> dict:
+        import torch
 
-    try:
+        model_name = config.get("model", "depth-anything-v2-small")
+        self.colormap_id = COLORMAP_MAP.get(config.get("colormap", "inferno"), 1)
+        self.opacity = config.get("opacity", 0.5)
+        self.blend_mode = config.get("blend_mode", "depth_only")
+
+        _log(f"Loading {model_name} on {self.device}", self._tag)
+
+        # Load model via torch hub
+        hub_name = model_name.replace("-", "_")
+        self.model = torch.hub.load(
+            "LiheYoung/Depth-Anything-V2",
+            hub_name,
+            trust_repo=True,
+        )
+        self.model.to(self.device)
+        self.model.eval()
+
+        _log(f"Model loaded: {model_name} on {self.device}", self._tag)
+
+        return {
+            "model": model_name,
+            "device": self.device,
+            "blend_mode": self.blend_mode,
+            "colormap": config.get("colormap", "inferno"),
+        }
+
+    def transform_frame(self, image, metadata: dict):
         import torch
         import cv2
         import numpy as np
 
-        model_name = config.get("model", "depth-anything-v2-small")
-        model = torch.hub.load("LiheYoung/Depth-Anything-V2", model_name.replace("-", "_"), trust_repo=True)
-        model.to(device)
-        model.eval()
-
-        emit({"event": "ready", "model": model_name, "device": device})
-    except Exception as e:
-        emit({"event": "error", "message": f"Failed to load model: {e}", "retriable": False})
-        sys.exit(1)
-
-    running = True
-    def handle_signal(s, f):
-        nonlocal running
-        running = False
-    signal.signal(signal.SIGTERM, handle_signal)
-    signal.signal(signal.SIGINT, handle_signal)
-
-    colormap_id = COLORMAP_MAP.get(config.get("colormap", "inferno"), 1)
-    opacity = config.get("opacity", 0.5)
-    blend_mode = config.get("blend_mode", "overlay")
-
-    for line in sys.stdin:
-        if not running:
-            break
-        line = line.strip()
-        if not line:
-            continue
-        try:
-            msg = json.loads(line)
-        except json.JSONDecodeError:
-            continue
-
-        if msg.get("command") == "stop":
-            break
-
-        if msg.get("event") == "frame":
-            frame_path = msg.get("frame_path")
-            if not frame_path or not Path(frame_path).exists():
-                continue
-
-            try:
-                import torch
-                import cv2
-                import numpy as np
-
-                image = cv2.imread(frame_path)
-                rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-
-                with torch.no_grad():
-                    depth = model.infer_image(rgb)
-
-                # Normalize depth to 0-255
-                depth_norm = ((depth - depth.min()) / (depth.max() - depth.min() + 1e-8) * 255).astype(np.uint8)
-                depth_colored = cv2.applyColorMap(depth_norm, colormap_id)
-
-                if blend_mode == "overlay":
-                    output = cv2.addWeighted(image, 1 - opacity, depth_colored, opacity, 0)
-                elif blend_mode == "side_by_side":
-                    output = np.hstack([image, depth_colored])
-                else:  # depth_only
-                    output = depth_colored
-
-                out_path = tempfile.mktemp(suffix=".jpg", dir="/tmp")
-                cv2.imwrite(out_path, output, [cv2.IMWRITE_JPEG_QUALITY, 90])
-
-                emit({
-                    "event": "transformed_frame",
-                    "camera_id": msg.get("camera_id", "unknown"),
-                    "timestamp": msg.get("timestamp", ""),
-                    "frame_path": out_path,
-                    "metadata": {
-                        "min_depth": float(depth.min()),
-                        "max_depth": float(depth.max()),
-                    },
-                })
-            except Exception as e:
-                emit({"event": "error", "message": f"Depth error: {e}", "retriable": True})
+        rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+        with torch.no_grad():
+            depth = self.model.infer_image(rgb)
+
+        # Normalize depth to 0-255
+        d_min, d_max = depth.min(), depth.max()
+        depth_norm = ((depth - d_min) / (d_max - d_min + 1e-8) * 255).astype(np.uint8)
+        depth_colored = cv2.applyColorMap(depth_norm, self.colormap_id)
+
+        if self.blend_mode == "overlay":
+            output = cv2.addWeighted(image, 1 - self.opacity, depth_colored, self.opacity, 0)
+        elif self.blend_mode == "side_by_side":
+            output = np.hstack([image, depth_colored])
+        else:  # depth_only — full anonymization
+            output = depth_colored
+
+        return output
+
+    def on_config_update(self, config: dict):
+        """Handle live config updates from Aegis."""
+        if "colormap" in config:
+            self.colormap_id = COLORMAP_MAP.get(config["colormap"], self.colormap_id)
+            _log(f"Colormap updated: {config['colormap']}", self._tag)
+        if "opacity" in config:
+            self.opacity = float(config["opacity"])
+            _log(f"Opacity updated: {self.opacity}", self._tag)
+        if "blend_mode" in config:
+            self.blend_mode = config["blend_mode"]
+            _log(f"Blend mode updated: {self.blend_mode}", self._tag)
+
+    def get_output_mode(self) -> str:
+        """Use base64 for privacy transforms — avoids temp file cleanup issues."""
+        return "base64"
 
 
 if __name__ == "__main__":
-    main()
+    DepthEstimationSkill().run()
````
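The refactored skill can be smoke-tested end to end over stdin/stdout. A hedged sketch, assuming the script lives at `skills/transformation/depth-estimation/transform.py` and a test image exists at `/tmp/frame.jpg`; note that `perf_stats` events may interleave with `transform` events, so the reader scans rather than assumes ordering:

```python
# Hypothetical smoke test; paths and the frame_id value are placeholders.
import base64
import json
import subprocess

proc = subprocess.Popen(
    ["python", "transform.py", "--blend-mode", "depth_only"],
    cwd="skills/transformation/depth-estimation",
    stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True,
)

ready = json.loads(proc.stdout.readline())  # first line should be the ready event
assert ready["event"] == "ready", ready

for msg in (
    {"event": "frame", "frame_id": "cam1_1", "camera_id": "front_door",
     "frame_path": "/tmp/frame.jpg", "timestamp": "2024-01-01T00:00:00Z"},
    {"command": "config-update", "config": {"opacity": 0.8}},
    {"command": "stop"},
):
    proc.stdin.write(json.dumps(msg) + "\n")
proc.stdin.flush()

# Scan until the transform for our frame arrives; ignore anything else.
for line in proc.stdout:
    event = json.loads(line)
    if event.get("event") == "transform" and event.get("frame_id") == "cam1_1":
        jpeg = base64.b64decode(event["transform_data"])
        print(f"got {len(jpeg)} JPEG bytes for frame {event['frame_id']}")
        break
proc.wait()
```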
