Skip to content

Commit 98464d2

Browse files
jrx-code and claude committed
feat: v0.17.0 — L.5 Cross-Component Intelligence
- New module: app/learning/cross_component.py
- Detects shared network domains across components (data exfil patterns)
- Identifies unusual shared imports (non-stdlib/HA deps)
- Outlier detection: components significantly larger/smaller than average
- Suspicious domain detection (pastebin, ngrok, webhook.site, etc.)
- API: GET /api/intelligence

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent d34e1ed commit 98464d2

3 files changed

Lines changed: 141 additions & 1 deletion

File tree

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
"""L.5 — Cross-Component Intelligence.
2+
3+
Detect patterns that appear across multiple components — shared suspicious
4+
dependencies, common network domains, unusual API usage clusters, and
5+
components that deviate from the norm of their category.
6+
"""
7+
8+
import json
9+
import logging
10+
import sqlite3
11+
from collections import Counter
12+
13+
log = logging.getLogger(__name__)
14+
15+
16+
def analyze_cross_component(conn: sqlite3.Connection) -> dict:
    """Analyze patterns across all scanned components.

    Reads every row of ``component_fingerprints`` (newest first), keeps the
    first row seen per component, and cross-references the results.

    Args:
        conn: Open SQLite connection. Rows are read by column name, so the
            connection needs a mapping-style row factory such as
            ``sqlite3.Row``.

    Returns:
        ``{"message": ...}`` when the table is empty or holds fewer than two
        distinct components. Otherwise a dict with:

        - total_components: number of distinct components analyzed
        - shared_domains: up to 20 network domains used by 2+ components
        - shared_imports: up to 20 imports outside the common stdlib/HA set
          that are used by 2+ components
        - outlier_components: components whose line count is more than 3x or
          less than 0.1x the average (only computed for 3+ components)
        - suspicious_domains: contacted domains that contain the name of a
          known paste / tunnel / webhook service

    Raises:
        json.JSONDecodeError: If a stored JSON column holds malformed JSON.
    """
    rows = conn.execute(
        "SELECT domain, repo_url, imports, ha_apis, network_domains, "
        "py_files, js_files, total_lines "
        "FROM component_fingerprints "
        "ORDER BY created_at DESC"
    ).fetchall()

    if not rows:
        return {"message": "No fingerprint data available"}

    # Rows arrive newest-first, so the first row seen for a key is the latest
    # fingerprint of that component; older rows are skipped.
    seen = set()
    fingerprints = []
    for row in rows:
        key = row["domain"] or row["repo_url"]
        if key in seen:
            continue
        seen.add(key)
        fingerprints.append({
            "id": key,
            "imports": _load_json_list(row["imports"]),
            "ha_apis": _load_json_list(row["ha_apis"]),
            "network_domains": _load_json_list(row["network_domains"]),
            "py_files": row["py_files"],
            "js_files": row["js_files"],
            # NULL line counts are treated as "unknown" (0) and excluded from
            # the outlier statistics below.
            "total_lines": row["total_lines"] or 0,
        })

    if len(fingerprints) < 2:
        return {"message": "Need at least 2 scanned components for cross-analysis"}

    # 1. Shared network domains (domains used by 2+ components)
    domain_users = _group_users(fingerprints, "network_domains")
    shared_domains = [
        {"domain": d, "count": len(users), "components": users}
        for d, users in _by_popularity(domain_users)
        if len(users) >= 2
    ]

    # 2. Unusual shared imports (non-stdlib imports used by 2+ components)
    common_modules = {
        "os", "sys", "json", "re", "logging", "pathlib", "typing", "datetime",
        "collections", "functools", "itertools", "math", "hashlib", "uuid",
        "asyncio", "time", "io", "copy", "abc", "enum", "dataclasses",
        "contextlib", "unittest", "http", "urllib", "ssl", "socket",
        "threading", "multiprocessing", "subprocess", "shutil", "tempfile",
        "configparser", "argparse", "textwrap", "string", "struct", "base64",
        "homeassistant", "voluptuous", "aiohttp",  # HA common deps
    }
    import_users = _group_users(
        fingerprints,
        "imports",
        # Compare the top-level package so dotted names such as "os.path" or
        # "homeassistant.core" are recognised as common too.
        exclude=lambda imp: str(imp).split(".", 1)[0] in common_modules,
    )
    shared_imports = [
        {"import": imp, "count": len(users), "components": users}
        for imp, users in _by_popularity(import_users)
        if len(users) >= 2
    ]

    # 3. Outlier components (significantly larger/smaller than average)
    outliers = []
    sizes = [fp["total_lines"] for fp in fingerprints if fp["total_lines"] > 0]
    if len(fingerprints) >= 3 and sizes:
        # Every entry in sizes is > 0, so the average is never zero.
        avg_lines = sum(sizes) / len(sizes)
        for fp in fingerprints:
            total = fp["total_lines"]
            if total <= 0:
                continue
            ratio = total / avg_lines
            if ratio > 3 or ratio < 0.1:
                outliers.append({
                    "component": fp["id"],
                    "total_lines": total,
                    "avg_lines": round(avg_lines),
                    "ratio": round(ratio, 2),
                    "direction": "larger" if ratio > 3 else "smaller",
                })

    # 4. Suspicious patterns: any component contacting a known paste, tunnel
    # or webhook service. Matching is a case-insensitive substring test so
    # that subdomains and mixed-case spellings are caught as well.
    suspicious_domains = {"pastebin.com", "hastebin.com", "transfer.sh", "ngrok.io",
                          "webhook.site", "requestbin.com", "pipedream.com"}
    suspicious_hits = [
        {"domain": d, "components": users}
        for d, users in domain_users.items()
        if isinstance(d, str) and any(s in d.lower() for s in suspicious_domains)
    ]

    return {
        "total_components": len(fingerprints),
        "shared_domains": shared_domains[:20],
        "shared_imports": shared_imports[:20],
        "outlier_components": outliers,
        "suspicious_domains": suspicious_hits,
    }


def _load_json_list(raw):
    """Decode a JSON text column into a list; NULL, empty or non-list values give []."""
    if not raw:
        return []
    value = json.loads(raw)
    return value if isinstance(value, list) else []


def _group_users(fingerprints, field, exclude=None):
    """Map each distinct value found in fp[field] to the ids of the components listing it."""
    users = {}
    for fp in fingerprints:
        # dict.fromkeys drops repeats inside a single component (keeping
        # order) so one component can never count as two users of a value.
        for value in dict.fromkeys(fp[field]):
            if exclude is not None and exclude(value):
                continue
            users.setdefault(value, []).append(fp["id"])
    return users


def _by_popularity(users):
    """Return (value, component_ids) pairs, most widely used first; ties keep first-seen order."""
    return sorted(users.items(), key=lambda item: len(item[1]), reverse=True)

ha-sandbox/app/main.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -524,6 +524,17 @@ async def api_scheduler_update(request: Request):
524524
return JSONResponse(content={"ok": True, **scheduler.status()})
525525

526526

527+
# --- Cross-Component Intelligence API (L.5) ---
528+
529+
@app.get("/api/intelligence")
async def api_cross_component():
    """Serve the cross-component analysis report as a JSON response."""
    # Imported at call time, as in the original handler.
    from app.learning.cross_component import analyze_cross_component

    return JSONResponse(content=analyze_cross_component(storage.get_conn()))
536+
537+
527538
@app.get("/api/system")
528539
async def api_system_info():
529540
repos_dir = Path(app_settings.get("repos_dir", "/data/repos"))

ha-sandbox/config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
name: "HA Security Sandbox"
2-
version: "0.16.0"
2+
version: "0.17.0"
33
slug: ha_security_sandbox
44
description: "Security scanner for Home Assistant custom components — static analysis + AI review"
55
url: "https://github.com/jrx-code/ha-security-sandbox"

0 commit comments

Comments
 (0)