Skip to content

Commit 0012553

Browse files
committed
one more test to fix
1 parent 861a0cd commit 0012553

7 files changed

Lines changed: 288 additions & 93 deletions

File tree

README.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,29 @@ uvx pre-commit run --all-files
120120
git add -A
121121
uvx pre-commit run --all-files
122122

123+
# generate data
123124
uv run python -m sensor_sim.data_maker
124125

126+
# confirm
127+
uv run python -c "from pathlib import Path; [print(p, p.exists(), p.stat().st_size) for p in Path('data').glob('*.csv')]"
128+
129+
uv run python -c "import pandas as pd; print(pd.read_csv('data/sample_batch.csv').head()); print(pd.read_csv('data/sample_batch.csv').columns.tolist())"
130+
131+
# see what the analyzer is returning
132+
uv run python -c "from sensor_sim.generator import GeneratorConfig, generate_batch; from sensor_sim.processor import analyze_batch; r=generate_batch('spike', GeneratorConfig(batch_size=800, num_sensors=1, seed=1)); a=analyze_batch(r); print(a.findings)"
133+
134+
uv run python -c "from sensor_sim.generator import GeneratorConfig, generate_batch; from sensor_sim.processor import analyze_batch; r=generate_batch('multi_sensor_divergence', GeneratorConfig(batch_size=800, num_sensors=3, seed=1)); a=analyze_batch(r); print(a.findings)"
135+
136+
# inspect spikes
137+
uv run python -c "from sensor_sim.generator import GeneratorConfig, generate_batch; from sensor_sim.processor import analyze_batch; r=generate_batch('spike', GeneratorConfig(batch_size=800, num_sensors=1, seed=1)); a=analyze_batch(r); print(a.findings)"
138+
139+
# inspect divergence
140+
uv run python -c "from sensor_sim.generator import GeneratorConfig, generate_batch; r=generate_batch('multi_sensor_divergence', GeneratorConfig(batch_size=800, num_sensors=3, seed=1)); print(r[-20:])"
141+
142+
# run pytest
143+
uv run pytest
144+
145+
125146
uv run ruff format .
126147
uv run ruff check . --fix
127148
uv run zensical build

data/data_sets.toml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ nominal_frequency_at_25c_hz = 100000.0
88
sensitivity_hz_per_kelvin = 512.0
99
noise_stddev_hz = 8.0
1010
seed = 42
11+
spike_magnitude_hz = 250.0
12+
divergence_offset_hz = 250.0
13+
dropout_fraction = 0.05
14+
drift_extra_slope_hz_per_sample = 3.0
1115

1216
[[datasets]]
1317
name = "sample_batch"
@@ -29,3 +33,12 @@ scenario = "spike"
2933
name = "multi_sensor_batch"
3034
scenario = "multi_sensor_divergence"
3135
num_sensors = 3
36+
37+
[processor]
38+
spike_relative_threshold = 0.0000002
39+
divergence_relative_threshold = 0.00000002
40+
drift_offset_percent = 2.0
41+
baseline_window = 100
42+
comparison_window = 100
43+
early_window = 100
44+
late_window = 100

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ version = "0.1.0"
1212

1313
dependencies = [
1414
"httpx", # WHY: HTTP client for making API requests.
15+
"pandas", # WHY: Data manipulation and analysis library.
1516
"rich", # WHY: Rich library for enhanced console output and logging.
1617
]
1718

@@ -22,6 +23,7 @@ dev = [
2223
"packaging",
2324
"pre-commit",
2425
"pyright",
26+
"pytest",
2527
"ruff",
2628
"validate-pyproject",
2729
]

src/sensor_sim/models.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,19 @@ class GeneratorConfig:
3939
seed: int = 42
4040

4141

42+
@dataclass(frozen=True)
43+
class ProcessorConfig:
44+
"""Configuration for anomaly detection processing."""
45+
46+
spike_relative_threshold: float = 0.0
47+
divergence_relative_threshold: float = 0.0
48+
drift_offset_percent: float = 2.0
49+
baseline_window: int = 100
50+
comparison_window: int = 100
51+
early_window: int = 100
52+
late_window: int = 100
53+
54+
4255
@dataclass(frozen=True)
4356
class AnomalyFinding:
4457
"""A single anomaly or suspicious finding."""

src/sensor_sim/processor.py

Lines changed: 82 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,18 @@
88

99
from __future__ import annotations
1010

11-
from statistics import mean, pstdev
12-
from typing import Iterable
11+
from statistics import mean
1312

14-
from sensor_sim.models import AnomalyFinding, BatchResult, SensorReading
13+
from sensor_sim.models import (
14+
AnomalyFinding,
15+
BatchResult,
16+
ProcessorConfig,
17+
SensorReading,
18+
)
1519

1620

1721
def _group_by_sensor(
18-
readings: Iterable[SensorReading],
22+
readings: list[SensorReading],
1923
) -> dict[str, list[SensorReading]]:
2024
"""Group readings by sensor identifier."""
2125
grouped: dict[str, list[SensorReading]] = {}
@@ -26,12 +30,10 @@ def _group_by_sensor(
2630
return grouped
2731

2832

29-
def _non_null_frequencies(readings: Iterable[SensorReading]) -> list[float]:
33+
def _non_null_frequencies(readings: list[SensorReading]) -> list[float]:
3034
"""Return frequency values excluding null readings."""
3135
return [
32-
reading.frequency_hz
33-
for reading in readings
34-
if reading.frequency_hz is not None
36+
reading.frequency_hz for reading in readings if reading.frequency_hz is not None
3537
]
3638

3739

@@ -57,35 +59,34 @@ def detect_dropouts(readings: list[SensorReading]) -> list[AnomalyFinding]:
5759

5860
def detect_spikes(
5961
readings: list[SensorReading],
60-
sigma_threshold: float = 5.0,
62+
config: ProcessorConfig,
6163
) -> list[AnomalyFinding]:
62-
"""Detect abrupt single-sample spikes."""
63-
values = _non_null_frequencies(readings)
64-
if len(values) < 3:
65-
return []
64+
"""Detect abrupt single-sample spikes using local context."""
65+
findings: list[AnomalyFinding] = []
6666

67-
center = mean(values)
68-
spread = pstdev(values)
69-
if spread == 0.0:
70-
return []
67+
if len(readings) < 3:
68+
return findings
7169

72-
findings: list[AnomalyFinding] = []
70+
for i in range(1, len(readings) - 1):
71+
current = readings[i].frequency_hz
72+
prev = readings[i - 1].frequency_hz
73+
nxt = readings[i + 1].frequency_hz
7374

74-
for reading in readings:
75-
if reading.frequency_hz is None:
75+
if current is None or prev is None or nxt is None:
7676
continue
77-
z_score = abs(reading.frequency_hz - center) / spread
78-
if z_score > sigma_threshold:
77+
78+
local_mean = (prev + nxt) / 2.0
79+
delta = abs(current - local_mean)
80+
81+
# treat as spike if large relative jump
82+
if local_mean != 0 and delta / abs(local_mean) > config.spike_relative_threshold:
7983
findings.append(
8084
AnomalyFinding(
81-
sensor_id=reading.sensor_id,
85+
sensor_id=readings[i].sensor_id,
8286
kind="spike",
83-
start_sample=reading.sample_index,
84-
end_sample=reading.sample_index,
85-
message=(
86-
f"Single-sample deviation exceeds {sigma_threshold} sigma "
87-
f"(z={z_score:.2f})."
88-
),
87+
start_sample=readings[i].sample_index,
88+
end_sample=readings[i].sample_index,
89+
message=f"Single-sample spike detected (delta={delta:.2f} Hz).",
8990
severity="high",
9091
)
9192
)
@@ -115,7 +116,9 @@ def detect_drift(
115116
):
116117
window = values[start_index : start_index + comparison_window]
117118
window_mean = mean(window)
119+
118120
offset = window_mean - baseline
121+
offset_pct = abs(offset) / baseline * 100.0
119122

120123
if abs(offset) > threshold:
121124
findings.append(
@@ -127,7 +130,7 @@ def detect_drift(
127130
message=(
128131
"Sustained deviation from calibrated baseline exceeds "
129132
f"{drift_offset_percent:.1f}% "
130-
f"(offset={offset:.2f} Hz)."
133+
f"(offset={offset:.2f} Hz, {offset_pct:.1f}%)."
131134
),
132135
severity="medium",
133136
)
@@ -137,6 +140,51 @@ def detect_drift(
137140
return findings
138141

139142

143+
def detect_divergence(
144+
grouped: dict[str, list[SensorReading]],
145+
) -> list[AnomalyFinding]:
146+
"""Detect per-sample divergence between sensors."""
147+
findings: list[AnomalyFinding] = []
148+
149+
if not grouped:
150+
return findings
151+
152+
sensors = list(grouped.keys())
153+
sample_count = min(len(r) for r in grouped.values())
154+
155+
for i in range(sample_count):
156+
values = []
157+
ids = []
158+
159+
for sensor_id in sensors:
160+
v = grouped[sensor_id][i].frequency_hz
161+
if v is not None:
162+
values.append(v)
163+
ids.append(sensor_id)
164+
165+
if len(values) < 2:
166+
continue
167+
168+
avg = mean(values)
169+
if avg == 0:
170+
continue
171+
172+
for sensor_id, v in zip(ids, values, strict=False):
173+
if abs(v - avg) / avg > 0.01:
174+
findings.append(
175+
AnomalyFinding(
176+
sensor_id=sensor_id,
177+
kind="divergence",
178+
start_sample=i,
179+
end_sample=i,
180+
message="Sensor diverges from peer group.",
181+
severity="medium",
182+
)
183+
)
184+
185+
return findings
186+
187+
140188
def detect_suspicious_trend(
141189
readings: list[SensorReading],
142190
early_window: int = 100,
@@ -172,62 +220,19 @@ def detect_suspicious_trend(
172220
]
173221

174222

175-
def detect_multi_sensor_divergence(
176-
readings: list[SensorReading],
177-
deviation_percent: float = 2.0,
178-
) -> list[AnomalyFinding]:
179-
"""Detect a sensor diverging from correlated peers."""
180-
grouped = _group_by_sensor(readings)
181-
if len(grouped) < 2:
182-
return []
183-
184-
sensor_means: dict[str, float] = {}
185-
for sensor_id, sensor_readings in grouped.items():
186-
values = _non_null_frequencies(sensor_readings)
187-
if not values:
188-
continue
189-
sensor_means[sensor_id] = mean(values)
190-
191-
if len(sensor_means) < 2:
192-
return []
193-
194-
global_mean = mean(sensor_means.values())
195-
findings: list[AnomalyFinding] = []
196-
197-
for sensor_id, sensor_mean in sensor_means.items():
198-
percent_difference = abs(sensor_mean - global_mean) / global_mean * 100.0
199-
if percent_difference > deviation_percent:
200-
sensor_readings = grouped[sensor_id]
201-
findings.append(
202-
AnomalyFinding(
203-
sensor_id=sensor_id,
204-
kind="divergence",
205-
start_sample=sensor_readings[0].sample_index,
206-
end_sample=sensor_readings[-1].sample_index,
207-
message=(
208-
"Sensor mean deviates from peer group by more than "
209-
f"{deviation_percent:.1f}%."
210-
),
211-
severity="medium",
212-
)
213-
)
214-
215-
return findings
216-
217-
218-
def analyze_batch(readings: list[SensorReading]) -> BatchResult:
223+
def analyze_batch(readings: list[SensorReading], config: ProcessorConfig) -> BatchResult:
219224
"""Run all supported anomaly checks over a batch."""
220225
grouped = _group_by_sensor(readings)
221226
findings: list[AnomalyFinding] = []
222227

228+
findings.extend(detect_divergence(grouped))
229+
223230
for _sensor_id, sensor_readings in grouped.items():
224231
findings.extend(detect_dropouts(sensor_readings))
225-
findings.extend(detect_spikes(sensor_readings))
232+
findings.extend(detect_spikes(sensor_readings, config))
226233
findings.extend(detect_drift(sensor_readings))
227234
findings.extend(detect_suspicious_trend(sensor_readings))
228235

229-
findings.extend(detect_multi_sensor_divergence(readings))
230-
231236
return BatchResult(
232237
total_readings=len(readings),
233238
sensors=tuple(sorted(grouped.keys())),

0 commit comments

Comments
 (0)