|
1 | 1 | """Generate synthetic time series with anomaly injection for groundwater wells.""" |
| 2 | + |
2 | 3 | import json |
3 | | -import os |
4 | 4 | from datetime import datetime, timedelta |
5 | 5 | from pathlib import Path |
6 | 6 |
|
@@ -178,59 +178,64 @@ def generate_well_timeseries( |
178 | 178 | data["debit_ls"] = AnomalyInjector.gradual_decline( |
179 | 179 | data["debit_ls"], start, duration, decline_pct |
180 | 180 | ) |
181 | | - anomaly_log.append({ |
182 | | - "type": "debit_decline", |
183 | | - "start_idx": start, |
184 | | - "duration": duration, |
185 | | - "decline_pct": round(decline_pct, 3), |
186 | | - "parameter": "debit_ls", |
187 | | - }) |
| 181 | + anomaly_log.append( |
| 182 | + { |
| 183 | + "type": "debit_decline", |
| 184 | + "start_idx": start, |
| 185 | + "duration": duration, |
| 186 | + "decline_pct": round(decline_pct, 3), |
| 187 | + "parameter": "debit_ls", |
| 188 | + } |
| 189 | + ) |
188 | 190 |
|
189 | 191 | elif atype == "tds_spike": |
190 | 192 | spike_factor = float(rng.uniform(1.5, 3.0)) |
191 | 193 | spike_dur = max(duration // 3, 10) |
192 | 194 | data["tds_mgl"] = AnomalyInjector.sudden_spike( |
193 | 195 | data["tds_mgl"], start, spike_dur, spike_factor |
194 | 196 | ) |
195 | | - anomaly_log.append({ |
196 | | - "type": "tds_spike", |
197 | | - "start_idx": start, |
198 | | - "duration": spike_dur, |
199 | | - "spike_factor": round(spike_factor, 2), |
200 | | - "parameter": "tds_mgl", |
201 | | - }) |
| 197 | + anomaly_log.append( |
| 198 | + { |
| 199 | + "type": "tds_spike", |
| 200 | + "start_idx": start, |
| 201 | + "duration": spike_dur, |
| 202 | + "spike_factor": round(spike_factor, 2), |
| 203 | + "parameter": "tds_mgl", |
| 204 | + } |
| 205 | + ) |
202 | 206 |
|
203 | 207 | elif atype == "sensor_fault": |
204 | 208 | fault_dur = max(duration // 5, 5) |
205 | 209 | data["ph"] = AnomalyInjector.sensor_fault( |
206 | 210 | data["ph"], start, fault_dur, fault_value=0.0 |
207 | 211 | ) |
208 | | - anomaly_log.append({ |
209 | | - "type": "sensor_fault", |
210 | | - "start_idx": start, |
211 | | - "duration": fault_dur, |
212 | | - "fault_value": 0.0, |
213 | | - "parameter": "ph", |
214 | | - }) |
| 212 | + anomaly_log.append( |
| 213 | + { |
| 214 | + "type": "sensor_fault", |
| 215 | + "start_idx": start, |
| 216 | + "duration": fault_dur, |
| 217 | + "fault_value": 0.0, |
| 218 | + "parameter": "ph", |
| 219 | + } |
| 220 | + ) |
215 | 221 |
|
216 | 222 | # Build timestamps |
217 | 223 | start_date = datetime(2024, 1, 1) |
218 | 224 | hours_step = 24 // measurements_per_day |
219 | | - timestamps = [ |
220 | | - start_date + timedelta(hours=i * hours_step) |
221 | | - for i in range(n_points) |
222 | | - ] |
223 | | - |
224 | | - df = pd.DataFrame({ |
225 | | - "timestamp": timestamps, |
226 | | - "well_id": well_id, |
227 | | - "debit_ls": np.round(data["debit_ls"], 3), |
228 | | - "tds_mgl": np.round(data["tds_mgl"], 1), |
229 | | - "ph": np.round(data["ph"], 2), |
230 | | - "chloride_mgl": np.round(data["chloride_mgl"], 1), |
231 | | - "water_level_m": np.round(data["water_level_m"], 2), |
232 | | - "temperature_c": np.round(data["temperature_c"], 1), |
233 | | - }) |
| 225 | + timestamps = [start_date + timedelta(hours=i * hours_step) for i in range(n_points)] |
| 226 | + |
| 227 | + df = pd.DataFrame( |
| 228 | + { |
| 229 | + "timestamp": timestamps, |
| 230 | + "well_id": well_id, |
| 231 | + "debit_ls": np.round(data["debit_ls"], 3), |
| 232 | + "tds_mgl": np.round(data["tds_mgl"], 1), |
| 233 | + "ph": np.round(data["ph"], 2), |
| 234 | + "chloride_mgl": np.round(data["chloride_mgl"], 1), |
| 235 | + "water_level_m": np.round(data["water_level_m"], 2), |
| 236 | + "temperature_c": np.round(data["temperature_c"], 1), |
| 237 | + } |
| 238 | + ) |
234 | 239 |
|
235 | 240 | return df, anomaly_log |
236 | 241 |
|
@@ -274,9 +279,7 @@ def main(): |
274 | 279 | well_ids = [f["properties"]["id"] for f in geojson["features"]] |
275 | 280 | obs_dir = data_dir / "observations" |
276 | 281 |
|
277 | | - result = generate_all_timeseries( |
278 | | - well_ids, days=365, output_dir=str(obs_dir) |
279 | | - ) |
| 282 | + result = generate_all_timeseries(well_ids, days=365, output_dir=str(obs_dir)) |
280 | 283 | print(f"Generated {len(result)} time series -> observations/") |
281 | 284 |
|
282 | 285 |
|
|
0 commit comments