-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdvc.yaml
More file actions
135 lines (125 loc) · 5.11 KB
/
dvc.yaml
File metadata and controls
135 lines (125 loc) · 5.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
stages:
  # ---------------------------------------------------------------------
  # Ingestion: each stage runs fyp.ingestion.cli on a bundled sample CSV.
  # The commands hard-code --use-samples (and --downsample-30min for
  # UK-DALE), so the keys listed under `params` are tracked by DVC for
  # change detection only; their values are not interpolated into `cmd`.
  # PYTHONPATH is set the same way as in the runner stages further down so
  # the fyp package is importable from src/ without being installed.
  # ---------------------------------------------------------------------
  ingest_lcl:
    desc: "Ingest London Smart Meters data to unified Parquet schema"
    cmd: PYTHONPATH="$(pwd)/src" python -m fyp.ingestion.cli lcl --use-samples
    deps:
      - src/fyp/ingestion/lcl_ingestor.py
      - src/fyp/ingestion/base.py
      - src/fyp/ingestion/schema.py
      - data/samples/lcl_sample.csv
    params:
      - use_samples
    outs:
      - data/processed/lcl_data
      - data/processed/ingestion_summary.json

  ingest_ukdale:
    desc: "Ingest UK-DALE data with optional 30-min downsampling (inline)"
    cmd: >-
      PYTHONPATH="$(pwd)/src" python -m fyp.ingestion.cli ukdale
      --use-samples --downsample-30min
    deps:
      - src/fyp/ingestion/ukdale_ingestor.py
      - src/fyp/ingestion/base.py
      - src/fyp/ingestion/schema.py
      - data/samples/ukdale_sample.csv
    params:
      - use_samples
      - downsample_30min
    outs:
      - data/processed/ukdale_data

  ingest_ssen:
    desc: "Ingest SSEN feeder data from samples or API"
    cmd: PYTHONPATH="$(pwd)/src" python -m fyp.ingestion.cli ssen --use-samples
    deps:
      - src/fyp/ingestion/ssen_ingestor.py
      - src/fyp/ingestion/base.py
      - src/fyp/ingestion/schema.py
      - data/samples/ssen_sample.csv
    params:
      - use_samples
    outs:
      - data/processed/ssen_data

  # Placeholder stage: the command only creates the output directories and
  # writes a marker file; no feature-engineering code is invoked here.
  feature_engineering:
    desc: "Create weather, calendar, and lag features for forecasting models"
    cmd: >-
      mkdir -p data/processed/features data/derived
      && echo "Engineering features from processed datasets..."
      && echo "features_engineered" > data/derived/features_ready.txt
    deps:
      - data/processed/lcl_data
      - data/processed/ukdale_data
      - data/processed/ssen_data
    outs:
      - data/processed/features
      - data/derived/features_ready.txt

  # Runs the forecast task and then the anomaly task of fyp.runner on the
  # LCL sample data; the second command only runs if the first succeeds.
  train_baselines:
    desc: "Train and evaluate baseline forecasting and anomaly detection models"
    cmd: >-
      PYTHONPATH="$(pwd)/src" python -m fyp.runner forecast
      --dataset lcl --use-samples
      && PYTHONPATH="$(pwd)/src" python -m fyp.runner anomaly
      --dataset lcl --use-samples
    deps:
      - src/fyp/baselines/forecasting.py
      - src/fyp/baselines/anomaly.py
      - src/fyp/data_loader.py
      - src/fyp/metrics.py
      - src/fyp/runner.py
      - data/samples/lcl_sample.csv
    outs:
      - data/derived/evaluation/forecast_metrics.csv
      - data/derived/evaluation/anomaly_metrics.csv
      - data/derived/evaluation/forecast_summary.json
      - data/derived/evaluation/anomaly_summary.json
      - data/derived/evaluation/forecast_mae_by_model.png
      - data/derived/evaluation/anomaly_precision_recall.png
      - data/derived/evaluation/forecast_error_distribution.png
      - data/derived/evaluation/anomaly_f1_by_model.png

  # Same forecast entry point as train_baselines, with --model-type patchtst
  # and a separate output directory.
  # NOTE(review): the description mentions an autoencoder and autoencoder.py
  # is tracked as a dependency, but the command only requests patchtst —
  # confirm whether an autoencoder run is intended here.
  train_custom:
    desc: "Train PatchTST and autoencoder models for energy forecasting"
    cmd: >-
      PYTHONPATH="$(pwd)/src" python -m fyp.runner forecast
      --dataset lcl --use-samples --model-type patchtst
      --output-dir data/derived/models/custom
    deps:
      - src/fyp/models/patchtst.py
      - src/fyp/models/autoencoder.py
      - src/fyp/config.py
      - src/fyp/runner.py
      - data/samples/lcl_sample.csv
    outs:
      - data/derived/models/custom/forecast_metrics.csv
      - data/derived/models/custom/forecast_summary.json
      - data/derived/models/custom/forecast_mae_by_model.png
      - data/derived/models/custom/forecast_error_distribution.png

  # Placeholder stage: creates the output directory and a marker file only;
  # no training code is invoked by this command.
  train_selfplay:
    desc: "Train complete self-play system with scenario generation and verification"
    cmd: >-
      mkdir -p data/derived/models/selfplay data/derived
      && echo "Training self-play propose→solve→verify system..."
      && echo "selfplay_trained" > data/derived/selfplay_ready.txt
    deps:
      - data/processed/features
      - data/derived/features_ready.txt
    outs:
      - data/derived/models/selfplay
      - data/derived/selfplay_ready.txt

  # Runs fyp.evaluation.final_poster with a fixed seed (42).
  poster_numbers:
    desc: "Generate poster-ready metrics with confidence intervals"
    cmd: PYTHONPATH="$(pwd)/src" python -m fyp.evaluation.final_poster --seed 42
    deps:
      - src/fyp/evaluation/final_poster.py
      - data/samples/lcl_sample.csv
    outs:
      - data/derived/poster/poster_metrics.json
      - data/derived/poster/metrics_summary.csv
      - data/derived/poster/README_poster_numbers.md

  # Placeholder stage: `touch` creates empty PNG files and the reports
  # directory is created empty; no evaluation code is invoked. It joins the
  # baseline, custom-model and poster branches of the pipeline via `deps`.
  evaluate_models:
    desc: "Comprehensive evaluation including feeder validation against SSEN data"
    cmd: >-
      mkdir -p data/derived/evaluation data/derived/reports data/derived
      && echo "Evaluating all models and generating reports..."
      && touch data/derived/evaluation/forecast_accuracy.png
      && touch data/derived/evaluation/feeder_validation.png
      && echo "evaluation_complete" > data/derived/evaluation_ready.txt
    deps:
      - data/derived/evaluation/forecast_metrics.csv
      - data/derived/evaluation/anomaly_metrics.csv
      - data/derived/models/custom/forecast_metrics.csv
      - data/derived/poster/poster_metrics.json
    outs:
      - data/derived/evaluation/forecast_accuracy.png
      - data/derived/evaluation/feeder_validation.png
      - data/derived/reports
      - data/derived/evaluation_ready.txt
plots:
  # Plot definitions for the two PNG files listed as outputs of the
  # evaluate_models stage. Each entry maps a file path to its plot properties.
  # NOTE(review): nesting is reconstructed on the assumption that this is the
  # top-level `plots` section (a sibling of `stages`) — confirm.
  # NOTE(review): both targets are images, while x, y and template look like
  # data-series settings, and plots_template.html is not a JSON plot
  # template — confirm these fields have any effect for PNG targets.
  - data/derived/evaluation/forecast_accuracy.png:
      template: plots_template.html
      x: horizon_hours
      y: mae
      title: "Forecasting Accuracy by Horizon"
  - data/derived/evaluation/feeder_validation.png:
      template: plots_template.html
      x: method
      y: ks_statistic
      title: "Feeder Realism Validation (Lower is Better)"