forked from emmericp/marktstammdatenplotter
-
Notifications
You must be signed in to change notification settings - Fork 0
150 lines (130 loc) · 5.42 KB
/
refresh-docs.yml
File metadata and controls
150 lines (130 loc) · 5.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
name: Refresh docs

# Weekly refresh pipeline: rebuild the open-mastr SQLite snapshot, re-scrape
# the MaStR registry (non-PV, BESS and PV unit lists), re-render the sample
# SVGs and the wind / PV / BESS animations (GIF + MP4), rebuild the per-Kreis
# JSON and the bulk-download artefacts, run the unit tests, re-export the
# marimo HTML, then commit the updated artefacts back to the repository so
# GitHub Pages picks them up.

on:
  schedule:
    # Sundays at 03:17 UTC. Picked off-the-hour to dodge cron stampedes.
    - cron: "17 3 * * 0"
  # Manual runs may override how many 25k-row pages each scrape step fetches.
  # The defaults here mirror the fallbacks used by the scrape steps when the
  # workflow is started by the schedule (where no inputs exist).
  workflow_dispatch:
    inputs:
      pages_to_scrape_pv:
        description: "Number of 25k-row pages of top-sorted PV plants to fetch"
        default: "8"
      pages_to_scrape_wind:
        description: "Number of 25k-row pages of non-PV plants to fetch"
        default: "7"

# The final step pushes a commit, which needs write access to repo contents.
permissions:
  contents: write

# All runs share one concurrency group; an in-flight refresh is never
# cancelled by a newer run.
concurrency:
  group: refresh-docs
  cancel-in-progress: false
jobs:
  # Single job: refresh every data source, re-render all artefacts, run the
  # tests, and commit the results.
  refresh:
    runs-on: ubuntu-latest
    timeout-minutes: 60
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 1

      - name: Install pixi
        uses: prefix-dev/setup-pixi@v0.8.10
        with:
          pixi-version: latest
          cache: true

      - name: Resolve env
        run: pixi install

      - name: Refresh open-mastr SQLite snapshot (wind, solar, storage, storage_units, market)
        # ~6 GB DB + 2 GB ZIP fit comfortably on the runner; drop the ZIP after
        # parse to keep disk pressure off the subsequent render steps.
        # SQLITE_DATABASE_PATH (read by open-mastr/helpers.py:50) directs the
        # write to the repo path so mastr_db.DB_PATH resolves to _REPO_DB.
        env:
          SQLITE_DATABASE_PATH: ${{ github.workspace }}/data/mastr/open-mastr.db
        run: |
          mkdir -p data/mastr
          pixi run db-mastr-core
          rm -f ~/.open-MaStR/data/xml_download/*.zip || true

      - name: Fetch Kreis polygons
        run: pixi run fetch-kreise

      - name: Scrape MaStR (non-PV — wind, hydro, biomass…)
        # Fetches every unit whose Energieträger is neither 2495 nor 2496,
        # i.e. the two codes queried by the PV and BESS steps below.
        env:
          # The input only exists on manual dispatch; scheduled runs use '7'.
          PAGES_NON_PV: ${{ github.event.inputs.pages_to_scrape_wind || '7' }}
        run: |
          mkdir -p data
          # Pages are downloaded in parallel. Each PID is recorded because a
          # bare `wait` always returns 0 and would hide failed downloads.
          pids=()
          for p in $(seq 1 "${PAGES_NON_PV}"); do
            # --fail: exit non-zero on an HTTP error instead of saving the
            # error page as if it were JSON.
            curl --get --fail --silent --show-error \
              'https://www.marktstammdatenregister.de/MaStR/Einheit/EinheitJson/GetErweiterteOeffentlicheEinheitStromerzeugung' \
              --data-urlencode 'sort=' \
              --data-urlencode "page=${p}" \
              --data-urlencode 'pageSize=25000' \
              --data-urlencode 'group=' \
              --data-urlencode "filter=Energieträger~neq~'2495'~and~Energieträger~neq~'2496'" \
              --data-urlencode 'forExport=true' \
              -o "data/data-${p}.json" &
            pids+=("$!")
          done
          # Wait for every download so all errors are logged, then fail the
          # step if any of them failed.
          status=0
          for pid in "${pids[@]}"; do
            wait "${pid}" || status=1
          done
          exit "${status}"

      - name: Scrape MaStR (BESS top-200k)
        # 8 pages x 25,000 rows, sorted by Bruttoleistung descending.
        run: |
          mkdir -p data-bess
          # Record each PID: a bare `wait` would hide failed downloads.
          pids=()
          for p in $(seq 1 8); do
            # --fail: exit non-zero on an HTTP error instead of saving the
            # error page as if it were JSON.
            curl --get --fail --silent --show-error \
              'https://www.marktstammdatenregister.de/MaStR/Einheit/EinheitJson/GetErweiterteOeffentlicheEinheitStromerzeugung' \
              --data-urlencode 'sort=Bruttoleistung-desc' \
              --data-urlencode "page=${p}" \
              --data-urlencode 'pageSize=25000' \
              --data-urlencode 'group=' \
              --data-urlencode "filter=Energieträger~eq~'2496'" \
              --data-urlencode 'forExport=true' \
              -o "data-bess/data-${p}.json" &
            pids+=("$!")
          done
          status=0
          for pid in "${pids[@]}"; do
            wait "${pid}" || status=1
          done
          exit "${status}"

      - name: Scrape MaStR (PV top-200k)
        env:
          # The input only exists on manual dispatch; scheduled runs use '8'.
          PAGES_PV: ${{ github.event.inputs.pages_to_scrape_pv || '8' }}
        run: |
          mkdir -p data-pv
          # Record each PID: a bare `wait` would hide failed downloads.
          pids=()
          for p in $(seq 1 "${PAGES_PV}"); do
            # --fail: exit non-zero on an HTTP error instead of saving the
            # error page as if it were JSON.
            curl --get --fail --silent --show-error \
              'https://www.marktstammdatenregister.de/MaStR/Einheit/EinheitJson/GetErweiterteOeffentlicheEinheitStromerzeugung' \
              --data-urlencode 'sort=Bruttoleistung-desc' \
              --data-urlencode "page=${p}" \
              --data-urlencode 'pageSize=25000' \
              --data-urlencode 'group=' \
              --data-urlencode "filter=Energieträger~eq~'2495'" \
              --data-urlencode 'forExport=true' \
              -o "data-pv/data-${p}.json" &
            pids+=("$!")
          done
          status=0
          for pid in "${pids[@]}"; do
            wait "${pid}" || status=1
          done
          exit "${status}"

      - name: Re-render sample SVGs + Bundesland chart
        run: pixi run python scripts/render_samples.py

      - name: Re-render wind + PV + BESS animation GIFs + MP4s
        run: pixi run python scripts/render_wind_gif.py all

      - name: Rebuild per-Kreis JSON
        run: pixi run python scripts/build_kreise_json.py

      - name: Build bulk-download artefacts
        run: pixi run python scripts/build_downloads.py

      - name: Run unit tests
        run: pixi run test

      - name: Re-export marimo HTML
        run: pixi run docs-build

      - name: Commit refreshed docs
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
          # `git add` aborts without staging anything when even one pathspec
          # matches no file, so expand the patterns in the shell first and
          # let nullglob drop the ones that match nothing.
          shopt -s nullglob
          artefacts=(
            fig/sample-*.svg
            fig/wind-*.gif fig/pv-*.gif fig/bess-*.gif
            fig/wind-*.mp4 fig/pv-*.mp4 fig/bess-*.mp4
            docs/assets/sample-*.svg
            docs/assets/wind-*.gif docs/assets/pv-*.gif docs/assets/bess-*.gif
            docs/assets/wind-*.mp4 docs/assets/pv-*.mp4 docs/assets/bess-*.mp4
            docs/assets/kreise.json
            docs/data/*.parquet docs/data/*.csv.gz
            docs/pv.html docs/wind.html
          )
          shopt -u nullglob
          # Fixed-name entries are not affected by nullglob; skip the ones
          # that do not exist, but say so in the run log.
          present=()
          for path in "${artefacts[@]}"; do
            if [ -e "${path}" ]; then
              present+=("${path}")
            else
              echo "::warning::Expected artefact is missing: ${path}"
            fi
          done
          if [ "${#present[@]}" -gt 0 ]; then
            # Staging stays best-effort, but a problem is surfaced as a
            # warning instead of being silently discarded.
            git add -- "${present[@]}" \
              || echo "::warning::git add reported a problem; continuing with whatever was staged"
          fi
          if git diff --cached --quiet; then
            echo "No changes to commit."
          else
            git commit -m "Weekly refresh: re-scrape MaStR + re-render docs ($(date -u +%Y-%m-%d))"
            git push
          fi