Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions powerplantmatching/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2408,8 +2408,12 @@ def MASTR(

cols = ["NutzbareSpeicherkapazitaet", "VerknuepfteEinheit"]
with ZipFile(fn, "r") as file:
fn_storage_units = (
"bnetza_open_mastr_2025-02-09/bnetza_mastr_storage_units_raw.csv"
# Match by suffix rather than a hardcoded dated folder, so newer
# open-mastr dumps (different bnetza_open_mastr_<date>/ prefix) load.
fn_storage_units = next(
name
for name in file.namelist()
if name.endswith("bnetza_mastr_storage_units_raw.csv")
)
storage_units = pd.read_csv(file.open(fn_storage_units), usecols=cols)

Expand Down Expand Up @@ -2473,8 +2477,16 @@ def MASTR(
parse_columns=PARSE_COLUMNS,
)
.assign(
# ThermischeNutzleistung is present in the Zenodo CSV dump but absent
# from the open-mastr bulk export; fall back to KwkMastrNummer alone.
Set=lambda df: df["Set"].where(
df["KwkMastrNummer"].isna() & df["ThermischeNutzleistung"].isna(), "CHP"
df["KwkMastrNummer"].isna()
& (
df["ThermischeNutzleistung"].isna()
if "ThermischeNutzleistung" in df.columns
else True
),
"CHP",
),
)
)
Expand Down
5 changes: 4 additions & 1 deletion powerplantmatching/package_data/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,10 @@ MASTR:
reliability_score: 7
status: ["In Betrieb", "In Planung", "Endgültig stillgelegt", "Vorübergehend stillgelegt"]
capacity_threshold: 0.1 # all values below will be filtered out, given in MW
fn: bnetza_open_mastr_2025-02-09.zip
# Glob selects the newest local dump (see get_raw_file). Build a current one
# from the open-mastr bulk export with scripts/build_mastr_zip_from_open_mastr.py;
# otherwise the frozen Zenodo dump below (last published 2025-02-09) is fetched.
fn: bnetza_open_mastr_*.zip
url: https://zenodo.org/records/14783581/files/bnetza_open_mastr_2025-02-09.zip
EESI:
net_capacity: true
Expand Down
19 changes: 18 additions & 1 deletion powerplantmatching/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
import os
import re
from ast import literal_eval as liteval
from glob import glob
from importlib.metadata import version
from urllib.parse import urlparse

import country_converter as coco
import numpy as np
Expand Down Expand Up @@ -76,7 +78,22 @@ def get_raw_file(name, update=False, config=None, skip_retrieve=False):
if config is None:
config = get_config()
df_config = config[name]
path = _data_in(df_config["fn"])
fn = df_config["fn"]

# A glob pattern in `fn` selects the most recent matching local file
# (e.g. a locally built, dated dump such as a fresh open-mastr export).
# ISO-dated filenames sort chronologically, so the last match is the
# newest. The newest local match wins even when update=True: the URL is
# only a seed and can never be newer than a dated local build, so a forced
# refresh must not silently regress to stale data. Falls back to
# downloading the URL's basename when nothing local matches.
if any(c in fn for c in "*?["):
matches = sorted(glob(_data_in(fn)))
if matches:
return matches[-1]
path = _data_in(os.path.basename(urlparse(df_config["url"]).path))
else:
path = _data_in(fn)

if (not os.path.exists(path) or update) and not skip_retrieve:
url = df_config["url"]
Expand Down
145 changes: 145 additions & 0 deletions scripts/build_mastr_zip_from_open_mastr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
# SPDX-FileCopyrightText: Contributors to powerplantmatching <https://github.com/pypsa/powerplantmatching>
#
# SPDX-License-Identifier: MIT

"""
Build a powerplantmatching-compatible MaStR zip from an open-mastr SQLite DB.

open-mastr's bulk download (`Mastr().download()`) writes the Marktstammdatenregister
into ~/.open-MaStR/data/sqlite/open-mastr.db. powerplantmatching's MASTR() loader,
however, reads a zip of `*_raw.csv` files (the layout of the Zenodo open-mastr dump).

This script bridges the two: it exports the technology tables that ppm's loader
consumes into exactly those CSVs and zips them, so a fresh bulk download can refresh
ppm's MaStR source without waiting for the (infrequent) Zenodo re-release.

Usage:
python scripts/build_mastr_zip_from_open_mastr.py [--db PATH] [--out PATH] [--date YYYY-MM-DD]

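By default the zip lands in ppm's data dir, where the `bnetza_open_mastr_*.zip` glob
configured as MASTR.fn in config.yaml picks up the newest dated file automatically.
Set MASTR.fn to the produced filename only if you want to pin one specific dump.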
"""

from __future__ import annotations

import argparse
import os
import sqlite3
import tempfile
import zipfile
from pathlib import Path

import pandas as pd

# Maps each open-mastr DB table to the CSV file name it is exported as.
# ppm locates the files inside the zip by suffix (str.endswith), so only the
# file name matters, not the dated folder it is stored under.
# Every "<tech>_extended" table becomes "bnetza_mastr_<tech>_raw.csv"; the
# storage_units table does not follow that pattern and is listed explicitly.
TABLE_TO_CSV = {
    **{
        f"{tech}_extended": f"bnetza_mastr_{tech}_raw.csv"
        for tech in (
            "biomass",
            "combustion",
            "nuclear",
            "hydro",
            "wind",
            "solar",
            "storage",
        )
    },
    "storage_units": "bnetza_mastr_storage_units_raw.csv",
}

# Columns that ppm's MASTR() loader reads from the *_extended tables (see
# data.py), grouped by the role they play in the loader. Restricting the export
# to these keeps the 6M-row solar CSV to a sane size. A column that is absent
# from one table is harmless, because ppm intersects the list with whatever
# columns are actually present.
EXTENDED_COLUMNS = (
    # target_columns
    [
        "GeplantesInbetriebnahmedatum",
        "ThermischeNutzleistung",
        "KwkMastrNummer",
        "Batterietechnologie",
        "DatumBeginnVoruebergehendeStilllegung",
        "DatumWiederaufnahmeBetrieb",
        "Postleitzahl",
        "Ort",
        "Gemeinde",
        "Landkreis",
        "Lage",
    ]
    # PARSE_COLUMNS (Filesuffix is added by ppm itself, not read from the data)
    + [
        "ArtDerWasserkraftanlage",
        "Biomasseart",
        "Energietraeger",
        "Hauptbrennstoff",
        "NameStromerzeugungseinheit",
        "NameKraftwerksblock",
        "NameWindpark",
        "Technologie",
    ]
    # keys of RENAME_COLUMNS
    + [
        "EinheitMastrNummer",
        "NameKraftwerk",
        "Land",
        "Nettonennleistung",
        "Inbetriebnahmedatum",
        "DatumEndgueltigeStilllegung",
        "EinheitBetriebsstatus",
        "Laengengrad",
        "Breitengrad",
        "WEIC",
    ]
)

# Columns exported from the storage_units table.
STORAGE_UNITS_COLUMNS = [
    "NutzbareSpeicherkapazitaet",
    "VerknuepfteEinheit",
]

# Default location of the SQLite database inside the user's home directory.
DEFAULT_DB = Path.home().joinpath(".open-MaStR", "data", "sqlite", "open-mastr.db")

# Default directory the zip is written to when --out is not given.
# NOTE(review): this path is hard-coded; confirm it is the directory ppm's
# `_data_in` resolves to on the target platform, otherwise pass --out.
DEFAULT_OUT_DIR = Path.home().joinpath(
    ".local", "share", "powerplantmatching", "data", "in"
)


def _existing_columns(con: sqlite3.Connection, table: str) -> list[str]:
return [r[1] for r in con.execute(f"PRAGMA table_info({table})")]


def build(db_path: Path, out_path: Path, date_tag: str) -> None:
    """Export the MaStR tables of an open-mastr SQLite DB into a ppm-style zip.

    For every entry of ``TABLE_TO_CSV`` the wanted columns that actually exist
    in the table are written to a CSV, which is stored in the archive under the
    folder ``bnetza_open_mastr_<date_tag>/``. Tables that are missing, or that
    contain none of the wanted columns, are skipped with a message.

    The archive is assembled under a temporary ``*.part`` name next to
    ``out_path`` and only moved into place once it is complete, so a failed or
    interrupted run never leaves a truncated ``.zip`` behind.

    Parameters
    ----------
    db_path : Path
        Existing open-mastr SQLite database to read from.
    out_path : Path
        Destination zip file; missing parent directories are created.
    date_tag : str
        Tag used in the name of the top-level folder inside the zip.

    Raises
    ------
    FileNotFoundError
        If ``db_path`` is not an existing file.
    RuntimeError
        If not a single table could be exported.
    """
    db_path = Path(db_path)
    out_path = Path(out_path)

    # sqlite3.connect() silently creates an empty database for a missing path;
    # the export would then "succeed" and produce an empty archive.
    if not db_path.is_file():
        raise FileNotFoundError(f"open-mastr database not found: {db_path}")

    folder = f"bnetza_open_mastr_{date_tag}"
    out_path.parent.mkdir(parents=True, exist_ok=True)
    # The extra suffix keeps the unfinished archive from looking like a
    # finished "*.zip" dump to anything that scans the output directory.
    part_path = out_path.with_name(out_path.name + ".part")

    exported = 0
    con = sqlite3.connect(db_path)
    try:
        with (
            tempfile.TemporaryDirectory() as tmp,
            zipfile.ZipFile(
                part_path, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=6
            ) as zf,
        ):
            for table, csv_name in TABLE_TO_CSV.items():
                avail = _existing_columns(con, table)
                if not avail:
                    # No column info means the table does not exist in the DB.
                    print(f" skip {table}: table missing")
                    continue
                wanted = (
                    STORAGE_UNITS_COLUMNS
                    if table == "storage_units"
                    else EXTENDED_COLUMNS
                )
                cols = [c for c in wanted if c in avail]
                if not cols:
                    print(f" skip {table}: none of the wanted columns present")
                    continue
                df = pd.read_sql(f"SELECT {', '.join(cols)} FROM {table}", con)
                csv_path = Path(tmp) / csv_name
                df.to_csv(csv_path, index=False)
                zf.write(csv_path, arcname=f"{folder}/{csv_name}")
                # The data is in the archive now; free the disk space right
                # away instead of keeping every CSV until the end.
                csv_path.unlink()
                exported += 1
                print(
                    f" {table:22} -> {csv_name:38} rows={len(df):>9} cols={len(cols)}"
                )
        if not exported:
            raise RuntimeError(
                f"no MaStR table could be exported from {db_path}; "
                "is this an open-mastr database?"
            )
        # Publish the finished archive in a single step.
        os.replace(part_path, out_path)
    finally:
        con.close()
        # Only still present when the export failed before the rename.
        part_path.unlink(missing_ok=True)
    print(f"\nWrote {out_path} ({out_path.stat().st_size / 1e6:.0f} MB)")


def main() -> None:
    """Parse the command line and build the MaStR zip.

    The date tag is taken from ``--date`` or, if that is not given, from the
    ``MASTR_DATE_TAG`` environment variable. It must be a valid ISO date and is
    normalised to ``YYYY-MM-DD``, so that the dated file names sort
    chronologically. Without ``--out`` the zip is written to
    ``DEFAULT_OUT_DIR / "bnetza_open_mastr_<date>.zip"``.

    Raises
    ------
    SystemExit
        If no date tag is supplied or it is not a valid ISO date.
    """
    # Local import: only needed here, for validating the date tag.
    from datetime import date

    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--db", type=Path, default=DEFAULT_DB, help="open-mastr SQLite database"
    )
    ap.add_argument(
        "--date", default=None, help="date tag for folder/filename, e.g. 2026-06-14"
    )
    ap.add_argument(
        "--out",
        type=Path,
        default=None,
        help="output zip (default: bnetza_open_mastr_<date>.zip in ppm's data dir)",
    )
    args = ap.parse_args()

    date_tag = args.date or os.environ.get("MASTR_DATE_TAG")
    if not date_tag:
        raise SystemExit("pass --date YYYY-MM-DD (the bulk export date)")
    try:
        # Reject anything that is not a real calendar date and normalise the
        # spelling, since the tag ends up in the file and folder names.
        date_tag = date.fromisoformat(date_tag).isoformat()
    except ValueError:
        raise SystemExit(
            f"invalid date tag {date_tag!r}: expected YYYY-MM-DD"
        ) from None

    out_path = args.out or (DEFAULT_OUT_DIR / f"bnetza_open_mastr_{date_tag}.zip")
    print(f"DB: {args.db}\nOut: {out_path}\n")
    build(args.db, out_path, date_tag)


if __name__ == "__main__":
    main()