PyPSA · MaykThewessen · Jun 17, 2026 · Jun 17, 2026 · Jun 19, 2026
diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
@@ -2408,8 +2408,12 @@ def MASTR(
 
     cols = ["NutzbareSpeicherkapazitaet", "VerknuepfteEinheit"]
     with ZipFile(fn, "r") as file:
-        fn_storage_units = (
-            "bnetza_open_mastr_2025-02-09/bnetza_mastr_storage_units_raw.csv"
+        # Match by suffix rather than a hardcoded dated folder, so newer
+        # open-mastr dumps (different bnetza_open_mastr_<date>/ prefix) load.
+        fn_storage_units = next(
+            name
+            for name in file.namelist()
+            if name.endswith("bnetza_mastr_storage_units_raw.csv")
         )
         storage_units = pd.read_csv(file.open(fn_storage_units), usecols=cols)
 
@@ -2473,8 +2477,16 @@ def MASTR(
             parse_columns=PARSE_COLUMNS,
         )
         .assign(
+            # ThermischeNutzleistung is present in the Zenodo CSV dump but absent
+            # from the open-mastr bulk export; fall back to KwkMastrNummer alone.
             Set=lambda df: df["Set"].where(
-                df["KwkMastrNummer"].isna() & df["ThermischeNutzleistung"].isna(), "CHP"
+                df["KwkMastrNummer"].isna()
+                & (
+                    df["ThermischeNutzleistung"].isna()
+                    if "ThermischeNutzleistung" in df.columns
+                    else True
+                ),
+                "CHP",
             ),
         )
     )

diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
@@ -226,7 +226,10 @@ MASTR:
   reliability_score: 7
   status: ["In Betrieb", "In Planung", "Endgültig stillgelegt", "Vorübergehend stillgelegt"]
   capacity_threshold: 0.1  # all values below will be filtered out, given in MW
-  fn: bnetza_open_mastr_2025-02-09.zip
+  # Glob selects the newest local dump (see get_raw_file). Build a current one
+  # from the open-mastr bulk export with scripts/build_mastr_zip_from_open_mastr.py;
+  # otherwise the frozen Zenodo dump below (last published 2025-02-09) is fetched.
+  fn: bnetza_open_mastr_*.zip
   url: https://zenodo.org/records/14783581/files/bnetza_open_mastr_2025-02-09.zip
 EESI:
   net_capacity: true

diff --git a/powerplantmatching/utils.py b/powerplantmatching/utils.py
@@ -10,7 +10,9 @@
 import os
 import re
 from ast import literal_eval as liteval
+from glob import glob
 from importlib.metadata import version
+from urllib.parse import urlparse
 
 import country_converter as coco
 import numpy as np
@@ -76,7 +78,22 @@ def get_raw_file(name, update=False, config=None, skip_retrieve=False):
     if config is None:
         config = get_config()
     df_config = config[name]
-    path = _data_in(df_config["fn"])
+    fn = df_config["fn"]
+
+    # A glob pattern in `fn` selects the most recent matching local file
+    # (e.g. a locally built, dated dump such as a fresh open-mastr export).
+    # ISO-dated filenames sort chronologically, so the last match is the
+    # newest. The newest local match wins even when update=True: the URL is
+    # only a seed and can never be newer than a dated local build, so a forced
+    # refresh must not silently regress to stale data. Falls back to
+    # downloading the URL's basename when nothing local matches.
+    if any(c in fn for c in "*?["):
+        matches = sorted(glob(_data_in(fn)))
+        if matches:
+            return matches[-1]
+        path = _data_in(os.path.basename(urlparse(df_config["url"]).path))
+    else:
+        path = _data_in(fn)
 
     if (not os.path.exists(path) or update) and not skip_retrieve:
         url = df_config["url"]

diff --git a/scripts/build_mastr_zip_from_open_mastr.py b/scripts/build_mastr_zip_from_open_mastr.py
@@ -0,0 +1,231 @@
+# SPDX-FileCopyrightText: Contributors to powerplantmatching <https://github.com/pypsa/powerplantmatching>
+#
+# SPDX-License-Identifier: MIT
+
+"""
+Build a powerplantmatching-compatible MaStR zip from an open-mastr SQLite DB.
+
+open-mastr's bulk download (`Mastr().download()`) writes the Marktstammdatenregister
+into ~/.open-MaStR/data/sqlite/open-mastr.db. powerplantmatching's MASTR() loader,
+however, reads a zip of `*_raw.csv` files (the layout of the Zenodo open-mastr dump).
+
+This script bridges the two: it exports the technology tables that ppm's loader
+consumes into exactly those CSVs and zips them, so a fresh bulk download can refresh
+ppm's MaStR source without waiting for the (infrequent) Zenodo re-release.
+
+Usage:
+    python scripts/build_mastr_zip_from_open_mastr.py [--db PATH] [--out PATH] [--date YYYY-MM-DD]
+
+The date tag may also be supplied through the MASTR_DATE_TAG environment variable.
+It must be a canonical ISO date (YYYY-MM-DD): ppm picks the newest local dump by
+sorting the matching filenames.
+
+With the default `fn: bnetza_open_mastr_*.zip` glob in ppm's config.yaml, the newest
+dated zip in the data dir is picked up automatically; if MASTR.fn is pinned to a
+fixed filename instead, point it at the produced zip.
+"""
+
+from __future__ import annotations
+
+import argparse
+import os
+import sqlite3
+import tempfile
+import zipfile
+from contextlib import closing
+from datetime import date
+from pathlib import Path
+
+import pandas as pd
+
+# DB table -> ppm CSV filename suffix (ppm matches by str.endswith).
+TABLE_TO_CSV = {
+    "biomass_extended": "bnetza_mastr_biomass_raw.csv",
+    "combustion_extended": "bnetza_mastr_combustion_raw.csv",
+    "nuclear_extended": "bnetza_mastr_nuclear_raw.csv",
+    "hydro_extended": "bnetza_mastr_hydro_raw.csv",
+    "wind_extended": "bnetza_mastr_wind_raw.csv",
+    "solar_extended": "bnetza_mastr_solar_raw.csv",
+    "storage_extended": "bnetza_mastr_storage_raw.csv",
+    "storage_units": "bnetza_mastr_storage_units_raw.csv",
+}
+
+# Columns ppm's MASTR() loader reads from the *_extended tables (see data.py).
+# Exporting only these keeps the 6M-row solar CSV to a sane size; ppm intersects
+# with what is present, so a missing column in one table is harmless.
+EXTENDED_COLUMNS = [
+    # target_columns
+    "GeplantesInbetriebnahmedatum",
+    "ThermischeNutzleistung",
+    "KwkMastrNummer",
+    "Batterietechnologie",
+    "DatumBeginnVoruebergehendeStilllegung",
+    "DatumWiederaufnahmeBetrieb",
+    "Postleitzahl",
+    "Ort",
+    "Gemeinde",
+    "Landkreis",
+    "Lage",
+    # PARSE_COLUMNS (Filesuffix is added by ppm, not sourced)
+    "ArtDerWasserkraftanlage",
+    "Biomasseart",
+    "Energietraeger",
+    "Hauptbrennstoff",
+    "NameStromerzeugungseinheit",
+    "NameKraftwerksblock",
+    "NameWindpark",
+    "Technologie",
+    # RENAME_COLUMNS keys
+    "EinheitMastrNummer",
+    "NameKraftwerk",
+    "Land",
+    "Nettonennleistung",
+    "Inbetriebnahmedatum",
+    "DatumEndgueltigeStilllegung",
+    "EinheitBetriebsstatus",
+    "Laengengrad",
+    "Breitengrad",
+    "WEIC",
+]
+STORAGE_UNITS_COLUMNS = ["NutzbareSpeicherkapazitaet", "VerknuepfteEinheit"]
+
+DEFAULT_DB = Path.home() / ".open-MaStR" / "data" / "sqlite" / "open-mastr.db"
+# NOTE(review): presumably ppm's data dir on Linux; confirm for other platforms or
+# custom ppm configurations, and pass --out there.
+DEFAULT_OUT_DIR = (
+    Path.home() / ".local" / "share" / "powerplantmatching" / "data" / "in"
+)
+
+
+def _existing_columns(con: sqlite3.Connection, table: str) -> list[str]:
+    """Return the column names of ``table`` (empty when the table is absent)."""
+    # Each PRAGMA table_info row describes one column; index 1 holds its name.
+    return [row[1] for row in con.execute(f'PRAGMA table_info("{table}")')]
+
+
+def _validate_date_tag(tag: str) -> str:
+    """
+    Return ``tag`` unchanged if it is a canonical ``YYYY-MM-DD`` calendar date.
+
+    Raises
+    ------
+    ValueError
+        If ``tag`` is not a real date written exactly as ``YYYY-MM-DD``.
+    """
+    try:
+        # Round-tripping rejects look-alikes such as "20260614" that newer
+        # Pythons parse but that would not sort correctly next to dashed names.
+        canonical = date.fromisoformat(tag).isoformat() == tag
+    except ValueError:
+        canonical = False
+    if not canonical:
+        raise ValueError(f"invalid date tag {tag!r}: expected YYYY-MM-DD")
+    return tag
+
+
+def build(db_path: Path, out_path: Path, date_tag: str) -> None:
+    """
+    Export the ppm-relevant MaStR tables from ``db_path`` into a zip at ``out_path``.
+
+    Every table in TABLE_TO_CSV that exists is written as
+    ``bnetza_open_mastr_<date_tag>/<csv name>``, restricted to the columns ppm reads.
+    Missing tables are skipped with a message.
+
+    The archive is assembled under a ``.part`` name and only renamed to ``out_path``
+    once complete, so an aborted run never leaves a broken zip that a
+    ``bnetza_open_mastr_*.zip`` glob would select as the newest dump.
+
+    Raises
+    ------
+    FileNotFoundError
+        If ``db_path`` does not exist (sqlite3 would otherwise create an empty DB).
+    RuntimeError
+        If no table could be exported.
+    """
+    db_path, out_path = Path(db_path), Path(out_path)
+    if not db_path.is_file():
+        raise FileNotFoundError(f"open-mastr SQLite DB not found: {db_path}")
+
+    folder = f"bnetza_open_mastr_{date_tag}"
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    part_path = out_path.with_name(out_path.name + ".part")
+    exported = 0
+
+    try:
+        with (
+            closing(sqlite3.connect(db_path)) as con,
+            tempfile.TemporaryDirectory() as tmp,
+            zipfile.ZipFile(
+                part_path, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=6
+            ) as zf,
+        ):
+            for table, csv_name in TABLE_TO_CSV.items():
+                avail = set(_existing_columns(con, table))
+                if not avail:
+                    print(f"  skip {table}: table missing/empty")
+                    continue
+                wanted = (
+                    STORAGE_UNITS_COLUMNS
+                    if table == "storage_units"
+                    else EXTENDED_COLUMNS
+                )
+                cols = [c for c in wanted if c in avail]
+                if not cols:
+                    print(f"  skip {table}: none of the wanted columns present")
+                    continue
+                # Quote identifiers so unusual column names cannot break the query.
+                select = ", ".join(f'"{c}"' for c in cols)
+                df = pd.read_sql(f'SELECT {select} FROM "{table}"', con)
+                csv_path = Path(tmp) / csv_name
+                df.to_csv(csv_path, index=False)
+                zf.write(csv_path, arcname=f"{folder}/{csv_name}")
+                csv_path.unlink()  # already in the zip; free temp space early
+                exported += 1
+                print(
+                    f"  {table:22} -> {csv_name:38} "
+                    f"rows={len(df):>9} cols={len(cols)}"
+                )
+        if not exported:
+            raise RuntimeError(f"no MaStR tables found in {db_path}; nothing exported")
+        os.replace(part_path, out_path)
+    finally:
+        # No-op after a successful rename; removes the partial zip otherwise.
+        part_path.unlink(missing_ok=True)
+    print(f"\nWrote {out_path}  ({out_path.stat().st_size / 1e6:.0f} MB)")
+
+
+def main(argv: list[str] | None = None) -> None:
+    """
+    Parse command-line options and build the zip.
+
+    ``argv`` defaults to ``sys.argv[1:]`` (argparse's behaviour for ``None``).
+    Exits with a message when no valid date tag is given or the build fails.
+    """
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--db", type=Path, default=DEFAULT_DB)
+    ap.add_argument(
+        "--date",
+        default=None,
+        help="date tag for folder/filename, e.g. 2026-06-14 (or set MASTR_DATE_TAG)",
+    )
+    ap.add_argument("--out", type=Path, default=None)
+    args = ap.parse_args(argv)
+
+    date_tag = args.date or os.environ.get("MASTR_DATE_TAG")
+    if not date_tag:
+        raise SystemExit("pass --date YYYY-MM-DD (the bulk export date)")
+    try:
+        date_tag = _validate_date_tag(date_tag)
+    except ValueError as err:
+        raise SystemExit(str(err)) from err
+
+    out_path = args.out or (DEFAULT_OUT_DIR / f"bnetza_open_mastr_{date_tag}.zip")
+    print(f"DB:  {args.db}\nOut: {out_path}\n")
+    try:
+        build(args.db, out_path, date_tag)
+    except (FileNotFoundError, RuntimeError) as err:
+        raise SystemExit(str(err)) from err
+
+
+if __name__ == "__main__":
+    main()