forked from emmericp/marktstammdatenplotter
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparser.py
More file actions
237 lines (225 loc) · 8.65 KB
/
parser.py
File metadata and controls
237 lines (225 loc) · 8.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
import json
import os
import re
from dataclasses import dataclass
from datetime import UTC, date, datetime, timedelta
from glob import glob

import numpy as np
import pandas as pd
def parse_dotnet_date(date_str):
if not date_str:
return None
match = re.match(r"/Date\((\d+)\)/", date_str)
if match:
timestamp_ms = int(match.group(1))
return datetime.fromtimestamp(timestamp_ms / 1000, UTC)
@dataclass
class PowerPlant:
    """One generation unit decoded from a MaStR JSON record.

    The raw record stores many properties as numeric catalogue codes;
    :meth:`from_json` translates them into readable values. A code without a
    known mapping is stored as ``None``.
    """

    id: int
    num_panels: int | None  # None when missing or judged implausible
    power: float | None  # raw "Bruttoleistung"
    inverter: float | None  # "Nettonennleistung", scaled by any feed-in limit
    install_date: date | None
    removal_date: date | None
    postal_code: str
    is_private: bool
    facing: int | str | None  # compass degrees, "tracked" or "east-west"
    tilt: tuple[int, int] | int | str | None  # (min, max) degrees, 90, or "tracked"
    installation_type: str | None
    building_type: str | None
    owner_name: str
    energy_type: str
    longitude: float
    latitude: float
    off_shore: str | None  # "Ostsee" / "Nordsee", otherwise None

    # The tables below are deliberately left un-annotated so that the
    # dataclass machinery does not treat them as fields.

    # "Leistungsbegrenzung" codes that scale the net power. Codes 802 ("No")
    # and 1535 ("Sonstige", i.e. other) leave the value unchanged.
    _LIMIT_FACTOR_BY_CODE = {805: 0.5, 804: 0.6, 803: 0.7}

    # "HauptausrichtungSolarModule" -> compass bearing in degrees or a label.
    _FACING_BY_CODE = {
        695: 0,  # Nord (north)
        696: 45,  # Nord-Ost (north-east)
        697: 90,  # Ost (east)
        698: 135,  # Sued-Ost (south-east)
        699: 180,  # Sued (south)
        700: 225,  # Sued-West (south-west)
        701: 270,  # West
        702: 315,  # Nord-West (north-west)
        703: "tracked",  # nachgefuehrt
        704: "east-west",  # Ost-West: a split layout, not a single bearing
    }

    # "HauptneigungswinkelSolarmodule" -> inclusive (min, max) range in degrees.
    _TILT_BY_CODE = {
        810: (0, 19),  # < 20 degrees
        809: (20, 40),  # 20 - 40 degrees
        808: (40, 60),  # 40 - 60 degrees
        807: (61, 90),  # > 60 degrees
        806: 90,  # Fassadenintegriert (facade-integrated), stored as a plain 90
        811: "tracked",  # nachgefuehrt: no fixed angle
    }

    # "ArtDerSolaranlageId" -> installation category.
    _INSTALLATION_TYPE_BY_CODE = {
        853: "building",
        2484: "building_other",
        852: "free",
        3002: "water",
        3058: "parking_lot",
        2961: "balkonkraftwerk",
    }

    # "NutzungsbereichGebSA" -> usage sector of the building.
    _BUILDING_TYPE_BY_CODE = {
        714: "commercial",
        713: "household",
        715: "industry",
        716: "farming",
        717: "public",
        718: "other",
    }

    @staticmethod
    def _decode(table, code):
        """Return the value mapped to ``code`` in ``table``, or ``None``."""
        try:
            return table.get(code)
        except TypeError:
            # An unhashable JSON value (list/object) is never a valid code.
            return None

    @classmethod
    def from_json(cls, entry: dict):
        """Build a ``PowerPlant`` from one raw record.

        Args:
            entry: A single record dict. Every key read below must be
                present; its value may be ``None``.

        Returns:
            The populated ``PowerPlant``.

        Raises:
            KeyError: If an expected key is missing from ``entry``.
        """
        inverter_power = entry["Nettonennleistung"]
        limit_factor = cls._decode(
            cls._LIMIT_FACTOR_BY_CODE, entry["Leistungsbegrenzung"]
        )
        # A null net power cannot be scaled; keep it as None instead of raising.
        if limit_factor is not None and inverter_power is not None:
            inverter_power *= limit_factor

        facing = cls._decode(cls._FACING_BY_CODE, entry["HauptausrichtungSolarModule"])
        tilt = cls._decode(cls._TILT_BY_CODE, entry["HauptneigungswinkelSolarmodule"])
        installation_type = cls._decode(
            cls._INSTALLATION_TYPE_BY_CODE, entry["ArtDerSolaranlageId"]
        )

        power = entry["Bruttoleistung"]
        panels = entry["AnzahlSolarModule"]
        # Discard the panel count when it implies <= 0.1 power units per panel
        # (presumably kW, i.e. 100 W - TODO confirm unit). Skipped when the
        # power is null, since the ratio cannot be computed.
        if panels and power is not None and power / panels <= 0.1:
            panels = None

        building_type = cls._decode(
            cls._BUILDING_TYPE_BY_CODE, entry["NutzungsbereichGebSA"]
        )

        off_shore = None
        # 889 is the only value for which a sea name is looked up; presumably
        # it marks offshore wind - TODO confirm against the MaStR catalogue.
        if entry["WindAnLandOderSeeId"] == 889:
            location = entry["StandortAnonymisiert"] or ""
            for sea in ("Ostsee", "Nordsee"):
                if location.startswith(sea):
                    off_shore = sea
                    break

        return cls(
            id=entry["Id"],
            num_panels=panels,
            power=power,
            inverter=inverter_power,
            install_date=parse_dotnet_date(entry["InbetriebnahmeDatum"]),
            removal_date=parse_dotnet_date(entry["EndgueltigeStilllegungDatum"]),
            postal_code=entry["Plz"],
            # 518 presumably denotes a natural person - TODO confirm.
            is_private=entry["AnlagenbetreiberPersonenArt"] == 518,
            facing=facing,
            tilt=tilt,
            installation_type=installation_type,
            building_type=building_type,
            owner_name=entry["AnlagenbetreiberName"],
            energy_type=entry["EnergietraegerName"],
            longitude=entry["Laengengrad"],
            latitude=entry["Breitengrad"],
            off_shore=off_shore,
        )
def load_data(data_dir, max_files=1000000):
    """Read every ``*.json`` file in ``data_dir`` into ``PowerPlant`` objects.

    Files are processed in sorted path order and at most ``max_files`` of
    them are read. Each file must hold a JSON object with a ``"Data"`` list
    of raw records. The number of loaded entries is printed before returning.

    Args:
        data_dir: Directory containing the JSON files.
        max_files: Upper bound on the number of files to read.

    Returns:
        A list of the truthy results of ``PowerPlant.from_json`` for every
        record, in file order.

    Raises:
        KeyError: If a file has no ``"Data"`` key.
    """
    pattern = os.path.join(data_dir, "*.json")
    selected_paths = sorted(glob(pattern))[:max_files]

    plants = []
    for path in selected_paths:
        with open(path, "r", encoding="utf-8") as handle:
            payload = json.load(handle)
        parsed = (PowerPlant.from_json(raw) for raw in payload["Data"])
        plants.extend(plant for plant in parsed if plant)

    print(f"Entries loaded from files: {len(plants)}")
    return plants
# ---------------------------------------------------------------------------
# Battery / energy-storage units (Energieträger 2496)
# ---------------------------------------------------------------------------
@dataclass
class BatteryUnit:
id: int
name: str
power_kw: float # Bruttoleistung
energy_kwh: float # NutzbareSpeicherkapazitaet
install_date: date | None
planned_date: date | None
removal_date: date | None
storage_tech: str | None # Batterie / Pumpspeicher / Wasserstoffspeicher / ...
battery_tech_code: int | None # raw Batterietechnologie code (lookup TBD)
feed_in_mode: str | None # Volleinspeisung / Überschusseinspeisung / ...
status: str | None # In Betrieb / In Planung / ...
voltage_level: str | None # Niederspannung / Mittelspannung / ... / Höchstspannung
longitude: float | None
latitude: float | None
postal_code: str
municipality: str | None
landkreis: str | None
bundesland: str | None
owner_name: str
is_private: bool
@classmethod
def from_json(cls, entry: dict):
if entry.get("EnergietraegerId") != 2496:
return None
return cls(
id=entry["Id"],
name=entry.get("EinheitName") or "",
power_kw=entry.get("Bruttoleistung") or 0.0,
energy_kwh=entry.get("NutzbareSpeicherkapazitaet") or 0.0,
install_date=parse_dotnet_date(entry.get("InbetriebnahmeDatum")),
planned_date=parse_dotnet_date(entry.get("GeplantesInbetriebsnahmeDatum")),
removal_date=parse_dotnet_date(entry.get("EndgueltigeStilllegungDatum")),
storage_tech=entry.get("StromspeichertechnologieBezeichnung"),
battery_tech_code=entry.get("Batterietechnologie"),
feed_in_mode=entry.get("VollTeilEinspeisungBezeichnung"),
status=entry.get("BetriebsStatusName"),
voltage_level=entry.get("SpannungsebenenNamen"),
longitude=entry.get("Laengengrad"),
latitude=entry.get("Breitengrad"),
postal_code=entry.get("Plz") or "",
municipality=entry.get("Gemeinde"),
landkreis=entry.get("Landkreis"),
bundesland=entry.get("Bundesland"),
owner_name=entry.get("AnlagenbetreiberName") or "",
is_private=entry.get("AnlagenbetreiberPersonenArt") == 518,
)
def load_bess(data_dir, max_files=1000000):
    """Collect ``BatteryUnit`` records from the MaStR JSON scrapes in a directory.

    The ``*.json`` files in ``data_dir`` are visited in sorted path order, at
    most ``max_files`` of them. A file without a ``"Data"`` key contributes
    nothing. Records for which ``BatteryUnit.from_json`` returns a falsy
    value are skipped. The number of collected units is printed before
    returning.

    Args:
        data_dir: Directory containing the JSON files.
        max_files: Upper bound on the number of files to read.

    Returns:
        The list of collected ``BatteryUnit`` objects, in file order.
    """
    all_paths = sorted(glob(os.path.join(data_dir, "*.json")))

    units = []
    for path in all_paths[:max_files]:
        with open(path, "r", encoding="utf-8") as handle:
            payload = json.load(handle)
        for raw in payload.get("Data", []):
            candidate = BatteryUnit.from_json(raw)
            if not candidate:
                continue
            units.append(candidate)

    print(f"BESS entries loaded from files: {len(units)}")
    return units