diff --git a/config/config.default.yaml b/config/config.default.yaml index f0b34b6a7d..a0fef79a73 100644 --- a/config/config.default.yaml +++ b/config/config.default.yaml @@ -1455,6 +1455,9 @@ data: bidding_zones_entsoepy: source: archive version: latest + lake_data: + source: primary + version: latest # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#overpass_api overpass_api: diff --git a/config/plotting.default.yaml b/config/plotting.default.yaml index 0371103fd3..0c70c85cc5 100644 --- a/config/plotting.default.yaml +++ b/config/plotting.default.yaml @@ -618,6 +618,10 @@ plotting: river_water heat preheater: '#4bb9f2' river_water heat utilisation: '#4bb9f2' river_water heat pump: '#4bb9f2' + lake_water heat: '#8db2f7' + lake_water heat preheater: '#8db2f7' + lake_water heat utilisation: '#8db2f7' + lake_water heat pump: '#b1c8f2' sea_water heat: '#0b222e' sea_water heat pump: '#0b222e' ground heat pump: '#2fb537' diff --git a/config/test/config.myopic.yaml b/config/test/config.myopic.yaml index 74853c8d14..85b18278ad 100644 --- a/config/test/config.myopic.yaml +++ b/config/test/config.myopic.yaml @@ -41,6 +41,7 @@ sector: - air - geothermal - river_water + - lake_water - sea_water - ptes urban decentral: diff --git a/config/test/config.overnight.yaml b/config/test/config.overnight.yaml index 809eeb6355..f3e13f93a2 100644 --- a/config/test/config.overnight.yaml +++ b/config/test/config.overnight.yaml @@ -81,6 +81,7 @@ sector: - air - geothermal - river_water + - lake_water - sea_water - ptes urban decentral: diff --git a/config/test/config.perfect.yaml b/config/test/config.perfect.yaml index 09e5ef2be3..fd48d95139 100644 --- a/config/test/config.perfect.yaml +++ b/config/test/config.perfect.yaml @@ -53,6 +53,7 @@ sector: - ptes - river_water - sea_water + - lake_water - geothermal - ptes urban decentral: diff --git a/data/versions.csv b/data/versions.csv index e4ebd18957..eafd317399 100644 --- a/data/versions.csv +++ b/data/versions.csv @@ 
-160,3 +160,5 @@ worldbank_commodity_prices,jan-2026,primary,latest supported,2026-02-03,,https:/ worldbank_commodity_prices,jan-2026,archive,latest supported,2026-02-12,,https://data.pypsa.org/workflows/eur/worldbank_commodity_prices/jan-2026/CMO-Historical-Data-Monthly.xlsx worldbank_urban_population,unknown,primary,latest might-work,2025-12-02,"This is the original World Bank API link, which is sometimes updated; it is not guaranteed to work with the current codebase and data changes without notice.",https://api.worldbank.org/v2/en/indicator/SP.URB.TOTL.IN.ZS?downloadformat=csv worldbank_urban_population,2025-08-14,archive,latest supported,2026-01-13,,https://data.pypsa.org/workflows/eur/worldbank_urban_population/2025-08-14/API_SP.URB.TOTL.IN.ZS_DS2_en_csv_v2_22447.zip +lake_data,v10,primary,latest supported,2025-12-02,,https://data.hydrosheds.org/file/hydrolakes/HydroLAKES_polys_v10.gdb.zip +lake_data,v10,archive,latest supported,2026-01-13,,https://zenodo.org/records/18164492/files/HydroLAKES_polys_v10_europe.gdb.zip?download=1 diff --git a/doc/data_inventory.csv b/doc/data_inventory.csv index 6b2a9ce831..9352ec5ded 100644 --- a/doc/data_inventory.csv +++ b/doc/data_inventory.csv @@ -54,3 +54,4 @@ "ons_lad","UK Local Authority Districts May 2024 Boundaries","Contains shapefiles of local authorities in the United Kingdom.","UK Office for National Statistics","https://geoportal.statistics.gov.uk/datasets/ons::local-authority-districts-may-2024-boundaries-uk-bsc-2/about","Open Government Licence v.3.0" "bidding_zones_electricitymaps","Electricity Maps Bidding Zones","Geospatial data defining bidding zones for electricity markets in Europe","Electricity Maps","https://github.com/electricitymaps/electricitymaps-contrib","AGPL-3.0" "bidding_zones_entsoepy","ENTSOE-PY Bidding Zones","Geospatial data defining bidding zones for electricity markets in Europe","EnergieID","https://github.com/EnergieID/entsoe-py","MIT" +"lake_data","HydroLakes database","Shapes, volumes 
and further data on global lakes","Messager, M.L., Lehner, B., Grill, G., Nedeva, I., Schmitt, O. (2016)","https://www.hydrosheds.org/products/hydrolakes","CC-BY-4.0" \ No newline at end of file diff --git a/doc/release_notes.rst b/doc/release_notes.rst index 321c4bedc5..e0885edcaf 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -9,6 +9,11 @@ Release Notes <<<<<<< HEAD Upcoming Release ================ + +* Feat: added support for lake-water sourced heat pumps in district heating, based on data from HydroLAKES and methods from Triebs (https://github.com/PyPSA/pypsa-eur/pull/1951) + +* Refactor: Integrated excess heat from Power-to-X processes into the new heat-source structure and moved some code from `scripts/prepare_sector_network.py` to `scripts/definitions/heat_source.py`. Also updated PtX excess heat efficiencies (https://github.com/PyPSA/pypsa-eur/pull/1944). + +* Update heat source handling in `prepare_sector_network` and introduce preheating of heat sources for more realistic system integrations (https://github.com/PyPSA/pypsa-eur/pull/1893). ======= .. 
Upcoming Release diff --git a/rules/build_sector.smk b/rules/build_sector.smk index 98421fe3b4..ccad6b91b0 100755 --- a/rules/build_sector.smk +++ b/rules/build_sector.smk @@ -524,6 +524,41 @@ rule build_river_heat_potential: ) +rule build_lake_heat_potential: + params: + drop_leap_day=config_provider("enable", "drop_leap_day"), + snapshots=config_provider("snapshots"), + dh_area_buffer=config_provider( + "sector", "district_heating", "dh_areas", "buffer" + ), + enable_heat_source_maps=config_provider("plotting", "enable_heat_source_maps"), + input: + unpack(input_hera_data), + lake_data=rules.retrieve_lake_data.output["lake_data"], + regions_onshore=resources("regions_onshore_base_s_{clusters}.geojson"), + dh_areas=resources("dh_areas_base_s_{clusters}.geojson"), + output: + heat_source_power=resources( + "heat_source_power_lake_water_base_s_{clusters}.csv" + ), + heat_source_temperature=resources("temp_lake_water_base_s_{clusters}.nc"), + heat_source_temperature_temporal_aggregate=resources( + "temp_lake_water_base_s_{clusters}_temporal_aggregate.nc" + ), + heat_source_energy_temporal_aggregate=resources( + "heat_source_energy_lake_water_base_s_{clusters}_temporal_aggregate.nc" + ), + resources: + mem_mb=20000, + log: + logs("build_lake_water_heat_potential_base_s_{clusters}.log"), + benchmark: + benchmarks("build_lake_water_heat_potential_base_s_{clusters}") + threads: 1 + script: + "../scripts/build_surface_water_heat_potentials/build_lake_water_heat_potential.py" + + def input_heat_source_temperature( w, replace_names: dict[str, str] = { diff --git a/rules/retrieve.smk b/rules/retrieve.smk index dbb9c8662c..2c1976854f 100755 --- a/rules/retrieve.smk +++ b/rules/retrieve.smk @@ -1689,3 +1689,24 @@ if (MOBILITY_PROFILES_DATASET := dataset_version("mobility_profiles"))["source"] run: copy2(input["kfz"], output["kfz"]) copy2(input["pkw"], output["pkw"]) + + +if (LAKE_DATA_DATASET := dataset_version("lake_data"))["source"] in [ + "primary", + "archive", +]: + + 
rule retrieve_lake_data: + input: + zip_file=storage(LAKE_DATA_DATASET["url"]), + output: + zip_file=f"{LAKE_DATA_DATASET['folder']}/HydroLAKES_polys_v10.gdb.zip", + lake_data=directory( + f"{LAKE_DATA_DATASET['folder']}/HydroLAKES_polys_v10.gdb/HydroLAKES_polys_v10.gdb" + ), + run: + copy2(input["zip_file"], output["zip_file"]) + unpack_archive( + output["zip_file"], + output["lake_data"], + ) diff --git a/scripts/build_surface_water_heat_potentials/approximators/lake_water_heat_approximator.py b/scripts/build_surface_water_heat_potentials/approximators/lake_water_heat_approximator.py new file mode 100644 index 0000000000..de15bd2b55 --- /dev/null +++ b/scripts/build_surface_water_heat_potentials/approximators/lake_water_heat_approximator.py @@ -0,0 +1,277 @@ +# SPDX-FileCopyrightText: Contributors to PyPSA-Eur +# +# SPDX-License-Identifier: MIT +"""Lake water heat approximator for district heating systems.""" + +from functools import cached_property + +import geopandas as gpd +import shapely +import xarray as xr +import numpy as np +import rioxarray +import logging + +from scripts.build_surface_water_heat_potentials.approximators.river_water_heat_approximator import ( + RiverWaterHeatApproximator, +) +from scripts.build_surface_water_heat_potentials.approximators.surface_water_heat_approximator import ( + SurfaceWaterHeatApproximator, +) + +logger = logging.getLogger(__name__) + +class LakeWaterHeatApproximator(SurfaceWaterHeatApproximator): + """ + Lake water heat approximator for district heating systems. + + Estimates the thermal potential of lakes as a heat source for district + heating applications. Uses lake volume data from HydroLAKES and ambient + temperature from HERA to compute available heating power. + + The water temperature approximation follows the methodology from + Triebs & Tsatsaronis 2022 (originally developed for rivers). 
+ + Parameters + ---------- + ambient_temperature : xr.DataArray + Ambient air temperature data (HERA), used to approximate water temperature. + region : Union[shapely.geometry.polygon.Polygon, gpd.GeoSeries] + Geographic region of interest. + lake_shapes : gpd.GeoDataFrame + Lake polygons from HydroLAKES with Vol_total attribute. + delta_t_max : float, optional + Maximum temperature difference for heat extraction, by default 1 K. + min_outlet_temperature : float, optional + Minimum outlet water temperature, by default 1 °C. + + References + ---------- + .. [1] Messager et al. (2016). Estimating the volume and age of water + stored in global lakes. Nature Communications, 13603. + .. [2] Triebs & Tsatsaronis (2022). Estimating the local renewable + potentials for the transformation of district heating systems. + """ + + def __init__( + self, + ambient_temperature: xr.DataArray, + region: shapely.geometry.polygon.Polygon | gpd.GeoSeries, + lake_shapes: gpd.GeoDataFrame, + delta_t_max: float = 1, + min_outlet_temperature: float = 1, + ) -> None: + self.ambient_temperature = ambient_temperature + self.lake_shapes = self._simplify_lake_geometries(lake_shapes) + self.region = region + + water_temperature = self._approximate_lake_temperature( + ambient_temperature=ambient_temperature + ) + water_temperature = water_temperature.rio.write_crs( + f"EPSG:{ambient_temperature.rio.crs.to_epsg()}" + ) + + super().__init__( + volume_flow=self._volume_flow_in_region, + water_temperature=water_temperature, + region=region, + max_relative_volume_flow=1.0, + delta_t_max=delta_t_max, + min_outlet_temperature=min_outlet_temperature, + ) + + @property + def _scaling_factor(self) -> float: + """Scaling factor for power calculation (disabled for lakes).""" + return 1.0 + + @staticmethod + def _simplify_lake_geometries( + lake_shapes: gpd.GeoDataFrame, tolerance: float = 0.001 + ) -> gpd.GeoDataFrame: + """ + Simplify lake polygon geometries for faster spatial operations. 
+ + Removes interior holes and simplifies exterior boundaries. + + Parameters + ---------- + lake_shapes : gpd.GeoDataFrame + Lake polygons from HydroLAKES. + tolerance : float, optional + Simplification tolerance in CRS units (~100m for EPSG:3035), + by default 0.001. + + Returns + ------- + gpd.GeoDataFrame + Lake shapes with simplified geometries. + """ + from shapely.geometry import MultiPolygon, Polygon + + def remove_holes(geom): + """Remove interior holes from polygon geometries.""" + if isinstance(geom, Polygon): + return Polygon(geom.exterior) + elif isinstance(geom, MultiPolygon): + return MultiPolygon([Polygon(p.exterior) for p in geom.geoms]) + return geom + + lake_shapes = lake_shapes.copy() + lake_shapes.geometry = lake_shapes.geometry.map(remove_holes).simplify( + tolerance, preserve_topology=True + ) + return lake_shapes + + @staticmethod + def _approximate_lake_temperature(**kwargs) -> xr.DataArray: + """ + Approximate lake water temperature from ambient air temperature. + + Uses the river temperature approximation from Triebs & Tsatsaronis 2022, + which applies a moving average and empirical formula to derive water + temperature from ambient air temperature. + + Parameters + ---------- + **kwargs + Keyword arguments passed to + ``RiverWaterHeatApproximator._approximate_river_temperature``. + + Returns + ------- + xr.DataArray + Approximated lake water temperature. + """ + return RiverWaterHeatApproximator._approximate_river_temperature(**kwargs) + + @cached_property + def _volume_flow_in_region(self) -> float: + """ + Calculate volume flow rate from total lake volume. + + Assumes annual turnover of lake volume, converted to hourly flow rate. + Vol_total in HydroLAKES is in MCM (million cubic meters = 1e6 m³). + + Returns + ------- + float + Volume flow rate in m³/h. 
+ """ + lake_volume_mcm = self._lake_parts_in_region["Vol_total"].sum() + lake_volume_m3 = lake_volume_mcm * 1e6 + volume_flow_m3_per_h = lake_volume_m3 / 8760 + return volume_flow_m3_per_h + + @cached_property + def _lake_parts_in_region(self) -> gpd.GeoDataFrame: + """ + Get lake polygons intersected with the region. + + Returns + ------- + gpd.GeoDataFrame + Lake parts that fall within the defined region. + """ + if isinstance(self.region, gpd.GeoSeries): + region_gdf = gpd.GeoDataFrame(geometry=self.region, crs=self.region.crs) + else: + region_gdf = gpd.GeoDataFrame( + geometry=[self.region], crs=self.lake_shapes.crs + ) + return gpd.overlay(self.lake_shapes, region_gdf, how="intersection") + + def _air_temperature_in_lakes( + self, eligible_lake_parts: gpd.GeoDataFrame + ) -> xr.DataArray: + """ + Clip ambient temperature raster to lake areas. + + Falls back to nearest-neighbor sampling at the centroid if the + geometries are too small to contain any raster cell center. + + Parameters + ---------- + eligible_lake_parts : gpd.GeoDataFrame + Lake polygons to clip temperature data to. + + Returns + ------- + xr.DataArray + Ambient temperature clipped to lake areas. + """ + if eligible_lake_parts.empty: + logger.warning( + "No raster cells found within lake geometries. Returning NaN." + ) + return xr.full_like(self.ambient_temperature.sel( + x=[self.region.centroid.x[0]], y=[self.region.centroid.y[0]], method="nearest" + ), fill_value=np.nan) + else: + return self.ambient_temperature.rio.clip( + eligible_lake_parts.geometry.buffer(self._data_resolution), # Buffer to ensure we capture nearby raster cells in case there is no raster cell center within the lake polygon + eligible_lake_parts.crs, + drop=True + ) + + + + @cached_property + def _water_temperature_in_region_raster(self) -> xr.DataArray: + """ + Compute lake water temperature raster for the region. + + Returns + ------- + xr.DataArray + Water temperature raster with spatial (x, y) and time dimensions. 
+ """ + air_temperature_in_lakes = self._air_temperature_in_lakes( + self._lake_parts_in_region + ) + return self._approximate_lake_temperature( + ambient_temperature=air_temperature_in_lakes + ) + + @cached_property + def _water_temperature_in_region(self) -> xr.DataArray: + """ + Compute spatially-averaged lake water temperature. + + Returns + ------- + xr.DataArray + Mean water temperature over time (spatial dimensions averaged). + """ + return self._water_temperature_in_region_raster.mean(dim=("x", "y")) + + @cached_property + def _power_sum_spatial(self) -> xr.DataArray: + """ + Get total power (already spatially aggregated for lakes). + + Returns + ------- + xr.DataArray + Total thermal power over time. + """ + return self._power_in_region + + def get_spatial_aggregate(self) -> xr.Dataset: + """ + Get spatially aggregated heat potential results. + + Returns + ------- + xr.Dataset + Dataset with variables: + - total_power : Total thermal power [MW] over time. + - average_temperature : Mean water temperature [°C] over time. + """ + return xr.Dataset( + data_vars={ + "total_power": self._power_sum_spatial, + "average_temperature": self._water_temperature_in_region, + } + ) diff --git a/scripts/build_surface_water_heat_potentials/approximators/river_water_heat_approximator.py b/scripts/build_surface_water_heat_potentials/approximators/river_water_heat_approximator.py index b23f291144..bc350ead92 100644 --- a/scripts/build_surface_water_heat_potentials/approximators/river_water_heat_approximator.py +++ b/scripts/build_surface_water_heat_potentials/approximators/river_water_heat_approximator.py @@ -51,8 +51,9 @@ def __init__( min_distance_meters=min_distance_meters, ) + @staticmethod def _round_coordinates( - self, da: xr.DataArray, decimal_precision: int = 4 + da: xr.DataArray, decimal_precision: int = 4 ) -> xr.DataArray: """ Round the coordinates of the HERA dataset to the defined precision. 
diff --git a/scripts/build_surface_water_heat_potentials/approximators/surface_water_heat_approximator.py b/scripts/build_surface_water_heat_potentials/approximators/surface_water_heat_approximator.py index 41c8f283ea..e19eaf40d9 100644 --- a/scripts/build_surface_water_heat_potentials/approximators/surface_water_heat_approximator.py +++ b/scripts/build_surface_water_heat_potentials/approximators/surface_water_heat_approximator.py @@ -186,10 +186,6 @@ def _validate_and_reproject_input(self) -> None: f"{coord} coordinate '{coord}' not found in both datasets" ) - # For region geometry, we just check the type - # if not isinstance(self.region, shapely.geometry.multipolygon.MultiPolygon): - # raise ValueError(f"region_geometry must be a shapely MultiPolygon, got {type(self.region)}") - @cached_property def _volume_flow_in_region(self) -> xr.DataArray: """ diff --git a/scripts/build_surface_water_heat_potentials/build_lake_water_heat_potential.py b/scripts/build_surface_water_heat_potentials/build_lake_water_heat_potential.py new file mode 100644 index 0000000000..c6f1f3aadb --- /dev/null +++ b/scripts/build_surface_water_heat_potentials/build_lake_water_heat_potential.py @@ -0,0 +1,446 @@ +# SPDX-FileCopyrightText: Contributors to PyPSA-Eur +# +# SPDX-License-Identifier: MIT +""" +Calculate lake water heat potential for district heating systems. + +This script computes the thermal potential of lakes as a heat source for district +heating applications. It uses HydroLAKES lake volume data and HERA ambient temperature +data to estimate available heating power and average water temperatures across regions +intersected with district heating areas. + +The approximation accounts for temporal and spatial variations in lake volume and +temperature, providing both spatial and temporal aggregates. Temporal aggregates are +only used for plotting. + +Relevant Settings +----------------- + +.. 
code:: yaml + + sector: + district_heating: + dh_area_buffer: # Buffer around DH areas in meters to include nearby lakes + heat_source_cooling: # Exploitable temperature delta + heat_sources: + - lake_water + snapshots: + start: + end: + enable: + drop_leap_day: + +Inputs +------ +- ``data/lake_data/HydroLAKES_polys_v10.gdb/``: Lake polygons from HydroLAKES +- ``data/hera_{year}/ambient_temp_{year}.nc``: Ambient temperature data from HERA +- ``resources//regions_onshore_base_s_{clusters}.geojson``: Onshore regions +- ``resources//dh_areas_base_s_{clusters}.geojson``: District heating areas + +Outputs +------- +- ``resources//heat_source_power_lake_water_base_s_{clusters}.csv``: Lake heating power potentials by region +- ``resources//temp_lake_water_base_s_{clusters}.nc``: Lake water temperature profiles by region +- ``resources//temp_lake_water_base_s_{clusters}_temporal_aggregate.nc``: Temporal aggregated temperature data +- ``resources//heat_source_energy_lake_water_base_s_{clusters}_temporal_aggregate.nc``: Temporal aggregated energy data +""" + +import gc +import logging +import warnings + +import dask +import geopandas as gpd +import numpy as np +import pandas as pd +import xarray as xr +from _helpers import ( + configure_logging, + get_snapshots, + set_scenario_config, + update_config_from_wildcards, +) +from approximators.lake_water_heat_approximator import LakeWaterHeatApproximator + +warnings.filterwarnings( + "ignore", + message=".*organizePolygons\\(\\) received a polygon with more than 100 parts.*", +) + + +logger = logging.getLogger(__name__) + +MEMORY_SAFETY_FACTOR = 0.7 # Use 70% of available memory for Dask arrays + + +def load_hera_data( + hera_inputs: dict, + snapshots: pd.DatetimeIndex, + minx: float, + miny: float, + maxx: float, + maxy: float, +) -> xr.DataArray: + """ + Load and concatenate HERA ambient temperature data with spatial clipping. + + Parameters + ---------- + hera_inputs : dict + Dictionary with year-specific HERA file paths. 
+ Expected keys: hera_ambient_temperature_{year}. + snapshots : pd.DatetimeIndex + Target snapshots to select from the combined data. + minx, miny, maxx, maxy : float + Bounding box coordinates for spatial clipping (EPSG:4326). + + Returns + ------- + xr.DataArray + Ambient temperature data reprojected to EPSG:3035. + """ + temp_files = [ + v for k, v in hera_inputs.items() if k.startswith("hera_ambient_temperature_") + ] + + # Determine time range from snapshots with buffer to ensure HERA coverage + # HERA data is on 6h intervals, so we need to extend the range to capture + # HERA timestamps that bracket our actual snapshots + buffer = pd.Timedelta(hours=12) # Buffer to ensure we capture surrounding HERA data + start_time = snapshots.min() - buffer + end_time = snapshots.max() + buffer + + # Load and concatenate ambient temperature files using open_mfdataset + ambient_temperature = xr.open_mfdataset( + temp_files, + chunks={"time": -1, "lat": 990, "lon": 1510}, + concat_dim="time", + combine="nested", + )["ta6"] + + # Select time range that covers our snapshots (using native HERA resolution) + ambient_temperature = ambient_temperature.sel(time=slice(start_time, end_time)) + + # Process ambient temperature data + return ( + ambient_temperature.rename({"lat": "latitude", "lon": "longitude"}) + .rio.write_crs("EPSG:4326") + .rio.clip_box(minx, miny, maxx, maxy) + .rio.reproject("EPSG:3035") + ) + + +def _create_empty_datasets( + snapshots: pd.DatetimeIndex, center_lon: float, center_lat: float +) -> tuple[xr.Dataset, xr.Dataset]: + """ + Create empty datasets for regions without DH areas. + + When a region has no intersection with district heating areas, we still need + to provide valid datasets with zero values to maintain consistent data structure. + + Parameters + ---------- + snapshots : pd.DatetimeIndex + Time snapshots for the spatial aggregate. + center_lon : float + Longitude of region center (for fallback coordinate). 
+ center_lat : float + Latitude of region center (for fallback coordinate). + + Returns + ------- + tuple[xr.Dataset, xr.Dataset] + Tuple of (spatial_aggregate, temporal_aggregate) datasets with zero values. + """ + spatial_aggregate = xr.Dataset( + data_vars={ + "total_power": xr.DataArray( + np.zeros(len(snapshots)), + dims=["time"], + coords={"time": snapshots}, + ), + "average_temperature": xr.DataArray( + np.zeros(len(snapshots)), + dims=["time"], + coords={"time": snapshots}, + ), + } + ) + + temporal_aggregate = xr.Dataset( + data_vars={ + "total_energy": xr.DataArray( + [[0.0]], + dims=["longitude", "latitude"], + coords={"longitude": [center_lon], "latitude": [center_lat]}, + ), + "average_temperature": xr.DataArray( + [[0.0]], + dims=["longitude", "latitude"], + coords={"longitude": [center_lon], "latitude": [center_lat]}, + ), + } + ) + + return spatial_aggregate, temporal_aggregate + + +def get_regional_result( + hera_inputs: dict, + region: gpd.GeoSeries, + dh_areas: gpd.GeoDataFrame, + lake_shapes: gpd.GeoDataFrame, + snapshots: pd.DatetimeIndex, + enable_heat_source_maps: bool = False, +) -> dict[str, xr.Dataset]: + """ + Calculate lake water heat potential for a given region. + + Parameters + ---------- + hera_inputs : dict + Dictionary containing HERA input file paths with year-specific keys. + region : gpd.GeoSeries + Geographical region for which to compute the heat potential. + dh_areas : gpd.GeoDataFrame + District heating areas to intersect with the region. + lake_shapes : gpd.GeoDataFrame + Lake polygons from HydroLAKES with Vol_total attribute. + snapshots : pd.DatetimeIndex + Time snapshots for temporal alignment. + enable_heat_source_maps : bool, optional + Whether to compute temporal aggregate for heat source maps, + by default False. + + Returns + ------- + dict[str, xr.Dataset] + Dictionary with keys: + - 'spatial aggregate': Dataset with total_power [MW] and + average_temperature [°C] time series. 
+ - 'temporal aggregate' (optional): Dataset with spatial distribution + of total_energy [MWh] and average_temperature [°C]. + """ + original_region = region.copy() + + intersected_geometry = gpd.overlay( + region.to_frame(), + dh_areas, + how="intersection", + ).union_all() + + region.geometry = intersected_geometry + + # Return empty datasets for regions without DH area intersection + if region.geometry.is_empty.any(): + region_center = ( + original_region.to_crs("EPSG:3035").centroid.to_crs("EPSG:4326").iloc[0] + ) + spatial_aggregate, temporal_aggregate = _create_empty_datasets( + snapshots, region_center.x, region_center.y + ) + return { + "spatial aggregate": spatial_aggregate, + "temporal aggregate": temporal_aggregate, + } + + minx, miny, maxx, maxy = region.total_bounds + ambient_temperature = load_hera_data(hera_inputs, snapshots, minx, miny, maxx, maxy) + + region = region.to_crs("EPSG:3035") + lake_shapes = lake_shapes.to_crs("EPSG:3035") + + lake_water_heat_approximator = LakeWaterHeatApproximator( + ambient_temperature=ambient_temperature, + region=region, + lake_shapes=lake_shapes, + ) + + spatial_aggregate = lake_water_heat_approximator.get_spatial_aggregate() + + if enable_heat_source_maps: + temporal_aggregate = ( + lake_water_heat_approximator.get_temporal_aggregate() + .rio.reproject("EPSG:4326") + .compute() + ) + else: + temporal_aggregate = None + + spatial_aggregate = spatial_aggregate.compute() + + # Free memory for next region + del lake_water_heat_approximator, ambient_temperature + gc.collect() + + result = {"spatial aggregate": spatial_aggregate} + if temporal_aggregate is not None: + result["temporal aggregate"] = temporal_aggregate + + return result + + +def set_dask_chunk_size( + n_threads: int, + memory_mb: int, + memory_safety_factor: float = MEMORY_SAFETY_FACTOR, + n_datasets: int = 1, + operation_multiplier: int = 3, +) -> None: + """ + Configure Dask chunk size based on available memory. 
+ + Parameters + ---------- + n_threads : int + Number of threads per worker. + memory_mb : int + Memory per worker in MB. + memory_safety_factor : float, optional + Fraction of memory to use, by default 0.7. + n_datasets : int, optional + Number of concurrent datasets in memory, by default 1. + operation_multiplier : int, optional + Multiplier for operation overhead, by default 3. + """ + chunk_size = ( + memory_mb * memory_safety_factor / n_threads / n_datasets / operation_multiplier + ) + dask.config.set({"array.chunk-size": f"{chunk_size}MB"}) + + +if __name__ == "__main__": + if "snakemake" not in globals(): + from _helpers import mock_snakemake + + snakemake = mock_snakemake( + "build_lake_water_heat_potential", + clusters="39", + opts="", + ll="vopt", + sector_opts="", + planning_horizons=2050, + ) + + # Configure logging and scenario + configure_logging(snakemake) + set_scenario_config(snakemake) + update_config_from_wildcards(snakemake.config, snakemake.wildcards) + + # Get simulation snapshots + snapshots: pd.DatetimeIndex = get_snapshots( + snakemake.params.snapshots, snakemake.params.drop_leap_day + ) + + # Load regions and district heating areas + regions_onshore = gpd.read_file(snakemake.input["regions_onshore"]) + regions_onshore.set_index("name", inplace=True) + regions_onshore = regions_onshore.to_crs("EPSG:4326") + + dh_areas = gpd.read_file(snakemake.input["dh_areas"]).to_crs("EPSG:3035") + # Buffer district heating areas by specified amount + dh_areas["geometry"] = dh_areas.geometry.buffer(snakemake.params.dh_area_buffer) + dh_areas = dh_areas.to_crs("EPSG:4326") + + lake_data = gpd.read_file(snakemake.input["lake_data"]).to_crs("EPSG:4326") + + # Configure Dask for multi-threading within operations (no distributed cluster) + dask.config.set(scheduler="threads") # Use threaded scheduler + dask.config.set(num_workers=snakemake.threads) # Use specified number of threads + + set_dask_chunk_size( + n_threads=snakemake.threads, 
memory_mb=snakemake.resources.mem_mb + ) + + # Process regions sequentially but with multi-threaded Dask operations + results = [] + for i, region_name in enumerate(regions_onshore.index, 1): + logger.info(f"Processing region {i}/{len(regions_onshore)}: {region_name}") + # Extract region geometry and create a copy to avoid modification conflicts + region = gpd.GeoSeries(regions_onshore.loc[region_name].copy(deep=True)) + + # Process region with multi-threaded Dask operations + result = get_regional_result( + hera_inputs=dict(snakemake.input), + region=region, + dh_areas=dh_areas, + lake_shapes=lake_data, + snapshots=snapshots, + enable_heat_source_maps=snakemake.params.enable_heat_source_maps, + ) + results.append(result) + + # Explicit cleanup to free memory between regions + del result, region + gc.collect() + + # Build DataFrame of total power for each region + # Regions as columns and time as rows + power = pd.DataFrame( + { + region_name: res["spatial aggregate"]["total_power"].to_pandas() + for region_name, res in zip(regions_onshore.index, results) + } + ).fillna(0) + + power = power.reindex( + snapshots, method="nearest" + ) # Use "nearest" method to handle any minor timestamp differences due to floating point precision + + # Save power potentials in MW + power.to_csv(snakemake.output.heat_source_power) + + # Log computed power and temperature values + power_mean = power.mean() + logger.info("=== Lake heat potential summary ===") + logger.info("Mean power per region (MW):") + for region in power_mean.index: + logger.info(f" {region}: {power_mean[region]:.2f} MW") + + # Concatenate average temperature for all regions into single dataset + temperature = ( + xr.concat( + [res["spatial aggregate"]["average_temperature"] for res in results], + dim="name", + ) + .assign_coords(name=regions_onshore.index) + ) + + # Align temperature data to snapshots, use nearest to handle any minor decimal differences + temperature = temperature.sel(time=snapshots, 
method="nearest").assign_coords( + time=snapshots + ) + + # Save temperature profiles as NetCDF for heat pump COP calculations + # Units: °C (degrees Celsius) + temperature.to_netcdf(snakemake.output.heat_source_temperature) + + # Log temperature values + temp_mean = temperature.mean(dim="time").to_pandas() + logger.info("Mean temperature per region (°C):") + for region in temp_mean.index: + logger.info(f" {region}: {temp_mean[region]:.2f} °C") + logger.info("=== END summary ===") + + # Save temporal aggregate results for analysis and visualization (if enabled) + if snakemake.params.enable_heat_source_maps: + # Energy temporal aggregate: spatial distribution of available energy + # Units: MWh (megawatt-hours) - total energy potential per location + xr.concat( + [res["temporal aggregate"]["total_energy"] for res in results], + dim=regions_onshore.index, + ).to_netcdf(snakemake.output.heat_source_energy_temporal_aggregate) + + # Temperature temporal aggregate: spatial distribution of temperatures + # Units: °C (degrees Celsius) - average temperature per location + xr.concat( + [res["temporal aggregate"]["average_temperature"] for res in results], + dim=regions_onshore.index, + ).to_netcdf(snakemake.output.heat_source_temperature_temporal_aggregate) + + else: + # Create empty placeholder files to satisfy Snakemake outputs + empty_ds = xr.Dataset() + empty_ds.to_netcdf(snakemake.output.heat_source_energy_temporal_aggregate) + empty_ds.to_netcdf(snakemake.output.heat_source_temperature_temporal_aggregate) diff --git a/scripts/definitions/heat_source.py b/scripts/definitions/heat_source.py index 6aba369787..ceb24fd7c4 100644 --- a/scripts/definitions/heat_source.py +++ b/scripts/definitions/heat_source.py @@ -42,7 +42,7 @@ class HeatSource(Enum): **Inexhaustible sources** (AIR, GROUND, SEA_WATER): Always available, no resource bus needed, heat pump draws from ambient. 
- **Limited sources requiring a bus** (GEOTHERMAL, RIVER_WATER, PTES): + **Limited sources requiring a bus** (GEOTHERMAL, RIVER_WATER, LAKE_WATER, PTES): Have spatial/temporal constraints, require resource tracking via buses. May support direct utilisation or preheating depending on temperature. @@ -58,6 +58,8 @@ River water heat source with time-varying temperature. SEA_WATER : str Sea water heat source (treated as inexhaustible). + LAKE_WATER : str + Lake water heat source with time-varying temperature. AIR : str Ambient air heat source (inexhaustible). GROUND : str @@ -75,6 +77,7 @@ GEOTHERMAL = "geothermal" RIVER_WATER = "river_water" SEA_WATER = "sea_water" + LAKE_WATER = "lake_water" AIR = "air" GROUND = "ground" PTES = "ptes" @@ -109,7 +112,11 @@ """ if self in [HeatSource.AIR, HeatSource.GROUND, HeatSource.SEA_WATER]: return HeatSourceType.INEXHAUSTIBLE - elif self in [HeatSource.GEOTHERMAL, HeatSource.RIVER_WATER]: + elif self in [ + HeatSource.GEOTHERMAL, + HeatSource.RIVER_WATER, + HeatSource.LAKE_WATER, + ]: return HeatSourceType.SUPPLY_LIMITED elif self == HeatSource.PTES: return HeatSourceType.STORAGE @@ -130,7 +137,7 @@ True for sources with config-defined temperatures (geothermal, PTX). False for sources with file-based time-series (river_water, ptes). 
""" - if self == HeatSource.RIVER_WATER: + if self in [HeatSource.RIVER_WATER, HeatSource.LAKE_WATER]: return False return self.source_type in [ HeatSourceType.SUPPLY_LIMITED, diff --git a/scripts/definitions/heat_system.py b/scripts/definitions/heat_system.py index b7ad87994b..76b50285da 100644 --- a/scripts/definitions/heat_system.py +++ b/scripts/definitions/heat_system.py @@ -234,6 +234,7 @@ def heat_pump_costs_name(self, heat_source: HeatSource | str) -> str: HeatSource.GEOTHERMAL, HeatSource.SEA_WATER, HeatSource.RIVER_WATER, + HeatSource.LAKE_WATER, HeatSource.ELECTROLYSIS_waste, HeatSource.FISCHER_TROPSCH_waste, HeatSource.SABATIER_waste, diff --git a/scripts/lib/validation/config/data.py b/scripts/lib/validation/config/data.py index ae56656912..e63c21c905 100644 --- a/scripts/lib/validation/config/data.py +++ b/scripts/lib/validation/config/data.py @@ -267,3 +267,7 @@ class DataConfig(BaseModel): default_factory=_DataSourceConfig, description="Entsoepy bidding zones data source configuration.", ) + lake_data: _DataSourceConfig = Field( + default_factory=_DataSourceConfig, + description="Lake data source configuration.", + ) diff --git a/scripts/lib/validation/config/sector.py b/scripts/lib/validation/config/sector.py index d9b8b6a96d..1ca6f49d04 100644 --- a/scripts/lib/validation/config/sector.py +++ b/scripts/lib/validation/config/sector.py @@ -185,6 +185,10 @@ class _DistrictHeatingConfig(ConfigModel): default_factory=lambda: {"buffer": 1000, "handle_missing_countries": "fill"}, description="District heating areas settings.", ) + fallback_ptx_heat_losses: float = Field( + 0.05, + description="Default heat loss fraction for PtX processes when waste heat recovery is enabled but no specific loss value is provided.", + ) heat_source_temperatures: dict[str, float] = Field( default_factory=lambda: { "geothermal": 65,