Skip to content

Commit 1a66674

Browse files
Move to pandera io (#11)
* Move to pandera io
* [pre-commit.ci] auto fixes from pre-commit.com hooks

For more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 81981d9 commit 1a66674

12 files changed

Lines changed: 599 additions & 101 deletions

pixi.lock

Lines changed: 349 additions & 31 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pixi.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,14 @@ ipykernel = ">=6.29.5,<7"
2323
pyarrow = ">=19.0.1,<20"
2424
pycountry = ">=24.6.1,<25"
2525
numpy = ">=2.2.4,<3"
26-
pandera-geopandas = ">=0.23.1,<0.24"
26+
pandera-geopandas = ">=0.24.0,<0.25"
2727
contextily = ">=1.6.2,<2"
2828
ipdb = ">=0.13.13"
2929
atlite = ">=0.4.0,<0.5"
3030
geofileops = ">=0.9.2,<0.10"
3131
xarray = ">=2025.3.0,<2026"
3232
scipy = ">=1.15.2,<2"
33+
pandera-io = ">=0.24.0,<0.25"
3334

3435
[pypi-dependencies]
3536
mkdocs-mermaid2-plugin = ">=1.2.1"

workflow/envs/default.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ dependencies:
88
- ipdb = 0.13.13
99
- numpy = 2.2.4
1010
- pandas = 2.2.3
11-
- pandera-geopandas = 0.23.1
11+
- pandera-geopandas = 0.24
12+
- pandera-io = 0.24
1213
- pyarrow = 19.0.1
1314
- scipy = 1.15.2
workflow/internal/national_generation.schema.yaml

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
schema_type: dataframe
2+
version: 0.24.0
3+
columns:
4+
country_id:
5+
title: null
6+
description: null
7+
dtype: str
8+
nullable: false
9+
checks: null
10+
unique: false
11+
coerce: false
12+
required: true
13+
regex: false
14+
year:
15+
title: null
16+
description: null
17+
dtype: int64
18+
nullable: false
19+
checks: null
20+
unique: false
21+
coerce: false
22+
required: true
23+
regex: false
24+
generation_mwh:
25+
title: null
26+
description: null
27+
dtype: float64
28+
nullable: false
29+
checks:
30+
- value: 0
31+
options:
32+
check_name: greater_than_or_equal_to
33+
raise_warning: false
34+
ignore_na: true
35+
unique: false
36+
coerce: false
37+
required: true
38+
regex: false
39+
checks: null
40+
index:
41+
- title: null
42+
description: null
43+
dtype: int64
44+
nullable: false
45+
checks: null
46+
name: null
47+
unique: true
48+
coerce: false
49+
dtype: null
50+
coerce: true
51+
strict: false
52+
name: NationalGenerationSchema
53+
ordered: false
54+
unique: null
55+
report_duplicates: all
56+
unique_column_names: false
57+
add_missing_columns: false
58+
title: null
59+
description: null
workflow/internal/powerplant.schema.yaml

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
schema_type: dataframe
2+
version: 0.24.0
3+
columns:
4+
powerplant_id:
5+
title: null
6+
description: null
7+
dtype: str
8+
nullable: false
9+
checks: null
10+
unique: true
11+
coerce: false
12+
required: true
13+
regex: false
14+
net_generation_capacity_mw:
15+
title: null
16+
description: null
17+
dtype: float64
18+
nullable: false
19+
checks:
20+
- value: 0
21+
options:
22+
check_name: greater_than_or_equal_to
23+
raise_warning: false
24+
ignore_na: true
25+
unique: false
26+
coerce: false
27+
required: true
28+
regex: false
29+
storage_capacity_mwh:
30+
title: null
31+
description: null
32+
dtype: float64
33+
nullable: true
34+
checks: null
35+
unique: false
36+
coerce: false
37+
required: true
38+
regex: false
39+
powerplant_type:
40+
title: null
41+
description: null
42+
dtype: str
43+
nullable: false
44+
checks:
45+
- value:
46+
- hydro_run_of_river
47+
- hydro_dam
48+
options:
49+
check_name: isin
50+
raise_warning: false
51+
ignore_na: true
52+
unique: false
53+
coerce: false
54+
required: true
55+
regex: false
56+
geometry:
57+
title: null
58+
description: null
59+
dtype: geometry
60+
nullable: false
61+
checks: null
62+
unique: false
63+
coerce: false
64+
required: true
65+
regex: false
66+
checks: null
67+
index:
68+
- title: null
69+
description: null
70+
dtype: int64
71+
nullable: false
72+
checks: null
73+
name: null
74+
unique: true
75+
coerce: false
76+
dtype: null
77+
coerce: true
78+
strict: false
79+
name: PowerplantSchema
80+
ordered: false
81+
unique: null
82+
report_duplicates: all
83+
unique_column_names: false
84+
add_missing_columns: false
85+
title: null
86+
description: null
workflow/internal/shape.schema.yaml

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
schema_type: dataframe
2+
version: 0.24.0
3+
columns:
4+
shape_id:
5+
title: null
6+
description: null
7+
dtype: str
8+
nullable: false
9+
checks: null
10+
unique: true
11+
coerce: false
12+
required: true
13+
regex: false
14+
country_id:
15+
title: null
16+
description: null
17+
dtype: str
18+
nullable: false
19+
checks: null
20+
unique: false
21+
coerce: false
22+
required: true
23+
regex: false
24+
shape_class:
25+
title: null
26+
description: null
27+
dtype: str
28+
nullable: false
29+
checks:
30+
- value:
31+
- land
32+
options:
33+
check_name: isin
34+
raise_warning: false
35+
ignore_na: true
36+
unique: false
37+
coerce: false
38+
required: true
39+
regex: false
40+
geometry:
41+
title: null
42+
description: null
43+
dtype: geometry
44+
nullable: false
45+
checks: null
46+
unique: false
47+
coerce: false
48+
required: true
49+
regex: false
50+
checks: null
51+
index:
52+
- title: null
53+
description: null
54+
dtype: int64
55+
nullable: false
56+
checks: null
57+
name: null
58+
unique: true
59+
coerce: false
60+
dtype: null
61+
coerce: true
62+
strict: false
63+
name: ShapeSchema
64+
ordered: false
65+
unique: null
66+
report_duplicates: all
67+
unique_column_names: false
68+
add_missing_columns: false
69+
title: null
70+
description: null

workflow/rules/powerplants.smk

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ rule powerplants_adjust_location:
1212
f"resources/automatic/hydrobasin_global_{config["pfafstetter_level"]}.parquet"
1313
),
1414
powerplants="resources/user/powerplants.parquet",
15+
powerplant_schema=workflow.source_path("../internal/powerplant.schema.yaml"),
1516
shapes="resources/user/shapes.parquet",
17+
shape_schema=workflow.source_path("../internal/shape.schema.yaml"),
1618
output:
1719
adjusted_powerplants="results/adjusted_powerplants.parquet",
1820
plot=report(
@@ -34,11 +36,13 @@ rule powerplants_get_inflow_m3:
3436
params:
3537
smoothing_hours=config["smoothing_hours"],
3638
input:
39+
adjusted_powerplants="results/adjusted_powerplants.parquet",
3740
basins=ancient(
3841
f"resources/automatic/hydrobasin_global_{config["pfafstetter_level"]}.parquet"
3942
),
43+
powerplant_schema=workflow.source_path("../internal/powerplant.schema.yaml"),
4044
shapes="resources/user/shapes.parquet",
41-
adjusted_powerplants="results/adjusted_powerplants.parquet",
45+
shape_schema=workflow.source_path("../internal/shape.schema.yaml"),
4246
cutout=ancient("resources/automatic/cutout.nc"),
4347
output:
4448
inflow="results/by_powerplant_id/inflow_m3.parquet",
@@ -58,7 +62,11 @@ rule powerplants_get_inflow_mwh:
5862
input:
5963
inflow_m3="results/by_powerplant_id/inflow_m3.parquet",
6064
adjusted_powerplants="results/adjusted_powerplants.parquet",
61-
generation="resources/user/national_generation.parquet",
65+
powerplant_schema=workflow.source_path("../internal/powerplant.schema.yaml"),
66+
national_generation="resources/user/national_generation.parquet",
67+
national_generation_schema=workflow.source_path(
68+
"../internal/national_generation.schema.yaml"
69+
),
6270
output:
6371
inflow_mwh="results/by_powerplant_id/inflow_mwh.parquet",
6472
log:
@@ -75,6 +83,7 @@ rule powerplants_get_cf_per_shape:
7583
input:
7684
adjusted_powerplants="results/adjusted_powerplants.parquet",
7785
inflow_mwh="results/by_powerplant_id/inflow_mwh.parquet",
86+
schema=workflow.source_path("../internal/powerplant.schema.yaml"),
7887
output:
7988
timeseries="results/by_shape_id/{plant_type}_cf.parquet",
8089
figure=report(

workflow/scripts/_schema.py

Lines changed: 0 additions & 53 deletions
This file was deleted.

workflow/scripts/powerplants_adjust_location.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,14 @@
66

77
import geopandas as gpd
88
import matplotlib.pyplot as plt
9-
from _schema import PowerplantSchema, ShapeSchema
9+
import pandera.io as io
1010
from pyproj import CRS
1111

1212
if TYPE_CHECKING:
1313
snakemake: Any
1414
sys.stderr = open(snakemake.log[0], "w")
15+
POWERPLANT_SCHEMA = io.from_yaml(snakemake.input.powerplant_schema)
16+
SHAPE_SCHEMA = io.from_yaml(snakemake.input.shape_schema)
1517

1618

1719
def _plot_adjustment(
@@ -64,9 +66,9 @@ def powerplants_adjust_location(
6466
# Read and validate input files
6567
basins = gpd.read_parquet(basins_path)
6668
powerplants = gpd.read_parquet(powerplants_path)
67-
PowerplantSchema.validate(powerplants)
69+
POWERPLANT_SCHEMA.validate(powerplants)
6870
shapes = gpd.read_parquet(shapes_path)
69-
ShapeSchema.validate(shapes)
71+
SHAPE_SCHEMA.validate(shapes)
7072

7173
# Coordinate-based operations must use a geographic CRS
7274
basins = basins.to_crs(crs["geographic"])
@@ -126,7 +128,7 @@ def powerplants_adjust_location(
126128

127129
# Re-validate and save
128130
powerplants = powerplants.to_crs(crs["geographic"])
129-
PowerplantSchema.validate(powerplants)
131+
POWERPLANT_SCHEMA.validate(powerplants)
130132
powerplants.to_parquet(adjusted_powerplants_path)
131133

132134

0 commit comments

Comments
 (0)