Skip to content

Commit dd34d21

Browse files
committed
Improve schema
1 parent 0fe3238 commit dd34d21

File tree

2 files changed: 20 additions and 17 deletions.

2 files changed

+20
-17
lines changed

workflow/scripts/_schemas.py

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,28 @@
11
"""Schemas for tabular data used in the workflow."""
22

3-
from pandera.pandas import DataFrameModel, Field, check
3+
import pandera.pandas as pa
44
from pandera.typing.geopandas import GeoSeries
55
from pandera.typing.pandas import Series
6-
from shapely.geometry import Point
76

87

9-
class Shapes(DataFrameModel):
10-
class Config:
11-
coerce = True
12-
strict = False
8+
class ShapesSchema(pa.DataFrameModel):
9+
"""Schema for geographic shapes."""
1310

14-
shape_id: Series[str] = Field(unique=True)
15-
"Unique ID for this shape."
11+
shape_id: Series[str] = pa.Field(unique=True)
12+
"A unique identifier for this shape."
1613
country_id: Series[str]
17-
"ISO alpha-3 code."
18-
shape_class: Series[str] = Field(isin=["land", "maritime"])
19-
"Shape classifier"
20-
geometry: GeoSeries[Point] = Field()
21-
"Shape polygon."
14+
"Country ISO alpha-3 code."
15+
shape_class: Series[str] = pa.Field(isin=["land", "maritime"])
16+
"Identifier of the shape's context."
17+
geometry: GeoSeries
18+
"Shape (multi)polygon."
19+
parent_name: Series[str] | None
20+
"Human-readable name in the parent dataset."
2221

23-
@check("geometry", element_wise=True)
24-
def geom_not_empty(cls, geom):
22+
@pa.check("geometry", element_wise=True)
23+
def check_geometries(cls, geom):
2524
return (geom is not None) and (not geom.is_empty) and geom.is_valid
25+
26+
class Config:
27+
coerce = True
28+
strict = False

workflow/scripts/breakup_shape.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import click
66
import geopandas as gpd
7-
from _schemas import Shapes
7+
from _schemas import ShapesSchema
88

99

1010
@click.command()
@@ -26,7 +26,7 @@ def breakup_shape(shapes_path, split_by, output_path):
2626
output_path = Path(output_path)
2727
output_path.mkdir(parents=True, exist_ok=True)
2828
shapes = gpd.read_parquet(shapes_path)
29-
shapes = Shapes.validate(shapes)
29+
shapes = ShapesSchema.validate(shapes)
3030

3131
# Print rows where geometry is empty
3232
if shapes.geometry.is_empty.any():

0 commit comments

Comments
 (0)