Skip to content

Commit 8eac0ad

Browse files
committed
test: fix tests for parkserve, pwd parcels
1 parent 25f356b commit 8eac0ad

4 files changed

Lines changed: 347 additions & 108 deletions

File tree

data/src/config/config.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import logging
22
from pathlib import Path
33

4-
FORCE_RELOAD = True
4+
FORCE_RELOAD = False
55
""" During the data load, whether to query the various GIS API services for the data to load. If True, will query the
66
API services and report on data differences. If False, will read the cached data."""
77

@@ -14,7 +14,7 @@
1414
CACHE_FRACTION = 0.05
1515
"""The fraction used to cache portions of the pipeline's transformed data in each step of the pipeline."""
1616

17-
log_level: int = logging.WARN
17+
log_level: int = logging.INFO
1818
""" overall log level for the project """
1919

2020
report_to_slack_channel: str = ""

data/src/data_utils/park_priority.py

Lines changed: 42 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,35 +11,26 @@
1111
from ..utilities import spatial_join
1212

1313

14-
@validate_output(ParkPriorityOutputValidator)
15-
def park_priority(
14+
def _park_priority_logic(
1615
input_gdf: gpd.GeoDataFrame,
1716
) -> Tuple[gpd.GeoDataFrame, ValidationResult]:
1817
"""
19-
Associates properties with park priority areas for Philadelphia using TPL's FeatureServer.
18+
Core business logic for park priority processing.
2019
21-
This function loads park priority data from TPL's ESRI FeatureServer and performs
22-
a spatial join with the input GeoDataFrame to associate properties with their
23-
park priority scores.
20+
This function contains the actual logic for:
21+
- Loading park priority data from ESRI
22+
- Renaming columns
23+
- Performing spatial joins
24+
- Returning results
25+
26+
This function can be tested independently without the validation decorator.
2427
2528
Args:
2629
input_gdf (gpd.GeoDataFrame): The input GeoDataFrame containing property data.
2730
2831
Returns:
2932
Tuple[gpd.GeoDataFrame, ValidationResult]: The input GeoDataFrame with park
3033
priority data joined and the validation result.
31-
32-
Tagline:
33-
Labels high-priority park areas.
34-
35-
Columns Added:
36-
park_priority (float): The park priority score from TPL's analysis.
37-
38-
Primary Feature Layer Columns Referenced:
39-
opa_id, geometry
40-
41-
Source:
42-
https://server7.tplgis.org/arcgis7/rest/services/ParkServe/ParkServe_ProdNew/FeatureServer/6/
4334
"""
4435
start_time = time.time()
4536
print(f"Starting park_priority function at {time.strftime('%H:%M:%S')}")
@@ -118,3 +109,36 @@ def park_priority(
118109
print(f"Function completed at {time.strftime('%H:%M:%S')}")
119110

120111
return merged_gdf, input_validation
112+
113+
114+
@validate_output(ParkPriorityOutputValidator)
115+
def park_priority(
116+
input_gdf: gpd.GeoDataFrame,
117+
) -> Tuple[gpd.GeoDataFrame, ValidationResult]:
118+
"""
119+
Associates properties with park priority areas for Philadelphia using TPL's FeatureServer.
120+
121+
This function loads park priority data from TPL's ESRI FeatureServer and performs
122+
a spatial join with the input GeoDataFrame to associate properties with their
123+
park priority scores.
124+
125+
Args:
126+
input_gdf (gpd.GeoDataFrame): The input GeoDataFrame containing property data.
127+
128+
Returns:
129+
Tuple[gpd.GeoDataFrame, ValidationResult]: The input GeoDataFrame with park
130+
priority data joined and the validation result.
131+
132+
Tagline:
133+
Labels high-priority park areas.
134+
135+
Columns Added:
136+
park_priority (float): The park priority score from TPL's analysis.
137+
138+
Primary Feature Layer Columns Referenced:
139+
opa_id, geometry
140+
141+
Source:
142+
https://server7.tplgis.org/arcgis7/rest/services/ParkServe/ParkServe_ProdNew/FeatureServer/6/
143+
"""
144+
return _park_priority_logic(input_gdf)

data/src/data_utils/pwd_parcels.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,7 @@ def merge_pwd_parcels_gdf(
8080

8181
# Calculate the area of the parcel in square feet
8282
merged_gdf["parcel_area_sqft"] = merged_gdf.geometry.area
83-
# Fill NaN values (from point geometries) with "NA" string
84-
merged_gdf["parcel_area_sqft"] = merged_gdf["parcel_area_sqft"].fillna("NA")
83+
# Note: Point geometries return 0.0 from .area, not NaN, so no fillna needed
8584

8685
return merged_gdf
8786

@@ -106,8 +105,8 @@ def pwd_parcels(
106105
is_condo_unit (bool): Flag indicating if the property is a condominium unit.
107106
Condo units are identified by duplicate geometries (multiple units at same site)
108107
and retain their point geometries.
109-
parcel_area_sqft (str): The area of the parcel in square feet.
110-
Polygons will have an area value; points will have "NA".
108+
parcel_area_sqft (float): The area of the parcel in square feet.
109+
Polygons will have an area value; points will have 0.0.
111110
112111
Columns Updated:
113112
geometry: The geometry column is updated with validated geometries from PWD parcels.

0 commit comments

Comments
 (0)