Skip to content

Commit bd2c193

Browse files
committed
Add back missing CA HCD data (floating point years being dropped)
1 parent 189a635 commit bd2c193

1 file changed

Lines changed: 5 additions & 1 deletion

File tree

python/housing_data/california_hcd_data.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,11 @@ def load_california_hcd_data(
8080
assert df["building_type"].isnull().sum() < 50
8181
df = df[df["building_type"].notnull()]
8282

83-
df = df.rename(columns={"YEAR": "year"}).astype({"year": str})
83+
# Drop rows where YEAR is not numeric; float years (e.g. 2018.0) are kept and cast to int below
84+
df = df.rename(columns={"YEAR": "year"})
85+
df["year"] = pd.to_numeric(df["year"], errors="coerce").replace({np.nan: None})
86+
df = df.dropna(subset=["year"])
87+
df["year"] = df["year"].astype(int).astype(str)
8488

8589
places_df = _aggregate_to_geography(df, "place", data_path)
8690
counties_df = _aggregate_to_geography(df, "county", data_path)

0 commit comments

Comments
 (0)