Skip to content

Commit ffe65e4

Browse files
committed
Update population data from 2022/2023 to 2024
1 parent f135ffc commit ffe65e4

6 files changed

Lines changed: 30 additions & 46 deletions

File tree

python/housing_data/build_data_utils.py

Lines changed: 3 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -290,30 +290,13 @@ def add_current_year_projections(year_to_date_df: pd.DataFrame) -> pd.DataFrame:
290290
return year_to_date_df
291291

292292

293-
def impute_2023_to_2025_population(df_2020s: pd.DataFrame) -> pd.DataFrame:
293+
def impute_2025_population(df_2020s: pd.DataFrame) -> pd.DataFrame:
294294
"""
295-
Impute 2023 to 2025 with the 2022 population.
296-
TODO: use a more recent population snapshot
295+
Impute 2025 with the 2024 population
297296
"""
298297
return pd.concat(
299298
[
300299
df_2020s,
301-
df_2020s[df_2020s["year"] == "2022"].assign(year="2023"),
302-
df_2020s[df_2020s["year"] == "2022"].assign(year="2024"),
303-
df_2020s[df_2020s["year"] == "2022"].assign(year="2025"),
304-
]
305-
)
306-
307-
308-
def impute_2024_and_2025_population(df_2020s: pd.DataFrame) -> pd.DataFrame:
309-
"""
310-
Impute 2024 and 2025 with the 2023 population
311-
TODO: use a more recent population snapshot
312-
"""
313-
return pd.concat(
314-
[
315-
df_2020s,
316-
df_2020s[df_2020s["year"] == "2023"].assign(year="2024"),
317-
df_2020s[df_2020s["year"] == "2023"].assign(year="2025"),
300+
df_2020s[df_2020s["year"] == "2024"].assign(year="2025"),
318301
]
319302
)

python/housing_data/california_hcd_data.py

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -155,15 +155,15 @@ def _aggregate_to_geography(
155155
def _load_fips_crosswalk(data_path: Path) -> pd.DataFrame:
156156
crosswalk_df = load_fips_crosswalk(data_path)
157157
crosswalk_df = crosswalk_df[
158-
(crosswalk_df["State Code (FIPS)"] == 6) # California rows
158+
(crosswalk_df["State FIPS Code"] == 6) # California rows
159159
& (
160-
(crosswalk_df["Place Code (FIPS)"] != 0)
161-
| (crosswalk_df["County Code (FIPS)"] != 0)
160+
(crosswalk_df["Place FIPS Code"] != 0)
161+
| (crosswalk_df["County FIPS Code"] != 0)
162162
)
163-
].rename(columns={"State Code (FIPS)": "state_code"})
163+
].rename(columns={"State FIPS Code": "state_code"})
164164

165165
crosswalk_df["name"] = (
166-
crosswalk_df["Area Name (including legal/statistical area description)"]
166+
crosswalk_df["Area Name"]
167167
.str.removesuffix(" city")
168168
.str.removesuffix(" town")
169169
.replace(
@@ -186,9 +186,9 @@ def _load_fips_crosswalk(data_path: Path) -> pd.DataFrame:
186186
)
187187

188188
crosswalk_df["place_or_county_code"] = np.where(
189-
crosswalk_df["County Code (FIPS)"] != 0,
190-
crosswalk_df["County Code (FIPS)"].astype(str) + "_county",
191-
crosswalk_df["Place Code (FIPS)"].astype(str),
189+
crosswalk_df["County FIPS Code"] != 0,
190+
crosswalk_df["County FIPS Code"].astype(str) + "_county",
191+
crosswalk_df["Place FIPS Code"].astype(str),
192192
)
193-
crosswalk_df["county_code"] = crosswalk_df["County Code (FIPS)"]
193+
crosswalk_df["county_code"] = crosswalk_df["County FIPS Code"]
194194
return crosswalk_df[["name", "place_or_county_code", "county_code", "state_code"]]

python/housing_data/county_population.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33

44
import pandas as pd
55
import us
6-
from housing_data.build_data_utils import impute_2024_and_2025_population
6+
from housing_data.build_data_utils import impute_2025_population
77
from housing_data.data_loading_helpers import get_url_text
88
from housing_data.fips_crosswalk import load_fips_crosswalk
99

@@ -20,10 +20,10 @@ def _melt_df(df: pd.DataFrame, years: list[int]) -> pd.DataFrame:
2020

2121

2222
def get_county_populations_2020s(data_path: Path) -> pd.DataFrame:
23-
df = pd.read_csv(data_path / "co-est2023-alldata.csv", encoding="latin_1")
23+
df = pd.read_csv(data_path / "co-est2024-alldata.csv", encoding="latin_1")
2424

25-
df = _melt_df(df, list(range(2020, 2024)))
26-
return impute_2024_and_2025_population(df)
25+
df = _melt_df(df, list(range(2020, 2025)))
26+
return impute_2025_population(df)
2727

2828

2929
def get_county_populations_2010s(data_path: Path) -> pd.DataFrame:
@@ -106,12 +106,12 @@ def get_county_populations_2000s(data_path: Path, data_repo_path: Path) -> pd.Da
106106

107107
def get_county_fips_crosswalk(data_repo_path: Path) -> pd.DataFrame:
108108
df = load_fips_crosswalk(data_repo_path)
109-
df = df[df["County Code (FIPS)"] != 0]
109+
df = df[df["County FIPS Code"] != 0]
110110

111111
rename_cols = {
112-
"State Code (FIPS)": "state_code",
113-
"County Code (FIPS)": "county_code",
114-
"Area Name (including legal/statistical area description)": "county_name",
112+
"State FIPS Code": "state_code",
113+
"County FIPS Code": "county_code",
114+
"Area Name": "county_name",
115115
}
116116
df = df[rename_cols.keys()].rename(columns=rename_cols)
117117

python/housing_data/fips_crosswalk.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,5 +5,5 @@
55

66
def load_fips_crosswalk(data_repo_path: Path) -> pd.DataFrame:
77
return pd.read_excel(
8-
data_repo_path / "data/crosswalk/all-geocodes-v2021.xlsx", skiprows=4
8+
data_repo_path / "data/crosswalk/all-geocodes-v2024.xlsx", skiprows=4
99
)

python/housing_data/place_population.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44

55
import numpy as np
66
import pandas as pd
7-
from housing_data.build_data_utils import impute_2023_to_2025_population
7+
from housing_data.build_data_utils import impute_2025_population
88
from housing_data.data_loading_helpers import get_path, get_url_text
99

1010

@@ -427,12 +427,13 @@ def get_place_populations_2010s(data_path: Optional[Path]) -> pd.DataFrame:
427427
def get_place_populations_2020s(data_path: Optional[Path]) -> pd.DataFrame:
428428
df = pd.read_csv(
429429
get_path(
430-
"https://www2.census.gov/programs-surveys/popest/datasets/2010-2020/cities/sub-est2022.csv",
430+
"https://www2.census.gov/programs-surveys/popest/datasets/2010-2020/cities/sub-est2024.csv",
431431
data_path,
432432
),
433+
encoding="latin_1",
433434
)
434-
df = _melt_df(df, years=list(range(2020, 2023)))
435-
df = impute_2023_to_2025_population(df)
435+
df = _melt_df(df, years=list(range(2020, 2025)))
436+
df = impute_2025_population(df)
436437
return df
437438

438439

python/housing_data/state_population.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33

44
import pandas as pd
55
import us
6-
from housing_data.build_data_utils import impute_2024_and_2025_population
6+
from housing_data.build_data_utils import impute_2025_population
77
from housing_data.data_loading_helpers import get_path, get_url_text
88

99
DIVISIONS = {
@@ -217,10 +217,10 @@ def get_state_populations_2010s(data_path: Path) -> pd.DataFrame:
217217

218218

219219
def get_state_populations_2020s(data_path: Path) -> pd.DataFrame:
220-
df = pd.read_csv(data_path / "NST-EST2023-ALLDATA.csv")
220+
df = pd.read_csv(data_path / "NST-EST2024-ALLDATA.csv")
221221

222-
df = _melt_df(df, list(range(2020, 2024)))
223-
return impute_2024_and_2025_population(df)
222+
df = _melt_df(df, list(range(2020, 2025)))
223+
return impute_2025_population(df)
224224

225225

226226
def get_state_population_estimates(data_path: Path) -> pd.DataFrame:

0 commit comments

Comments (0)