Skip to content

Commit 3e84c39

Browse files
committed
Remove unnecessary get_path helper
1 parent ffe65e4 commit 3e84c39

3 files changed

Lines changed: 11 additions & 53 deletions

File tree

python/housing_data/data_loading_helpers.py

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -34,10 +34,3 @@ def get_url_text(
3434
else:
3535
web_url = os.path.join(web_prefix, common_path)
3636
return requests.get(web_url).text
37-
38-
39-
def get_path(url: str, data_path: Optional[Path]) -> str:
40-
if data_path is not None:
41-
return str(Path(data_path, Path(url).name))
42-
else:
43-
return url

python/housing_data/place_population.py

Lines changed: 8 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -5,15 +5,10 @@
55
import numpy as np
66
import pandas as pd
77
from housing_data.build_data_utils import impute_2025_population
8-
from housing_data.data_loading_helpers import get_path, get_url_text
98

109

1110
def _get_places_crosswalk_df(data_path: Optional[Path] = None) -> pd.DataFrame:
12-
df = pd.read_fwf(
13-
get_path(
14-
"https://www2.census.gov/geo/tiger/PREVGENZ/pl/us_places.txt", data_path
15-
)
16-
)
11+
df = pd.read_fwf(data_path / "us_places.txt")
1712

1813
df["State Code"] = df["CENSUS"] // 10000
1914
df["Place Code"] = df["CENSUS"] % 10000
@@ -147,12 +142,8 @@ def get_place_populations_1980(data_path: Optional[Path]) -> pd.DataFrame:
147142
return df
148143

149144

150-
def _load_raw_place_populations_1990s(data_path: Optional[Path]) -> pd.DataFrame:
151-
tables = get_url_text(
152-
"https://www2.census.gov/programs-surveys/popest/tables/1990-2000/"
153-
"2000-subcounties-evaluation-estimates/sc2000f_us.txt",
154-
data_path,
155-
).split("\f")
145+
def _load_raw_place_populations_1990s(data_path: Path) -> pd.DataFrame:
146+
tables = (data_path / "sc2000f_us.txt").read_text().split("\f")
156147

157148
common_cols = [
158149
"Block",
@@ -396,14 +387,8 @@ def _melt_df(
396387
)
397388

398389

399-
def get_place_populations_2000s(data_path: Optional[Path]) -> pd.DataFrame:
400-
df = pd.read_csv(
401-
get_path(
402-
"https://www2.census.gov/programs-surveys/popest/datasets/2000-2010/intercensal/cities/sub-est00int.csv",
403-
data_path,
404-
),
405-
encoding="latin_1",
406-
)
390+
def get_place_populations_2000s(data_path: Path) -> pd.DataFrame:
391+
df = pd.read_csv(data_path / "sub-est00int.csv", encoding="latin_1")
407392
return _melt_df(
408393
df,
409394
years=list(range(2000, 2011)),
@@ -412,26 +397,14 @@ def get_place_populations_2000s(data_path: Optional[Path]) -> pd.DataFrame:
412397
)
413398

414399

415-
def get_place_populations_2010s(data_path: Optional[Path]) -> pd.DataFrame:
416-
df = pd.read_csv(
417-
get_path(
418-
"https://www2.census.gov/programs-surveys/popest/datasets/2010-2020/cities/SUB-EST2020_ALL.csv",
419-
data_path,
420-
),
421-
encoding="latin_1",
422-
)
400+
def get_place_populations_2010s(data_path: Path) -> pd.DataFrame:
401+
df = pd.read_csv(data_path / "SUB-EST2020_ALL.csv", encoding="latin_1")
423402

424403
return _melt_df(df, years=list(range(2010, 2021)))
425404

426405

427406
def get_place_populations_2020s(data_path: Optional[Path]) -> pd.DataFrame:
428-
df = pd.read_csv(
429-
get_path(
430-
"https://www2.census.gov/programs-surveys/popest/datasets/2010-2020/cities/sub-est2024.csv",
431-
data_path,
432-
),
433-
encoding="latin_1",
434-
)
407+
df = pd.read_csv(data_path / "sub-est2024.csv", encoding="latin_1")
435408
df = _melt_df(df, years=list(range(2020, 2025)))
436409
df = impute_2025_population(df)
437410
return df

python/housing_data/state_population.py

Lines changed: 3 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
import pandas as pd
55
import us
66
from housing_data.build_data_utils import impute_2025_population
7-
from housing_data.data_loading_helpers import get_path, get_url_text
7+
from housing_data.data_loading_helpers import get_url_text
88

99
DIVISIONS = {
1010
"New England": [
@@ -168,10 +168,7 @@ def get_state_populations_1990s(data_path: Path) -> pd.DataFrame:
168168

169169
def get_state_populations_2000s(data_path: Path) -> pd.DataFrame:
170170
df = pd.read_excel(
171-
get_path(
172-
"https://www2.census.gov/programs-surveys/popest/tables/2000-2010/intercensal/state/st-est00int-01.xls",
173-
data_path,
174-
),
171+
data_path / "st-est00int-01.xls",
175172
skiprows=3,
176173
skipfooter=8,
177174
)
@@ -206,12 +203,7 @@ def _melt_df(df: pd.DataFrame, years: list[int]) -> pd.DataFrame:
206203

207204

208205
def get_state_populations_2010s(data_path: Path) -> pd.DataFrame:
209-
df = pd.read_csv(
210-
get_path(
211-
"https://www2.census.gov/programs-surveys/popest/datasets/2010-2020/state/totals/nst-est2020-alldata.csv",
212-
data_path,
213-
)
214-
)
206+
df = pd.read_csv(data_path / "nst-est2020-alldata.csv")
215207

216208
return _melt_df(df, list(range(2010, 2020)))
217209

0 commit comments

Comments
 (0)