Skip to content

Commit e5e0e04

Browse files
authored
Map data workflow improvements (#61)
Updated `build-map-data` workflow to be more resilient.
* Broke up workflow into multiple jobs
  * Allows appropriate notifications to be sent without adding conditional checks to every step.
* Workflow no longer fails if "404 not found" is encountered
  * This is not considered a failure since it just means the map data files have not been updated yet.
* Improved `shapefiles.py` to result in fewer failures
2 parents 11fe8cb + 594e07f commit e5e0e04

4 files changed

Lines changed: 112 additions & 131 deletions

File tree

.github/workflows/build-map-data.yaml

Lines changed: 76 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,13 @@ on:
1414
default: ''
1515

1616
jobs:
17-
build:
17+
download:
1818
runs-on: ubuntu-latest
19-
env:
20-
PUSHOVER_API_KEY: ${{ secrets.PUSHOVER_API_KEY }}
21-
PUSHOVER_USER_KEY: ${{ secrets.PUSHOVER_USER_KEY }}
19+
outputs:
20+
exit_code: ${{ steps.dl-shp.outputs.exit_code }}
21+
shp_year: ${{ steps.info.outputs.shp_year }}
22+
state_shp: ${{ steps.info.outputs.state_shp }}
23+
county_shp: ${{ steps.info.outputs.county_shp }}
2224

2325
steps:
2426
- name: Checkout
@@ -38,6 +40,38 @@ jobs:
3840
run: |
3941
python data-raw/scripts/shapefiles.py ${{ inputs.year }}
4042
43+
- name: Save shapefile info
44+
id: info
45+
run: |
46+
echo "shp_year=${{ env.shp_year }}" >> "$GITHUB_OUTPUT"
47+
echo "state_shp=${{ env.state_shp }}" >> "$GITHUB_OUTPUT"
48+
echo "county_shp=${{ env.county_shp }}" >> "$GITHUB_OUTPUT"
49+
50+
- name: Upload shapefiles
51+
if: steps.dl-shp.outputs.exit_code == '0'
52+
uses: actions/upload-artifact@v4
53+
with:
54+
name: shapefiles
55+
path: data-raw/shapefiles/${{ env.shp_year }}
56+
57+
process:
58+
runs-on: ubuntu-latest
59+
needs: download
60+
if: needs.download.outputs.exit_code == '0'
61+
outputs:
62+
pr_url: ${{ steps.info.outputs.pr_url }}
63+
pr_number: ${{ steps.info.outputs.pr_number }}
64+
65+
steps:
66+
- name: Checkout
67+
uses: actions/checkout@v4
68+
69+
- name: Download shapefiles
70+
uses: actions/download-artifact@v4
71+
with:
72+
name: shapefiles
73+
path: data-raw/shapefiles/${{ needs.download.outputs.shp_year }}
74+
4175
- name: Setup R
4276
uses: r-lib/actions/setup-r@v2
4377

@@ -48,9 +82,9 @@ jobs:
4882

4983
- name: Modify shapefiles
5084
env:
51-
STATE_SHP: ${{ env.state_shp }}
52-
COUNTY_SHP: ${{ env.county_shp }}
53-
YEAR: ${{ env.shp_year }}
85+
STATE_SHP: ${{ needs.download.outputs.state_shp }}
86+
COUNTY_SHP: ${{ needs.download.outputs.county_shp }}
87+
YEAR: ${{ needs.download.outputs.shp_year }}
5488
run: |
5589
input_dir <- file.path("data-raw", "shapefiles", Sys.getenv("YEAR"))
5690
output_dir <- file.path("inst", "extdata", Sys.getenv("YEAR"))
@@ -80,7 +114,7 @@ jobs:
80114
- name: Determine pull request parameters
81115
id: pr-params
82116
env:
83-
YEAR: ${{ env.shp_year }}
117+
YEAR: ${{ needs.download.outputs.shp_year }}
84118
run: |
85119
echo "branch_name=data-update/$YEAR" >> "$GITHUB_OUTPUT"
86120
echo "pr_title=Add $YEAR map data" >> "$GITHUB_OUTPUT"
@@ -100,7 +134,7 @@ jobs:
100134
token: ${{ secrets.BOT_PAT }}
101135
author: ${{ secrets.BOT_USER }}
102136
committer: ${{ secrets.BOT_USER }}
103-
commit-message: "[automated] Add ${{ env.shp_year }} map data based on available shapefiles"
137+
commit-message: "[automated] Add ${{ needs.download.outputs.shp_year }} map data based on available shapefiles"
104138
branch: ${{ steps.pr-params.outputs.branch_name }}
105139
title: ${{ steps.pr-params.outputs.pr_title }}
106140
body: ${{ steps.pr-body.outputs.result }}
@@ -109,17 +143,45 @@ jobs:
109143
labels: data update
110144
delete-branch: true
111145

146+
- name: Save PR info
147+
id: info
148+
run: |
149+
echo "pr_url=${{ steps.open-pr.outputs.pull-request-url }}" >> "$GITHUB_OUTPUT"
150+
echo "pr_number=${{ steps.open-pr.outputs.pull-request-number }}" >> "$GITHUB_OUTPUT"
151+
152+
notify:
153+
runs-on: ubuntu-latest
154+
needs: [download, process]
155+
if: always()
156+
env:
157+
PUSHOVER_API_KEY: ${{ secrets.PUSHOVER_API_KEY }}
158+
PUSHOVER_USER_KEY: ${{ secrets.PUSHOVER_USER_KEY }}
159+
160+
steps:
161+
- name: Checkout
162+
uses: actions/checkout@v4
163+
164+
- name: Setup Python
165+
uses: actions/setup-python@v5
166+
with:
167+
python-version: '3.9'
168+
cache: 'pip'
169+
170+
- name: Install Python dependencies
171+
run: pip install -r data-raw/scripts/requirements.txt
172+
112173
- name: Send success notification
174+
if: needs.download.outputs.exit_code == '0' && needs.process.result == 'success'
113175
run: |
114-
python data-raw/scripts/pushover.py "✅ usmapdata has updated its data files, a PR review is needed: <a href=\"${{ steps.open-pr.outputs.pull-request-url }}\">PR #${{ steps.open-pr.outputs.pull-request-number }}</a>"
176+
python data-raw/scripts/pushover.py "✅ usmapdata has updated its data files, a PR review is needed: <a href=\"${{ needs.process.outputs.pr_url }}\">PR #${{ needs.process.outputs.pr_number }}</a>"
115177
116178
- name: Send data not found notification
117-
if: ${{ failure() && steps.dl-shp.outputs.exit_code == '404' }}
179+
if: needs.download.outputs.exit_code == '404'
118180
run: |
119-
python data-raw/scripts/pushover.py "⚠️ usmapdata failed to find map data files for ${{ env.shp_year }}." "LOW"
181+
python data-raw/scripts/pushover.py "⚠️ usmapdata failed to find map data files for ${{ needs.download.outputs.shp_year }}." "LOW"
120182
121183
- name: Send failure notification
122-
if: ${{ failure() && steps.dl-shp.outputs.exit_code != '404' }}
184+
if: needs.download.outputs.exit_code != '0' && needs.download.outputs.exit_code != '404'
123185
run: |
124-
python data-raw/scripts/pushover.py "❌ usmapdata failed to update map data files. (error: ${{ steps.dl-shp.outputs.exit_code }})" "LOW"
186+
python data-raw/scripts/pushover.py "❌ usmapdata failed to update map data files. (error: ${{ needs.download.outputs.exit_code }})" "LOW"
125187

data-raw/certs/www2-census-gov-chain.pem

Lines changed: 0 additions & 92 deletions
This file was deleted.

data-raw/scripts/config.ini

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
[shapefiles]
22
url = https://www2.census.gov/geo/tiger/GENZ{year}/shp/cb_{year}_us_{entity}_{res}.zip
3-
cert = www2-census-gov-chain.pem
43
current_year = 2024
54
entities = state,county
65
res = 20m

data-raw/scripts/shapefiles.py

Lines changed: 36 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,36 +4,48 @@
44
import requests
55
import shutil
66
import sys
7+
import tempfile
78
from zipfile import ZipFile
89

910
class DownloadError(Exception):
10-
def __init__(self, message, code=None):
11+
def __init__(self, message, code):
1112
super().__init__(message)
1213
self.code = code
1314

14-
def _download_and_extract(file_url: str, extract_dir: str, cert_url: str) -> bool:
15-
response = requests.get(file_url, verify = cert_url)
16-
LOCAL_FILE = "download.zip"
15+
def _download_and_extract(file_url: str, extract_dir: str):
16+
response = requests.get(file_url, timeout=300)
1717

18-
if response.status_code == 200:
19-
with open(LOCAL_FILE, "wb") as f:
20-
f.write(response.content)
21-
print(f"{LOCAL_FILE} downloaded from {file_url}.")
18+
if response.status_code != 200:
19+
raise DownloadError(f"Failed to download {file_url}.", code=response.status_code)
20+
21+
with tempfile.NamedTemporaryFile(suffix='.zip', delete=False) as tmp_file:
22+
tmp_filename = tmp_file.name
23+
tmp_file.write(response.content)
24+
print(f"Files downloaded from {file_url} to {tmp_filename}.")
2225

23-
with ZipFile(LOCAL_FILE, "r") as z:
26+
try:
27+
with ZipFile(tmp_filename, "r") as z:
2428
z.extractall(extract_dir)
25-
print(f"{LOCAL_FILE} extracted to {extract_dir}.")
29+
print(f"{tmp_filename} extracted to {extract_dir}.")
30+
finally:
31+
os.remove(tmp_filename)
2632

27-
os.remove(LOCAL_FILE)
28-
else:
29-
raise DownloadError(f"Failed to download {file_url}.", code=response.status_code)
33+
def _exit(sys_code: int, gh_code: int=None):
34+
"""
35+
Exits with the given code(s).
36+
37+
Parameters:
38+
sys_code: The exit code to call sys.exit() with.
39+
gh_code (optional): The code to set in the GitHub output.
40+
If None, uses sys_code.
41+
"""
42+
gh_code = sys_code if gh_code is None else gh_code
3043

31-
def _failed(code: int):
3244
if (gh_env := os.getenv("GITHUB_OUTPUT")):
3345
with open(gh_env, "a") as f:
34-
f.write(f"exit_code={code}\n")
46+
f.write(f"exit_code={gh_code}\n")
3547

36-
sys.exit(code)
48+
sys.exit(sys_code)
3749

3850
def download_shapefiles(selected_year=None):
3951
"""
@@ -54,7 +66,6 @@ def download_shapefiles(selected_year=None):
5466
SECTION = "shapefiles"
5567

5668
url_template = config.get(SECTION, "url")
57-
cert_file = config.get(SECTION, "cert")
5869
current_year = config.getint(SECTION, "current_year")
5970
entities = config.get(SECTION, "entities").split(",")
6071
res = config.get(SECTION, "res")
@@ -68,21 +79,18 @@ def download_shapefiles(selected_year=None):
6879
with open(gh_env, "a") as f:
6980
f.write(f"shp_year={year}\n")
7081

71-
# create cert file URL
72-
cert_url = os.path.join(script_dir, "..", "certs", cert_file)
73-
7482
# create output directory
7583
extract_dir = os.path.join(script_dir, "..", "shapefiles", str(year))
7684

7785
if os.path.exists(extract_dir):
7886
shutil.rmtree(extract_dir)
79-
shutil.os.makedirs(extract_dir)
87+
os.makedirs(extract_dir)
8088

8189
try:
8290
# attempt shapefile downloads
8391
for entity in entities:
8492
url = url_template.format(year=year, entity=entity, res=res)
85-
_download_and_extract(url, extract_dir, cert_url)
93+
_download_and_extract(url, extract_dir)
8694

8795
if (gh_env := os.getenv("GITHUB_ENV")):
8896
with open(gh_env, "a") as f:
@@ -93,16 +101,20 @@ def download_shapefiles(selected_year=None):
93101
config.set(SECTION, "current_year", f"{year}")
94102
with open(config_file, "w") as f:
95103
config.write(f)
104+
105+
_exit(0)
96106
except DownloadError as e:
97107
if e.code == 404: # i.e. shapefiles not found
98108
print(f"The shapefiles for {year} were not found. Better luck next time!")
109+
# "files not found" is not considered a system failure
110+
_exit(sys_code=0, gh_code=404)
99111
else: # other download errors
100112
print(e)
113+
_exit(e.code)
101114

102-
_failed(e.code)
103115
except Exception as e:
104116
print(e)
105-
_failed(-1)
117+
_exit(-1)
106118

107119

108120
if __name__ == "__main__":

0 commit comments

Comments
 (0)