Skip to content

Commit 32d2b91

Browse files
authored
feat: web page revalidation (#1653)
1 parent d64cafd commit 32d2b91

File tree

34 files changed

+724
-128977
lines changed

34 files changed

+724
-128977
lines changed

.github/workflows/api-deployer.yml

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,14 @@ on:
6060
description: Oauth client id part of the authorization for the operations API
6161
required: true
6262
type: string
63+
WEB_APP_REVALIDATE_URL:
64+
description: URL of the web app revalidation endpoint
65+
required: false
66+
type: string
67+
WEB_APP_REVALIDATE_SECRET_1PASSWORD:
68+
description: 1Password reference for the web app revalidation secret token
69+
required: false
70+
type: string
6371
SKIP_TESTS:
6472
description: The skip test parameter is useful for DEV environment deployments, not advised for QA and PROD.
6573
required: true
@@ -289,6 +297,7 @@ jobs:
289297
echo "OAUTH2_CLIENT_SECRET=${{ secrets.OAUTH2_CLIENT_SECRET }}" >> $GITHUB_ENV
290298
echo "GLOBAL_RATE_LIMIT_REQ_PER_MINUTE=${{ inputs.GLOBAL_RATE_LIMIT_REQ_PER_MINUTE }}" >> $GITHUB_ENV
291299
echo "VALIDATOR_ENDPOINT=${{ inputs.VALIDATOR_ENDPOINT }}" >> $GITHUB_ENV
300+
echo "WEB_APP_REVALIDATE_URL=${{ inputs.WEB_APP_REVALIDATE_URL }}" >> $GITHUB_ENV
292301
293302
- name: Load secret from 1Password
294303
uses: 1password/load-secrets-action@v2
@@ -300,10 +309,19 @@ jobs:
300309
TDG_API_TOKEN: "op://rbiv7rvkkrsdlpcrz3bmv7nmcu/Transport.data.gouv.fr API Token/credential"
301310
OPERATIONS_OAUTH2_CLIENT_ID: ${{ inputs.OPERATIONS_OAUTH2_CLIENT_ID_1PASSWORD }}
302311

312+
- name: Load web app revalidation secret from 1Password
313+
if: ${{ inputs.WEB_APP_REVALIDATE_SECRET_1PASSWORD != '' }}
314+
uses: 1password/load-secrets-action@v2
315+
with:
316+
export-env: true
317+
env:
318+
OP_SERVICE_ACCOUNT_TOKEN: ${{ secrets.OP_SERVICE_ACCOUNT_TOKEN }}
319+
WEB_APP_REVALIDATE_SECRET: ${{ inputs.WEB_APP_REVALIDATE_SECRET_1PASSWORD }}
320+
303321
- name: Populate Variables
304322
run: |
305323
scripts/replace-variables.sh -in_file infra/backend.conf.rename_me -out_file infra/backend.conf -variables BUCKET_NAME,OBJECT_PREFIX
306-
scripts/replace-variables.sh -in_file infra/vars.tfvars.rename_me -out_file infra/vars.tfvars -variables PROJECT_ID,REGION,ENVIRONMENT,DEPLOYER_SERVICE_ACCOUNT,FEED_API_IMAGE_VERSION,OAUTH2_CLIENT_ID,OAUTH2_CLIENT_SECRET,GLOBAL_RATE_LIMIT_REQ_PER_MINUTE,ARTIFACT_REPO_NAME,VALIDATOR_ENDPOINT,TRANSITLAND_API_KEY,OPERATIONS_OAUTH2_CLIENT_ID,TDG_API_TOKEN
324+
scripts/replace-variables.sh -in_file infra/vars.tfvars.rename_me -out_file infra/vars.tfvars -variables PROJECT_ID,REGION,ENVIRONMENT,DEPLOYER_SERVICE_ACCOUNT,FEED_API_IMAGE_VERSION,OAUTH2_CLIENT_ID,OAUTH2_CLIENT_SECRET,GLOBAL_RATE_LIMIT_REQ_PER_MINUTE,ARTIFACT_REPO_NAME,VALIDATOR_ENDPOINT,TRANSITLAND_API_KEY,OPERATIONS_OAUTH2_CLIENT_ID,TDG_API_TOKEN,WEB_APP_REVALIDATE_URL,WEB_APP_REVALIDATE_SECRET
307325
308326
- uses: hashicorp/setup-terraform@v3
309327
with:

.github/workflows/api-dev.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ jobs:
2424
SKIP_TESTS: true
2525
VALIDATOR_ENDPOINT: https://stg-gtfs-validator-web-mbzoxaljzq-ue.a.run.app
2626
OPERATIONS_OAUTH2_CLIENT_ID_1PASSWORD: "op://rbiv7rvkkrsdlpcrz3bmv7nmcu/GCP_RETOOL_OAUTH2_CREDS/username"
27+
WEB_APP_REVALIDATE_URL: "https://staging.mobilitydatabase.org/api/revalidate"
28+
WEB_APP_REVALIDATE_SECRET_1PASSWORD: "op://rbiv7rvkkrsdlpcrz3bmv7nmcu/MobilityDatabase Vercel Deployment/REVALIDATE_SECRET_QA"
2729
secrets:
2830
GCP_MOBILITY_FEEDS_SA_KEY: ${{ secrets.DEV_GCP_MOBILITY_FEEDS_SA_KEY }}
2931
OAUTH2_CLIENT_ID: ${{ secrets.DEV_MOBILITY_FEEDS_OAUTH2_CLIENT_ID}}

.github/workflows/api-prod.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ jobs:
2020
SKIP_TESTS: false
2121
VALIDATOR_ENDPOINT: https://gtfs-validator-web-mbzoxaljzq-ue.a.run.app
2222
OPERATIONS_OAUTH2_CLIENT_ID_1PASSWORD: "op://rbiv7rvkkrsdlpcrz3bmv7nmcu/GCP_RETOOL_OAUTH2_CREDS/username"
23+
WEB_APP_REVALIDATE_URL: "https://mobilitydatabase.org/api/revalidate"
24+
WEB_APP_REVALIDATE_SECRET_1PASSWORD: "op://rbiv7rvkkrsdlpcrz3bmv7nmcu/MobilityDatabase Vercel Deployment/REVALIDATE_SECRET"
2325
secrets:
2426
GCP_MOBILITY_FEEDS_SA_KEY: ${{ secrets.PROD_GCP_MOBILITY_FEEDS_SA_KEY }}
2527
OAUTH2_CLIENT_ID: ${{ secrets.PROD_MOBILITY_FEEDS_OAUTH2_CLIENT_ID}}

.github/workflows/api-qa.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ jobs:
2020
GLOBAL_RATE_LIMIT_REQ_PER_MINUTE: ${{ vars.GLOBAL_RATE_LIMIT_REQ_PER_MINUTE }}
2121
VALIDATOR_ENDPOINT: https://stg-gtfs-validator-web-mbzoxaljzq-ue.a.run.app
2222
OPERATIONS_OAUTH2_CLIENT_ID_1PASSWORD: "op://rbiv7rvkkrsdlpcrz3bmv7nmcu/GCP_RETOOL_OAUTH2_CREDS/username"
23+
WEB_APP_REVALIDATE_URL: "https://staging.mobilitydatabase.org/api/revalidate"
24+
WEB_APP_REVALIDATE_SECRET_1PASSWORD: "op://rbiv7rvkkrsdlpcrz3bmv7nmcu/MobilityDatabase Vercel Deployment/REVALIDATE_SECRET_QA"
2325
secrets:
2426
GCP_MOBILITY_FEEDS_SA_KEY: ${{ secrets.QA_GCP_MOBILITY_FEEDS_SA_KEY }}
2527
OAUTH2_CLIENT_ID: ${{ secrets.DEV_MOBILITY_FEEDS_OAUTH2_CLIENT_ID}}

api/requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,4 +49,5 @@ PyJWT
4949
shapely
5050
google-cloud-pubsub
5151
pycountry
52-
pytz
52+
pytz
53+
google-cloud-tasks

api/src/shared/common/gcp_utils.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import json
22
import logging
33
import os
4+
from typing import List
45

56
REFRESH_VIEW_TASK_EXECUTOR_BODY = json.dumps(
67
{"task": "refresh_materialized_view", "payload": {"dry_run": False}}
@@ -69,6 +70,94 @@ def create_refresh_materialized_view_task():
6970
return {"error": "Error enqueuing task: %s" % error}, 500
7071

7172

73+
def _next_half_hour_bucket(now):
    """Return the :00 or :30 boundary that closes the half-hour slot containing ``now``."""
    from datetime import timedelta

    if now.minute < 30:
        return now.replace(minute=30, second=0, microsecond=0)
    # Minutes 30-59 roll over to the top of the next hour (and next day if needed).
    return now.replace(minute=0, second=0, microsecond=0) + timedelta(hours=1)


def _is_already_exists_error(error) -> bool:
    """Tell whether ``error`` reports that a task with the same name already exists."""
    # Duck-typed on the class name so google.api_core does not have to be imported here.
    # NOTE(review): assumes the client raises an exception class named AlreadyExists
    # for duplicate task names - confirm against the installed google-api-core.
    if any(cls.__name__ == "AlreadyExists" for cls in type(error).__mro__):
        return True
    return "ALREADY_EXISTS" in str(error)


def create_web_revalidation_task(feed_stable_ids: List[str]) -> None:
    """
    Enqueue one Cloud Task per feed to revalidate the web app cache for that feed's page.

    Task names are time-bucketed (``revalidate-<feed_stable_id>-<YYYY-MM-DD-HH-MM>``),
    so repeated calls for the same feed inside the same 30-minute window map to the
    same task name and are collapsed into one task.

    The function is best-effort: failures are logged and never raised to the caller,
    except for an ImportError when the Google client libraries are missing.

    Reads the environment variables PROJECT_ID, WEB_REVALIDATION_QUEUE, GCP_REGION
    and ENVIRONMENT. Nothing is enqueued when any of them is unset or empty.

    Args:
        feed_stable_ids: List of feed stable IDs whose pages should be revalidated.
            An empty list (or None) is a no-op.
    """
    if not feed_stable_ids:
        return

    queue = os.getenv("WEB_REVALIDATION_QUEUE")
    if not queue:
        logging.warning(
            "WEB_REVALIDATION_QUEUE not set; skipping revalidation for %s",
            feed_stable_ids,
        )
        return

    # Imported only once there is real work to do, so the no-op paths above do not
    # require the Google client libraries to be installed.
    from datetime import datetime, timezone

    from google.cloud import tasks_v2
    from google.protobuf import timestamp_pb2

    try:
        project = os.getenv("PROJECT_ID")
        gcp_region = os.getenv("GCP_REGION")
        environment_name = os.getenv("ENVIRONMENT")

        required = (
            ("PROJECT_ID", project),
            ("GCP_REGION", gcp_region),
            ("ENVIRONMENT", environment_name),
        )
        missing = [name for name, value in required if not value]
        if missing:
            # Without these the target URL would contain the literal text "None".
            logging.error(
                "Missing %s; skipping web revalidation for %s",
                ", ".join(missing),
                feed_stable_ids,
            )
            return

        # Timestamp.FromDatetime interprets naive datetimes as UTC, so the bucket
        # must be computed from UTC wall-clock time rather than host-local time.
        now = datetime.now(timezone.utc).replace(tzinfo=None)

        # DEBOUNCE WINDOW: next :00 or :30 (same pattern as materialized view refresh)
        bucket_time = _next_half_hour_bucket(now)

        proto_time = timestamp_pb2.Timestamp()
        proto_time.FromDatetime(bucket_time)

        url = f"https://{gcp_region}-{project}.cloudfunctions.net/" f"tasks_executor-{environment_name}"

        client = tasks_v2.CloudTasksClient()
        timestamp_str = bucket_time.strftime("%Y-%m-%d-%H-%M")

        for feed_stable_id in feed_stable_ids:
            task_name = f"revalidate-{feed_stable_id}-{timestamp_str}"
            body = json.dumps(
                {
                    "task": "revalidate_feed",
                    "payload": {"feed_stable_id": feed_stable_id},
                }
            ).encode()

            # Each feed is handled independently so one failure does not block the rest.
            try:
                create_http_task_with_name(
                    client=client,
                    body=body,
                    url=url,
                    project_id=project,
                    gcp_region=gcp_region,
                    queue_name=queue,
                    task_name=task_name,
                    task_time=proto_time,
                    http_method=tasks_v2.HttpMethod.POST,
                )
                logging.info(
                    "Scheduled web revalidation task for feed %s (%s)",
                    feed_stable_id,
                    task_name,
                )
            except Exception as e:
                if _is_already_exists_error(e):
                    # Expected outcome of the deduplication: same feed, same window.
                    logging.info(
                        "Revalidation task already exists for %s, skipping.",
                        task_name,
                    )
                else:
                    logging.error(
                        "Error creating revalidation task for %s: %s",
                        feed_stable_id,
                        e,
                    )

    except Exception as error:
        logging.error("Error enqueuing revalidation tasks: %s", error)
159+
160+
72161
def create_http_task_with_name(
73162
client: any, # tasks_v2.CloudTasksClient
74163
body: bytes,

api/tests/test_web_revalidation.py

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
#
2+
# MobilityData 2025
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
16+
import sys
17+
import unittest
18+
from unittest.mock import MagicMock, patch
19+
20+
21+
class TestCreateWebRevalidationTask(unittest.TestCase):
    """Unit tests for shared.common.gcp_utils.create_web_revalidation_task."""

    # Environment shared by every test that is expected to reach task creation.
    _ENV = {
        "PROJECT_ID": "test-project",
        "WEB_REVALIDATION_QUEUE": "web-revalidation-queue",
        "GCP_REGION": "us-central1",
        "ENVIRONMENT": "dev",
        "SERVICE_ACCOUNT_EMAIL": "test@test.iam.gserviceaccount.com",
    }
    # Same environment, but with the queue name blanked out.
    _ENV_WITHOUT_QUEUE = {**_ENV, "WEB_REVALIDATION_QUEUE": ""}

    def setUp(self):
        # google-cloud-tasks may not be installed in the test environment.
        # Provide a MagicMock so `from google.cloud import tasks_v2` succeeds
        # for tests that proceed past the early-return guards.
        self._mock_tasks_v2 = MagicMock()
        sys_modules_patcher = patch.dict(sys.modules, {"google.cloud.tasks_v2": self._mock_tasks_v2})
        sys_modules_patcher.start()
        # addCleanup runs even when a later setUp step fails, unlike tearDown.
        self.addCleanup(sys_modules_patcher.stop)

    @staticmethod
    def _revalidate(feed_stable_ids):
        """Import the function under test lazily (after sys.modules is patched) and call it."""
        from shared.common.gcp_utils import create_web_revalidation_task

        return create_web_revalidation_task(feed_stable_ids)

    @patch("shared.common.gcp_utils.create_http_task_with_name")
    @patch.dict("os.environ", _ENV)
    def test_empty_feed_ids(self, mock_create_task):
        """Should return early without creating any tasks."""
        self.assertIsNone(self._revalidate([]))
        mock_create_task.assert_not_called()

    @patch("shared.common.gcp_utils.create_http_task_with_name")
    @patch.dict("os.environ", _ENV_WITHOUT_QUEUE)
    def test_missing_queue_env_var(self, mock_create_task):
        """Should log a warning and return without creating tasks."""
        with self.assertLogs(level="WARNING") as captured:
            self.assertIsNone(self._revalidate(["mdb-123"]))

        mock_create_task.assert_not_called()
        self.assertTrue(any("WEB_REVALIDATION_QUEUE not set" in line for line in captured.output))

    @patch("shared.common.gcp_utils.create_http_task_with_name")
    @patch.dict("os.environ", _ENV)
    def test_creates_tasks_for_each_feed(self, mock_create_task):
        """Should create one Cloud Task per feed stable ID."""
        self._revalidate(["mdb-100", "mdb-200"])

        self.assertEqual(mock_create_task.call_count, 2)

        # Verify the task bodies contain the correct feed IDs
        first_call, second_call = mock_create_task.call_args_list
        self.assertIn(b"mdb-100", first_call.kwargs.get("body", b""))
        self.assertIn(b"mdb-200", second_call.kwargs.get("body", b""))

    @patch("shared.common.gcp_utils.create_http_task_with_name")
    @patch.dict("os.environ", _ENV)
    def test_dedup_task_name_contains_feed_id(self, mock_create_task):
        """Task name should include the feed stable ID for deduplication."""
        self._revalidate(["mdb-42"])

        self.assertEqual(mock_create_task.call_count, 1)
        task_name = mock_create_task.call_args.kwargs.get("task_name", "")
        self.assertTrue(task_name.startswith("revalidate-mdb-42-"))

    @patch("shared.common.gcp_utils.create_http_task_with_name")
    @patch.dict("os.environ", _ENV)
    def test_already_exists_is_handled_gracefully(self, mock_create_task):
        """ALREADY_EXISTS errors should be caught and logged, not raised."""
        mock_create_task.side_effect = Exception("409 ALREADY_EXISTS: task already exists")

        # Should not raise, and the creation must actually have been attempted.
        self.assertIsNone(self._revalidate(["mdb-123"]))
        self.assertEqual(mock_create_task.call_count, 1)

    @patch("shared.common.gcp_utils.create_http_task_with_name")
    @patch.dict("os.environ", _ENV)
    def test_failure_for_one_feed_does_not_block_others(self, mock_create_task):
        """A non-duplicate error on one feed is logged and the remaining feeds are still enqueued."""
        mock_create_task.side_effect = [Exception("boom"), None]

        with self.assertLogs(level="ERROR"):
            self.assertIsNone(self._revalidate(["mdb-1", "mdb-2"]))

        self.assertEqual(mock_create_task.call_count, 2)
        self.assertIn(b"mdb-2", mock_create_task.call_args_list[1].kwargs.get("body", b""))

    @patch("shared.common.gcp_utils.create_http_task_with_name")
    @patch.dict("os.environ", _ENV)
    def test_targets_tasks_executor_url(self, mock_create_task):
        """Tasks should target the tasks_executor Cloud Function URL."""
        self._revalidate(["mdb-1"])

        url = mock_create_task.call_args.kwargs.get("url", "")
        self.assertIn("tasks_executor-dev", url)
        self.assertIn("us-central1", url)
142+
143+
144+
if __name__ == "__main__":
145+
unittest.main()

0 commit comments

Comments
 (0)