Skip to content

Commit b7d1838 — "timezone changes" (1 parent: d6fc6ac)

6 files changed

Lines changed: 240 additions & 200 deletions

File tree

functions-python/backfill_dataset_service_date_range/src/main.py

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,15 @@
99
from typing import TYPE_CHECKING
1010
from sqlalchemy.orm import joinedload
1111
from sqlalchemy import or_, func
12+
from zoneinfo import ZoneInfo
13+
from datetime import timezone
14+
from shared.helpers.database import refresh_materialized_view
1215

13-
from shared.database_gen.sqlacodegen_models import Gtfsdataset, Validationreport
16+
from shared.database_gen.sqlacodegen_models import (
17+
Gtfsdataset,
18+
Validationreport,
19+
t_feedsearch,
20+
)
1421

1522
import requests
1623
import json
@@ -103,19 +110,24 @@ def backfill_datasets(session: "Session"):
103110
response.raise_for_status()
104111
json_data = response.json()
105112

106-
extracted_service_start_date = (
107-
json_data.get("summary", {})
108-
.get("feedInfo", {})
109-
.get("feedServiceWindowStart", None)
113+
summary = json_data.get("summary", {})
114+
extracted_service_start_date = summary.get("feedInfo", {}).get(
115+
"feedServiceWindowStart", None
110116
)
111-
extracted_service_end_date = (
112-
json_data.get("summary", {})
113-
.get("feedInfo", {})
114-
.get("feedServiceWindowEnd", None)
117+
extracted_service_end_date = summary.get("feedInfo", {}).get(
118+
"feedServiceWindowEnd", None
115119
)
116120

121+
extracted_timezone = None
122+
if isinstance(summary.get("agencies"), list) and summary["agencies"]:
123+
extracted_timezone = summary["agencies"][0].get("timezone", None)
124+
125+
formatted_service_start_date
126+
formatted_service_end_date
117127
try:
118-
datetime.strptime(extracted_service_start_date, "%Y-%m-%d")
128+
formatted_service_start_date = datetime.strptime(
129+
extracted_service_start_date, "%Y-%m-%d"
130+
)
119131
except ValueError:
120132
logging.error(
121133
f"""
@@ -126,7 +138,9 @@ def backfill_datasets(session: "Session"):
126138
continue
127139

128140
try:
129-
datetime.strptime(extracted_service_end_date, "%Y-%m-%d")
141+
formatted_service_end_date = datetime.strptime(
142+
extracted_service_end_date, "%Y-%m-%d"
143+
)
130144
except ValueError:
131145
logging.error(
132146
f"""
@@ -136,12 +150,31 @@ def backfill_datasets(session: "Session"):
136150
)
137151
continue
138152

139-
dataset.service_date_range_start = extracted_service_start_date
140-
dataset.service_date_range_end = extracted_service_end_date
153+
# If timezone is None (not found in the validation report), it will use UTC as base.
154+
# It will not save the timezone in the database if it is None.
155+
formatting_timezone = extracted_timezone
156+
if formatting_timezone is None:
157+
formatting_timezone = timezone.utc
141158

142-
formatted_dates = (
143-
extracted_service_start_date + " - " + extracted_service_end_date
159+
local_service_start_date = formatted_service_start_date.replace(
160+
hour=0, minute=0, tzinfo=formatting_timezone
144161
)
162+
utc_service_start_date = local_service_start_date.astimezone(
163+
ZoneInfo("UTC")
164+
)
165+
166+
local_service_end_date = formatted_service_end_date.replace(
167+
hour=23, minute=59, tzinfo=formatting_timezone
168+
)
169+
utc_service_end_date = local_service_end_date.astimezone(ZoneInfo("UTC"))
170+
171+
dataset.service_date_range_start = utc_service_start_date
172+
dataset.service_date_range_end = utc_service_end_date
173+
174+
if extracted_timezone is not None:
175+
dataset.agency_timezone = extracted_timezone
176+
177+
formatted_dates = utc_service_start_date + " - " + utc_service_end_date
145178
logging.info(
146179
f"Updated gtfsdataset ID {gtfsdataset_id} with value: {formatted_dates}"
147180
)
@@ -150,6 +183,7 @@ def backfill_datasets(session: "Session"):
150183
if changes_count >= elements_per_commit:
151184
try:
152185
changes_count = 0
186+
refresh_materialized_view(session, t_feedsearch.name)
153187
session.commit()
154188
logging.info(f"{changes_count} elements committed.")
155189
except Exception as e:

functions-python/process_validation_report/src/main.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,12 @@ def generate_report_entities(
152152

153153
populate_service_date(dataset, json_report)
154154

155+
summary = json_report.get("summary", {})
156+
if isinstance(summary.get("agencies"), list) and summary["agencies"]:
157+
extracted_timezone = summary["agencies"][0].get("timezone", None)
158+
if extracted_timezone is not None:
159+
dataset.agency_timezone = extracted_timezone
160+
155161
for feature_name in get_nested_value(json_report, ["summary", "gtfsFeatures"], []):
156162
feature = get_feature(feature_name, session)
157163
feature.validations.append(validation_report_entity)

functions-python/update_feed_status/src/main.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
import logging
22
import os
33
import functions_framework
4-
from datetime import date
4+
from datetime import datetime, timezone
55
from shared.helpers.logger import Logger
66
from shared.helpers.database import Database
77
from typing import TYPE_CHECKING
88
from sqlalchemy import case, text
9-
from shared.database_gen.sqlacodegen_models import Gtfsdataset, Feed
9+
from shared.database_gen.sqlacodegen_models import Gtfsdataset, Feed, t_feedsearch
10+
from shared.helpers.database import refresh_materialized_view
1011

1112
if TYPE_CHECKING:
1213
from sqlalchemy.orm import Session
@@ -16,7 +17,7 @@
1617

1718
# query to update the status of the feeds based on the service date range of the latest dataset
1819
def update_feed_statuses_query(session: "Session"):
19-
today = date.today()
20+
today_utc = datetime.now(timezone.utc).date()
2021

2122
latest_dataset_subq = (
2223
session.query(
@@ -34,16 +35,16 @@ def update_feed_statuses_query(session: "Session"):
3435

3536
new_status = case(
3637
(
37-
latest_dataset_subq.c.service_date_range_end < today,
38+
latest_dataset_subq.c.service_date_range_end < today_utc,
3839
text("'inactive'::status"),
3940
),
4041
(
41-
latest_dataset_subq.c.service_date_range_start > today,
42+
latest_dataset_subq.c.service_date_range_start > today_utc,
4243
text("'future'::status"),
4344
),
4445
(
45-
(latest_dataset_subq.c.service_date_range_start <= today)
46-
& (latest_dataset_subq.c.service_date_range_end >= today),
46+
(latest_dataset_subq.c.service_date_range_start <= today_utc)
47+
& (latest_dataset_subq.c.service_date_range_end >= today_utc),
4748
text("'active'::status"),
4849
),
4950
)
@@ -63,6 +64,7 @@ def update_feed_statuses_query(session: "Session"):
6364
raise Exception(f"Error updating feed statuses: {e}")
6465

6566
try:
67+
refresh_materialized_view(session, t_feedsearch.name)
6668
session.commit()
6769
logging.info("Feed Database changes committed.")
6870
session.close()

liquibase/changelog.xml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,5 +42,4 @@
4242
<include file="changes/feat_927_2.sql" relativeToChangelogFile="true"/>
4343
<include file="changes/feat_937.sql" relativeToChangelogFile="true"/>
4444
<include file="changes/feat_946.sql" relativeToChangelogFile="true"/>
45-
<include file="changes/feat_946_2.sql" relativeToChangelogFile="true"/>
4645
</databaseChangeLog>

liquibase/changes/feat_946.sql

Lines changed: 176 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,180 @@
1+
-- Dropping the materialized view if it exists as we cannot update it
12
DROP MATERIALIZED VIEW IF EXISTS FeedSearch;
23
ALTER TABLE gtfsdataset
34
ADD COLUMN agency_timezone VARCHAR(255),
45
ALTER COLUMN service_date_range_start SET DATA TYPE TIMESTAMP WITH TIME ZONE USING service_date_range_start::TIMESTAMP WITH TIME ZONE,
5-
ALTER COLUMN service_date_range_end SET DATA TYPE TIMESTAMP WITH TIME ZONE USING service_date_range_end::TIMESTAMP WITH TIME ZONE;
6+
ALTER COLUMN service_date_range_end SET DATA TYPE TIMESTAMP WITH TIME ZONE USING service_date_range_end::TIMESTAMP WITH TIME ZONE;
7+
8+
CREATE MATERIALIZED VIEW FeedSearch AS
9+
SELECT
10+
-- feed
11+
Feed.stable_id AS feed_stable_id,
12+
Feed.id AS feed_id,
13+
Feed.data_type,
14+
Feed.status,
15+
Feed.feed_name,
16+
Feed.note,
17+
Feed.feed_contact_email,
18+
-- source
19+
Feed.producer_url,
20+
Feed.authentication_info_url,
21+
Feed.authentication_type,
22+
Feed.api_key_parameter_name,
23+
Feed.license_url,
24+
Feed.provider,
25+
Feed.operational_status,
26+
-- official status
27+
Latest_official_status.is_official AS official,
28+
-- latest_dataset
29+
Latest_dataset.id AS latest_dataset_id,
30+
Latest_dataset.hosted_url AS latest_dataset_hosted_url,
31+
Latest_dataset.downloaded_at AS latest_dataset_downloaded_at,
32+
Latest_dataset.bounding_box AS latest_dataset_bounding_box,
33+
Latest_dataset.hash AS latest_dataset_hash,
34+
Latest_dataset.agency_timezone AS latest_dataset_agency_timezone,
35+
Latest_dataset.service_date_range_start AS latest_dataset_service_date_range_start,
36+
Latest_dataset.service_date_range_end AS latest_dataset_service_date_range_end,
37+
-- external_ids
38+
ExternalIdJoin.external_ids,
39+
-- redirect_ids
40+
RedirectingIdJoin.redirect_ids,
41+
-- feed gtfs_rt references
42+
FeedReferenceJoin.feed_reference_ids,
43+
-- feed gtfs_rt entities
44+
EntityTypeFeedJoin.entities,
45+
-- locations
46+
FeedLocationJoin.locations,
47+
-- translations
48+
FeedCountryTranslationJoin.translations AS country_translations,
49+
FeedSubdivisionNameTranslationJoin.translations AS subdivision_name_translations,
50+
FeedMunicipalityTranslationJoin.translations AS municipality_translations,
51+
-- full-text searchable document
52+
setweight(to_tsvector('english', coalesce(unaccent(Feed.feed_name), '')), 'C') ||
53+
setweight(to_tsvector('english', coalesce(unaccent(Feed.provider), '')), 'C') ||
54+
setweight(to_tsvector('english', coalesce(unaccent((
55+
SELECT string_agg(
56+
coalesce(location->>'country_code', '') || ' ' ||
57+
coalesce(location->>'country', '') || ' ' ||
58+
coalesce(location->>'subdivision_name', '') || ' ' ||
59+
coalesce(location->>'municipality', ''),
60+
' '
61+
)
62+
FROM json_array_elements(FeedLocationJoin.locations) AS location
63+
)), '')), 'A') ||
64+
setweight(to_tsvector('english', coalesce(unaccent((
65+
SELECT string_agg(
66+
coalesce(translation->>'value', ''),
67+
' '
68+
)
69+
FROM json_array_elements(FeedCountryTranslationJoin.translations) AS translation
70+
)), '')), 'A') ||
71+
setweight(to_tsvector('english', coalesce(unaccent((
72+
SELECT string_agg(
73+
coalesce(translation->>'value', ''),
74+
' '
75+
)
76+
FROM json_array_elements(FeedSubdivisionNameTranslationJoin.translations) AS translation
77+
)), '')), 'A') ||
78+
setweight(to_tsvector('english', coalesce(unaccent((
79+
SELECT string_agg(
80+
coalesce(translation->>'value', ''),
81+
' '
82+
)
83+
FROM json_array_elements(FeedMunicipalityTranslationJoin.translations) AS translation
84+
)), '')), 'A') AS document
85+
FROM Feed
86+
LEFT JOIN (
87+
SELECT *
88+
FROM gtfsdataset
89+
WHERE latest = true
90+
) AS Latest_dataset ON Latest_dataset.feed_id = Feed.id AND Feed.data_type = 'gtfs'
91+
LEFT JOIN (
92+
SELECT
93+
feed_id,
94+
json_agg(json_build_object('external_id', associated_id, 'source', source)) AS external_ids
95+
FROM externalid
96+
GROUP BY feed_id
97+
) AS ExternalIdJoin ON ExternalIdJoin.feed_id = Feed.id
98+
LEFT JOIN (
99+
SELECT
100+
gtfs_rt_feed_id,
101+
array_agg(FeedReferenceJoinInnerQuery.stable_id) AS feed_reference_ids
102+
FROM FeedReference
103+
LEFT JOIN Feed AS FeedReferenceJoinInnerQuery ON FeedReferenceJoinInnerQuery.id = FeedReference.gtfs_feed_id
104+
GROUP BY gtfs_rt_feed_id
105+
) AS FeedReferenceJoin ON FeedReferenceJoin.gtfs_rt_feed_id = Feed.id AND Feed.data_type = 'gtfs_rt'
106+
LEFT JOIN (
107+
SELECT
108+
target_id,
109+
json_agg(json_build_object('target_id', target_id, 'comment', redirect_comment)) AS redirect_ids
110+
FROM RedirectingId
111+
GROUP BY target_id
112+
) AS RedirectingIdJoin ON RedirectingIdJoin.target_id = Feed.id
113+
LEFT JOIN (
114+
SELECT
115+
LocationFeed.feed_id,
116+
json_agg(json_build_object('country', country, 'country_code', country_code, 'subdivision_name',
117+
subdivision_name, 'municipality', municipality)) AS locations
118+
FROM Location
119+
LEFT JOIN LocationFeed ON LocationFeed.location_id = Location.id
120+
GROUP BY LocationFeed.feed_id
121+
) AS FeedLocationJoin ON FeedLocationJoin.feed_id = Feed.id
122+
LEFT JOIN (
123+
SELECT DISTINCT ON (feed_id) *
124+
FROM officialstatushistory
125+
ORDER BY feed_id, timestamp DESC
126+
) AS Latest_official_status ON Latest_official_status.feed_id = Feed.id
127+
LEFT JOIN (
128+
SELECT
129+
LocationFeed.feed_id,
130+
json_agg(json_build_object('value', Translation.value, 'key', Translation.key)) AS translations
131+
FROM Location
132+
LEFT JOIN Translation ON Location.country = Translation.key
133+
LEFT JOIN LocationFeed ON LocationFeed.location_id = Location.id
134+
WHERE Translation.language_code = 'en'
135+
AND Translation.type = 'country'
136+
AND Location.country IS NOT NULL
137+
GROUP BY LocationFeed.feed_id
138+
) AS FeedCountryTranslationJoin ON FeedCountryTranslationJoin.feed_id = Feed.id
139+
LEFT JOIN (
140+
SELECT
141+
LocationFeed.feed_id,
142+
json_agg(json_build_object('value', Translation.value, 'key', Translation.key)) AS translations
143+
FROM Location
144+
LEFT JOIN Translation ON Location.subdivision_name = Translation.key
145+
LEFT JOIN LocationFeed ON LocationFeed.location_id = Location.id
146+
WHERE Translation.language_code = 'en'
147+
AND Translation.type = 'subdivision_name'
148+
AND Location.subdivision_name IS NOT NULL
149+
GROUP BY LocationFeed.feed_id
150+
) AS FeedSubdivisionNameTranslationJoin ON FeedSubdivisionNameTranslationJoin.feed_id = Feed.id
151+
LEFT JOIN (
152+
SELECT
153+
LocationFeed.feed_id,
154+
json_agg(json_build_object('value', Translation.value, 'key', Translation.key)) AS translations
155+
FROM Location
156+
LEFT JOIN Translation ON Location.municipality = Translation.key
157+
LEFT JOIN LocationFeed ON LocationFeed.location_id = Location.id
158+
WHERE Translation.language_code = 'en'
159+
AND Translation.type = 'municipality'
160+
AND Location.municipality IS NOT NULL
161+
GROUP BY LocationFeed.feed_id
162+
) AS FeedMunicipalityTranslationJoin ON FeedMunicipalityTranslationJoin.feed_id = Feed.id
163+
LEFT JOIN (
164+
SELECT
165+
feed_id,
166+
array_agg(entity_name) AS entities
167+
FROM EntityTypeFeed
168+
GROUP BY feed_id
169+
) AS EntityTypeFeedJoin ON EntityTypeFeedJoin.feed_id = Feed.id AND Feed.data_type = 'gtfs_rt'
170+
;
171+
172+
173+
-- This index allows concurrent refresh on the materialized view avoiding table locks
174+
CREATE UNIQUE INDEX idx_unique_feed_id ON FeedSearch(feed_id);
175+
176+
-- Indices for feedsearch view optimization
177+
CREATE INDEX feedsearch_document_idx ON FeedSearch USING GIN(document);
178+
CREATE INDEX feedsearch_feed_stable_id ON FeedSearch(feed_stable_id);
179+
CREATE INDEX feedsearch_data_type ON FeedSearch(data_type);
180+
CREATE INDEX feedsearch_status ON FeedSearch(status);

Comments (0)