Skip to content

Commit 5f62f9d

Browse files
committed
Merge branch 'main' into user_feature_flag
2 parents 0eab2eb + 152c80e commit 5f62f9d

20 files changed

Lines changed: 1163 additions & 60 deletions

File tree

api/.openapi-generator/FILES

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,8 @@ src/feeds_gen/models/gbfs_version.py
2626
src/feeds_gen/models/get_matching_licenses_request.py
2727
src/feeds_gen/models/gtfs_dataset.py
2828
src/feeds_gen/models/gtfs_feed.py
29+
src/feeds_gen/models/gtfs_feed_availability_check.py
30+
src/feeds_gen/models/gtfs_feed_availability_response.py
2931
src/feeds_gen/models/gtfs_rt_feed.py
3032
src/feeds_gen/models/latest_dataset.py
3133
src/feeds_gen/models/latest_dataset_validation_report.py

api/src/feeds/impl/feeds_api_impl.py

Lines changed: 51 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,13 +9,15 @@
99
from feeds.impl.error_handling import raise_http_error, raise_http_validation_error, convert_exception
1010
from shared.db_models.feed_impl import FeedImpl
1111
from shared.db_models.gbfs_feed_impl import GbfsFeedImpl
12+
from shared.db_models.gtfs_feed_availability_check_impl import GtfsFeedAvailabilityCheckImpl
1213
from shared.db_models.gtfs_feed_impl import GtfsFeedImpl
1314
from shared.db_models.gtfs_rt_feed_impl import GtfsRTFeedImpl
1415
from feeds_gen.apis.feeds_api_base import BaseFeedsApi
1516
from feeds_gen.models.feed import Feed
1617
from feeds_gen.models.gbfs_feed import GbfsFeed
1718
from feeds_gen.models.gtfs_dataset import GtfsDataset
1819
from feeds_gen.models.gtfs_feed import GtfsFeed
20+
from feeds_gen.models.gtfs_feed_availability_response import GtfsFeedAvailabilityResponse
1921
from feeds_gen.models.gtfs_rt_feed import GtfsRTFeed
2022
from middleware.request_context import is_user_email_restricted
2123
from shared.common.db_utils import (
@@ -26,6 +28,7 @@
2628
get_gbfs_feeds_query,
2729
)
2830
from shared.common.error_handling import (
31+
availability_from_after_to,
2932
invalid_date_message,
3033
feed_not_found,
3134
gtfs_feed_not_found,
@@ -38,6 +41,7 @@
3841
Feed as FeedOrm,
3942
Gtfsdataset,
4043
Gtfsfeed,
44+
GtfsFeedAvailabilityCheck,
4145
Gtfsrealtimefeed,
4246
)
4347
from shared.feed_filters.feed_filter import FeedFilter
@@ -308,6 +312,53 @@ def get_gtfs_feed_gtfs_rt_feeds(self, id: str, db_session: Session) -> List[Gtfs
308312
else:
309313
raise_http_error(404, gtfs_feed_not_found.format(id))
310314

315+
@with_db_session
def get_gtfs_feed_availability(
    self,
    id: str,
    _from: str,
    to: str,
    limit: int,
    offset: int,
    sort: str,
    db_session: Session,
) -> GtfsFeedAvailabilityResponse:
    """Return the historical availability checks recorded for a GTFS feed.

    Args:
        id: Identifier of the GTFS feed, as accepted by `_get_gtfs_feed`.
        _from: Optional inclusive lower bound on `checked_at` (ISO 8601 string).
        to: Optional inclusive upper bound on `checked_at` (ISO 8601 string).
        limit: Maximum number of checks to return.
        offset: Number of checks to skip before the first returned one.
        sort: "asc" orders by `checked_at` ascending; any other value orders descending.
        db_session: SQLAlchemy session used for the lookups.

    Returns:
        A `GtfsFeedAvailabilityResponse` carrying the requested page of checks and
        the total number of checks matching the time window.

    Raises:
        Whatever `raise_http_validation_error` raises when a bound is not a valid
        ISO date or when `from` is later than `to`; whatever `raise_http_error`
        raises (404) when the feed is not found.
    """
    # Local import: the original block did not reference `timezone`.
    from datetime import timezone

    def _parse_bound(raw):
        """Parse an already-validated ISO 8601 bound into a timezone-aware datetime."""
        if not raw:
            return None
        parsed = datetime.fromisoformat(raw.replace("Z", "+00:00"))
        if parsed.tzinfo is None:
            # A bound without an offset parses to a naive datetime. Ordering a naive
            # datetime against an aware one raises TypeError, which would surface as a
            # 500 instead of a validation error.
            # NOTE(review): UTC is assumed as the default offset - confirm it matches
            # how `checked_at` is stored.
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed

    if _from and not valid_iso_date(_from):
        raise_http_validation_error(invalid_date_message.format("from"))
    if to and not valid_iso_date(to):
        raise_http_validation_error(invalid_date_message.format("to"))

    from_dt = _parse_bound(_from)
    to_dt = _parse_bound(to)

    # Equal bounds are allowed; only a strictly inverted window is rejected.
    if from_dt and to_dt and from_dt > to_dt:
        raise_http_validation_error(availability_from_after_to)

    feed = self._get_gtfs_feed(id, db_session, include_options_for_joinedload=False)
    if not feed:
        raise_http_error(404, gtfs_feed_not_found.format(id))

    query = db_session.query(GtfsFeedAvailabilityCheck).filter(GtfsFeedAvailabilityCheck.feed_id == feed.id)
    if from_dt:
        query = query.filter(GtfsFeedAvailabilityCheck.checked_at >= from_dt)
    if to_dt:
        query = query.filter(GtfsFeedAvailabilityCheck.checked_at <= to_dt)

    # Count before pagination so `total` reflects the whole filtered window.
    total = query.count()
    order = (
        GtfsFeedAvailabilityCheck.checked_at.asc() if sort == "asc" else GtfsFeedAvailabilityCheck.checked_at.desc()
    )
    checks = query.order_by(order).offset(offset).limit(limit).all()

    return GtfsFeedAvailabilityResponse(
        feed_id=id,
        total=total,
        offset=offset,
        limit=limit,
        checks=[GtfsFeedAvailabilityCheckImpl.from_orm(c) for c in checks],
    )
361+
311362
@with_db_session
312363
def get_gbfs_feed(
313364
self,

api/src/scripts/populate_db_test_data.py

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
Gtfsdataset,
1111
Validationreport,
1212
Gtfsfeed,
13+
GtfsFeedAvailabilityCheck,
1314
Notice,
1415
Feature,
1516
License,
@@ -165,6 +166,11 @@ def populate_test_datasets(self, filepath, db_session: "Session"):
165166
if "validation_reports" in data:
166167
validation_report_dict = {}
167168
for report in data["validation_reports"]:
169+
if report["dataset_id"] not in dataset_dict:
170+
self.logger.error(
171+
f"No dataset found with id: {report['dataset_id']}; skipping validation report {report['id']}"
172+
)
173+
continue
168174
validation_report = Validationreport(
169175
id=report["id"],
170176
validator_version=report["validator_version"],
@@ -305,6 +311,29 @@ def populate_test_datasets(self, filepath, db_session: "Session"):
305311
gbfs_version.gbfsendpoints.append(gbfs_endpoint)
306312
gbfs_feed.gbfsversions.append(gbfs_version)
307313

314+
# GTFS feed availability checks
315+
if "gtfs_availability_checks" in data:
316+
for check in data["gtfs_availability_checks"]:
317+
feed_stable_id = check.get("feed_stable_id")
318+
gtfs_feed = db_session.query(Gtfsfeed).filter(Gtfsfeed.stable_id == feed_stable_id).one_or_none()
319+
if not gtfs_feed:
320+
self.logger.error(
321+
f"No GTFS feed found with stable_id: {feed_stable_id}; skipping availability check"
322+
)
323+
continue
324+
availability_check = GtfsFeedAvailabilityCheck(
325+
id=uuid4(),
326+
feed_id=gtfs_feed.id,
327+
checked_at=check["checked_at"],
328+
request_url=check.get("request_url", "https://example.com/feed.zip"),
329+
request_type=check["request_type"],
330+
success=check["success"],
331+
status_code=check.get("status_code"),
332+
latency_ms=check.get("latency_ms"),
333+
error_type=check.get("error_type"),
334+
)
335+
db_session.add(availability_check)
336+
308337
db_session.commit()
309338
db_session.execute(text(f"REFRESH MATERIALIZED VIEW CONCURRENTLY {t_feedsearch.name}"))
310339

@@ -325,6 +354,10 @@ def populate(self):
325354

326355
def populate_test_feeds(self, feeds_data, db_session: "Session"):
327356
for feed_data in feeds_data:
357+
existing = db_session.query(Gtfsfeed).filter(Gtfsfeed.stable_id == feed_data["id"]).one_or_none()
358+
if existing:
359+
logger.info(f"Feed {feed_data['id']} already exists, skipping")
360+
continue
328361
feed = Gtfsfeed(
329362
id=str(uuid4()),
330363
stable_id=feed_data["id"],

api/src/shared/common/error_handling.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
invalid_date_message: Final[str] = (
44
"Invalid date format for '{}'. Expected ISO 8601 format, example: '2021-01-01T00:00:00Z'"
55
)
6+
availability_from_after_to: Final[str] = "'from' timestamp must be before 'to' timestamp"
67
invalid_bounding_coordinates: Final[str] = "Invalid bounding coordinates {} {}"
78
invalid_bounding_method: Final[str] = "Invalid bounding_filter_method {}"
89
feed_not_found: Final[str] = "Feed '{}' not found"

api/src/shared/common/license_utils.py

Lines changed: 20 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -415,18 +415,27 @@ def resolve_license(
415415
# 4) Generic heuristics
416416
heuristic_match = heuristic_spdx(url_str)
417417
if heuristic_match:
418-
return [
419-
MatchingLicense(
420-
license_id=heuristic_match,
421-
license_url=url_str,
422-
normalized_url=url_normalized,
423-
spdx_id=heuristic_match,
424-
match_type="heuristic",
425-
confidence=0.95,
426-
matched_name=heuristic_match,
427-
matched_source="pattern-heuristics",
418+
if db_session is not None:
419+
# Check if the license found is actually in the DB
420+
db_lic = (
421+
db_session.query(License).filter(func.lower(License.id) == func.lower(heuristic_match)).one_or_none()
428422
)
429-
]
423+
if db_lic is None:
424+
logging.warning("Heuristic SPDX ID %s not found in DB, skipping assignment", heuristic_match)
425+
heuristic_match = None
426+
if heuristic_match:
427+
return [
428+
MatchingLicense(
429+
license_id=heuristic_match,
430+
license_url=url_str,
431+
normalized_url=url_normalized,
432+
spdx_id=heuristic_match,
433+
match_type="heuristic",
434+
confidence=0.95,
435+
matched_name=heuristic_match,
436+
matched_source="pattern-heuristics",
437+
)
438+
]
430439

431440
# 5) Fuzzy (same host candidates only)
432441
if allow_fuzzy and url_host and db_session is not None:
api/src/shared/db_models/gtfs_feed_availability_check_impl.py

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,30 @@
1+
from shared.database_gen.sqlacodegen_models import GtfsFeedAvailabilityCheck as GtfsFeedAvailabilityCheckOrm
2+
from feeds_gen.models.gtfs_feed_availability_check import GtfsFeedAvailabilityCheck
3+
4+
_REQUEST_TYPE_TO_METHOD = {"http_head": "HEAD", "http_get": "GET"}
5+
6+
7+
class GtfsFeedAvailabilityCheckImpl(GtfsFeedAvailabilityCheck):
    """API-facing representation of one GTFS feed availability check.

    Builds the generated `GtfsFeedAvailabilityCheck` Pydantic model from the
    corresponding SQLAlchemy row object.
    """

    class Config:
        """Pydantic configuration.

        Turns on `from_attributes` so a model instance can be created from a SQLAlchemy row object.
        """

        from_attributes = True

    @classmethod
    def from_orm(cls, check: GtfsFeedAvailabilityCheckOrm | None) -> GtfsFeedAvailabilityCheck | None:
        """Convert a SQLAlchemy availability-check row into its API model.

        Returns None when no row is supplied.
        """
        if check:
            stored_type = check.request_type
            stored_latency = check.latency_ms
            return cls(
                checked_at=check.checked_at,
                success=check.success,
                # Known request types map to an HTTP verb; unknown ones pass through unchanged.
                request_method=_REQUEST_TYPE_TO_METHOD.get(stored_type, stored_type),
                status_code=check.status_code,
                latency_ms=None if stored_latency is None else float(stored_latency),
                error_type=check.error_type,
            )
        return None

api/tests/integration/test_data/extra_test_data.json

Lines changed: 52 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@
1111
"content_html": "",
1212
"created_at": "2024-01-01T00:00:00Z",
1313
"updated_at": "2024-01-02T00:00:00Z"
14-
1514
},
1615
{
1716
"id": "license-2",
@@ -24,7 +23,6 @@
2423
"content_html": "",
2524
"created_at": "2024-01-01T00:00:00Z",
2625
"updated_at": "2024-01-02T00:00:00Z"
27-
2826
},
2927
{
3028
"id": "license-3",
@@ -39,7 +37,6 @@
3937
"updated_at": "2024-01-02T00:00:00Z"
4038
}
4139
],
42-
4340
"rules": [
4441
{
4542
"name": "license-rule-1",
@@ -60,7 +57,6 @@
6057
"type": "limitation"
6158
}
6259
],
63-
6460
"license_rules": [
6561
{
6662
"license_id": "license-1",
@@ -79,7 +75,6 @@
7975
"rule_id": "license-rule-3"
8076
}
8177
],
82-
8378
"license_tag_groups": [
8479
{
8580
"id": "family",
@@ -92,7 +87,6 @@
9287
"description": "License type taxonomy"
9388
}
9489
],
95-
9690
"license_tags": [
9791
{
9892
"id": "family:ODC",
@@ -107,7 +101,6 @@
107101
"description": "Open Data Commons license"
108102
}
109103
],
110-
111104
"license_license_tags": [
112105
{
113106
"license_id": "license-1",
@@ -118,7 +111,6 @@
118111
"tag_id": "license:open-data-commons"
119112
}
120113
],
121-
122114
"feeds": [
123115
{
124116
"id": "mdb-60",
@@ -237,5 +229,57 @@
237229
}
238230
]
239231
}
232+
],
233+
"gtfs_availability_checks": [
234+
{
235+
"feed_stable_id": "mdb-1",
236+
"checked_at": "2025-01-10T10:00:00+00:00",
237+
"request_url": "https://example.com/gtfs.zip",
238+
"request_type": "http_head",
239+
"success": true,
240+
"status_code": 200,
241+
"latency_ms": 120,
242+
"error_type": null
243+
},
244+
{
245+
"feed_stable_id": "mdb-1",
246+
"checked_at": "2025-02-10T10:00:00+00:00",
247+
"request_url": "https://example.com/gtfs.zip",
248+
"request_type": "http_head",
249+
"success": false,
250+
"status_code": 503,
251+
"latency_ms": 450,
252+
"error_type": "http_error"
253+
},
254+
{
255+
"feed_stable_id": "mdb-1",
256+
"checked_at": "2025-03-10T10:00:00+00:00",
257+
"request_url": "https://example.com/gtfs.zip",
258+
"request_type": "http_get",
259+
"success": true,
260+
"status_code": 206,
261+
"latency_ms": 200,
262+
"error_type": null
263+
},
264+
{
265+
"feed_stable_id": "mdb-1",
266+
"checked_at": "2025-04-10T10:00:00+00:00",
267+
"request_url": "https://example.com/gtfs.zip",
268+
"request_type": "http_head",
269+
"success": true,
270+
"status_code": 200,
271+
"latency_ms": 95,
272+
"error_type": null
273+
},
274+
{
275+
"feed_stable_id": "mdb-1",
276+
"checked_at": "2025-05-10T10:00:00+00:00",
277+
"request_url": "https://example.com/gtfs.zip",
278+
"request_type": "http_head",
279+
"success": false,
280+
"status_code": null,
281+
"latency_ms": null,
282+
"error_type": "connection_timeout"
283+
}
240284
]
241285
}

0 commit comments

Comments
 (0)