Skip to content

Commit a488c78

Browse files
authored
fix: gtfs_feeds endpoint performance (#1652)
1 parent dcb6920 commit a488c78

File tree

2 files changed

+20
-72
lines changed

2 files changed

+20
-72
lines changed

api/src/feeds/impl/feeds_api_impl.py

Lines changed: 8 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -2,13 +2,11 @@
22
from typing import List, Union, TypeVar, Optional
33

44
from sqlalchemy import or_
5-
from sqlalchemy import select
6-
from sqlalchemy.orm import joinedload, contains_eager, selectinload, Session
5+
from sqlalchemy.orm import contains_eager, selectinload, Session
76
from sqlalchemy.orm.query import Query
87

98
from feeds.impl.datasets_api_impl import DatasetsApiImpl
109
from feeds.impl.error_handling import raise_http_error, raise_http_validation_error, convert_exception
11-
from shared.db_models.entity_type_enum import EntityType
1210
from shared.db_models.feed_impl import FeedImpl
1311
from shared.db_models.gbfs_feed_impl import GbfsFeedImpl
1412
from shared.db_models.gtfs_feed_impl import GtfsFeedImpl
@@ -23,7 +21,7 @@
2321
from shared.common.db_utils import (
2422
get_gtfs_feeds_query,
2523
get_gtfs_rt_feeds_query,
26-
get_joinedload_options,
24+
get_selectinload_options,
2725
add_official_filter,
2826
get_gbfs_feeds_query,
2927
)
@@ -41,13 +39,10 @@
4139
Gtfsdataset,
4240
Gtfsfeed,
4341
Gtfsrealtimefeed,
44-
Location,
45-
Entitytype,
4642
)
4743
from shared.feed_filters.feed_filter import FeedFilter
4844
from shared.feed_filters.gtfs_dataset_filter import GtfsDatasetFilter
49-
from shared.feed_filters.gtfs_feed_filter import LocationFilter
50-
from shared.feed_filters.gtfs_rt_feed_filter import GtfsRtFeedFilter, EntityTypeFilter
45+
from shared.feed_filters.gtfs_rt_feed_filter import GtfsRtFeedFilter
5146
from utils.date_utils import valid_iso_date
5247
from utils.logger import get_logger
5348

@@ -120,7 +115,7 @@ def get_feeds(
120115
# Results are sorted by provider
121116
feed_query = feed_query.order_by(FeedOrm.provider, FeedOrm.stable_id)
122117
# Ensure license relationship is available to the model conversion without extra queries
123-
feed_query = feed_query.options(*get_joinedload_options(), selectinload(FeedOrm.license))
118+
feed_query = feed_query.options(*get_selectinload_options(), selectinload(FeedOrm.license))
124119
if limit is not None:
125120
feed_query = feed_query.limit(limit)
126121
if offset is not None:
@@ -251,11 +246,10 @@ def get_gtfs_rt_feed(self, id: str, db_session: Session) -> GtfsRTFeed:
251246
not is_user_email_restricted(), # Allow all feeds to be returned if the user is not restricted
252247
)
253248
)
254-
.outerjoin(Location, Gtfsrealtimefeed.locations)
255249
.options(
256-
joinedload(Gtfsrealtimefeed.entitytypes),
257-
joinedload(Gtfsrealtimefeed.gtfs_feeds),
258-
*get_joinedload_options(),
250+
selectinload(Gtfsrealtimefeed.entitytypes),
251+
selectinload(Gtfsrealtimefeed.gtfs_feeds),
252+
*get_selectinload_options(),
259253
)
260254
).all()
261255

@@ -299,61 +293,11 @@ def get_gtfs_rt_feeds(
299293

300294
return self._get_response(feed_query, GtfsRTFeedImpl)
301295

302-
entity_types_list = entity_types.split(",") if entity_types else None
303-
304-
# Validate entity types using the EntityType enum
305-
if entity_types_list:
306-
try:
307-
entity_types_list = [EntityType(et.strip()).value for et in entity_types_list]
308-
except ValueError:
309-
raise_http_validation_error(
310-
"Entity types must be the value 'vp,' 'sa,' or 'tu,'. "
311-
"When provided a list values must be separated by commas."
312-
)
313-
314-
gtfs_rt_feed_filter = GtfsRtFeedFilter(
315-
stable_id=None,
316-
provider__ilike=provider,
317-
producer_url__ilike=producer_url,
318-
entity_types=EntityTypeFilter(name__in=entity_types_list),
319-
location=LocationFilter(
320-
country_code=country_code,
321-
subdivision_name__ilike=subdivision_name,
322-
municipality__ilike=municipality,
323-
),
324-
)
325-
subquery = gtfs_rt_feed_filter.filter(
326-
select(Gtfsrealtimefeed.id)
327-
.join(Location, Gtfsrealtimefeed.locations)
328-
.join(Entitytype, Gtfsrealtimefeed.entitytypes)
329-
).subquery()
330-
feed_query = (
331-
db_session.query(Gtfsrealtimefeed)
332-
.filter(Gtfsrealtimefeed.id.in_(subquery))
333-
.filter(
334-
or_(
335-
Gtfsrealtimefeed.operational_status == "published",
336-
not is_user_email_restricted(), # Allow all feeds to be returned if the user is not restricted
337-
)
338-
)
339-
.options(
340-
joinedload(Gtfsrealtimefeed.entitytypes),
341-
joinedload(Gtfsrealtimefeed.gtfs_feeds),
342-
*get_joinedload_options(),
343-
)
344-
.order_by(Gtfsrealtimefeed.provider, Gtfsrealtimefeed.stable_id)
345-
)
346-
feed_query = add_official_filter(feed_query, is_official)
347-
348-
feed_query = feed_query.limit(limit).offset(offset)
349-
return self._get_response(feed_query, GtfsRTFeedImpl)
350-
351296
@staticmethod
352297
def _get_response(feed_query: Query, impl_cls: type[T]) -> List[T]:
353298
"""Get the response for the feed query."""
354299
results = feed_query.all()
355-
response = [impl_cls.from_orm(feed) for feed in results]
356-
return list({feed.id: feed for feed in response}.values())
300+
return [impl_cls.from_orm(feed) for feed in results]
357301

358302
@with_db_session
359303
def get_gtfs_feed_gtfs_rt_feeds(self, id: str, db_session: Session) -> List[GtfsRTFeed]:

api/src/shared/common/db_utils.py

Lines changed: 12 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -81,11 +81,15 @@ def get_gtfs_feeds_query(
8181

8282
if include_options_for_joinedload:
8383
feed_query = feed_query.options(
84-
joinedload(Gtfsfeed.latest_dataset)
85-
.joinedload(Gtfsdataset.validation_reports)
86-
.joinedload(Validationreport.features),
87-
joinedload(Gtfsfeed.visualization_dataset),
88-
*get_joinedload_options(),
84+
# Use selectinload for all collection relationships to avoid a cartesian-product row
85+
# explosion when multiple one-to-many associations are loaded simultaneously.
86+
# joinedload on collections multiplies rows (N feeds × M locations × F features …);
87+
# selectinload issues a separate IN-query per relationship, keeping rows at N per query.
88+
selectinload(Gtfsfeed.latest_dataset)
89+
.selectinload(Gtfsdataset.validation_reports)
90+
.selectinload(Validationreport.features),
91+
joinedload(Gtfsfeed.visualization_dataset), # scalar (many-to-one) — joinedload is safe
92+
*get_selectinload_options(),
8993
).order_by(Gtfsfeed.provider, Gtfsfeed.stable_id)
9094

9195
feed_query = feed_query.limit(limit).offset(offset)
@@ -274,9 +278,9 @@ def get_gtfs_rt_feeds_query(
274278
feed_query = feed_query.filter(Gtfsrealtimefeed.operational_status == "published")
275279

276280
feed_query = feed_query.options(
277-
joinedload(Gtfsrealtimefeed.entitytypes),
278-
joinedload(Gtfsrealtimefeed.gtfs_feeds),
279-
*get_joinedload_options(),
281+
selectinload(Gtfsrealtimefeed.entitytypes),
282+
selectinload(Gtfsrealtimefeed.gtfs_feeds),
283+
*get_selectinload_options(),
280284
)
281285
feed_query = add_official_filter(feed_query, is_official)
282286

0 commit comments

Comments
 (0)