Skip to content

Commit 8a6c64c

Browse files
committed
should use Feed.official in materialized view
Signed-off-by: Jingsi Lu <jingsi@mobilitydata.org>
1 parent 4fb3781 commit 8a6c64c

4 files changed

Lines changed: 319 additions & 1 deletion

File tree

api/src/feeds/impl/search_api_impl.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from feeds_gen.models.search_feeds200_response import SearchFeeds200Response
1212
from middleware.request_context import is_user_email_restricted
1313
from sqlalchemy import or_
14+
import logging
1415

1516
feed_search_columns = [column for column in t_feedsearch.columns if column.name != "document"]
1617

@@ -55,8 +56,10 @@ def add_search_query_filters(query, search_query, data_type, feed_id, status, is
5556
query = query.where(t_feedsearch.c.status.in_([s.strip().lower() for s in status_list]))
5657
if is_official is not None:
5758
if is_official:
58-
query = query.where(t_feedsearch.c.official.is_(True))
59+
logging.debug("is_official is true")
60+
query = query.where(t_feedsearch.c.official.is_not(None))
5961
else:
62+
logging.debug("is_official is false")
6063
query = query.where(or_(t_feedsearch.c.official.is_(False), t_feedsearch.c.official.is_(None)))
6164
if search_query and len(search_query.strip()) > 0:
6265
query = query.filter(

liquibase/changelog.xml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,6 @@
5050
<include file="changes/feat_997.sql" relativeToChangelogFile="true"/>
5151
<!-- Materialized view updated. Added features and totals. -->
5252
<include file="changes/feat_993.sql" relativeToChangelogFile="true"/>
53+
<include file="changes/feat_1083.sql" relativeToChangelogFile="true"/>
54+
<include file="changes/feat_1083_2.sql" relativeToChangelogFile="true"/>
5355
</databaseChangeLog>

liquibase/changes/feat_1083.sql

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
-- Updating the FeedSearch materialized view to include location extraction details
2+
-- 1. Added osm_locations as a column which is a json list of OsmLocationGroup names and their locations
3+
-- 2. Added the names of the locations to the document for full-text search
4+
DROP MATERIALIZED VIEW IF EXISTS FeedSearch;
5+
CREATE MATERIALIZED VIEW FeedSearch AS
6+
SELECT
7+
-- feed
8+
Feed.stable_id AS feed_stable_id,
9+
Feed.id AS feed_id,
10+
Feed.data_type,
11+
Feed.status,
12+
Feed.feed_name,
13+
Feed.note,
14+
Feed.feed_contact_email,
15+
-- source
16+
Feed.producer_url,
17+
Feed.authentication_info_url,
18+
Feed.authentication_type,
19+
Feed.api_key_parameter_name,
20+
Feed.license_url,
21+
Feed.provider,
22+
Feed.operational_status,
23+
-- official status
24+
Feed.official AS official,
25+
-- latest_dataset
26+
Latest_dataset.id AS latest_dataset_id,
27+
Latest_dataset.hosted_url AS latest_dataset_hosted_url,
28+
Latest_dataset.downloaded_at AS latest_dataset_downloaded_at,
29+
Latest_dataset.bounding_box AS latest_dataset_bounding_box,
30+
Latest_dataset.hash AS latest_dataset_hash,
31+
Latest_dataset.service_date_range_start AS latest_dataset_service_date_range_start,
32+
Latest_dataset.service_date_range_end AS latest_dataset_service_date_range_end,
33+
-- external_ids
34+
ExternalIdJoin.external_ids,
35+
-- redirect_ids
36+
RedirectingIdJoin.redirect_ids,
37+
-- feed gtfs_rt references
38+
FeedReferenceJoin.feed_reference_ids,
39+
-- feed gtfs_rt entities
40+
EntityTypeFeedJoin.entities,
41+
-- locations
42+
FeedLocationJoin.locations,
43+
-- osm locations grouped
44+
OsmLocationJoin.osm_locations,
45+
46+
-- full-text searchable document
47+
setweight(to_tsvector('english', coalesce(unaccent(Feed.feed_name), '')), 'C') ||
48+
setweight(to_tsvector('english', coalesce(unaccent(Feed.provider), '')), 'C') ||
49+
setweight(to_tsvector('english', coalesce(unaccent((
50+
SELECT string_agg(
51+
coalesce(location->>'country_code', '') || ' ' ||
52+
coalesce(location->>'country', '') || ' ' ||
53+
coalesce(location->>'subdivision_name', '') || ' ' ||
54+
coalesce(location->>'municipality', ''),
55+
' '
56+
)
57+
FROM json_array_elements(FeedLocationJoin.locations) AS location
58+
)), '')), 'A') ||
59+
setweight(to_tsvector('english', coalesce(unaccent(OsmLocationNamesJoin.osm_location_names), '')), 'A')
60+
AS document
61+
FROM Feed
62+
LEFT JOIN (
63+
SELECT *
64+
FROM gtfsdataset
65+
WHERE latest = true
66+
) AS Latest_dataset ON Latest_dataset.feed_id = Feed.id AND Feed.data_type = 'gtfs'
67+
LEFT JOIN (
68+
SELECT
69+
feed_id,
70+
json_agg(json_build_object('external_id', associated_id, 'source', source)) AS external_ids
71+
FROM externalid
72+
GROUP BY feed_id
73+
) AS ExternalIdJoin ON ExternalIdJoin.feed_id = Feed.id
74+
LEFT JOIN (
75+
SELECT
76+
gtfs_rt_feed_id,
77+
array_agg(FeedReferenceJoinInnerQuery.stable_id) AS feed_reference_ids
78+
FROM FeedReference
79+
LEFT JOIN Feed AS FeedReferenceJoinInnerQuery ON FeedReferenceJoinInnerQuery.id = FeedReference.gtfs_feed_id
80+
GROUP BY gtfs_rt_feed_id
81+
) AS FeedReferenceJoin ON FeedReferenceJoin.gtfs_rt_feed_id = Feed.id AND Feed.data_type = 'gtfs_rt'
82+
LEFT JOIN (
83+
SELECT
84+
target_id,
85+
json_agg(json_build_object('target_id', target_id, 'comment', redirect_comment)) AS redirect_ids
86+
FROM RedirectingId
87+
GROUP BY target_id
88+
) AS RedirectingIdJoin ON RedirectingIdJoin.target_id = Feed.id
89+
LEFT JOIN (
90+
SELECT
91+
LocationFeed.feed_id,
92+
json_agg(json_build_object('country', country, 'country_code', country_code, 'subdivision_name',
93+
subdivision_name, 'municipality', municipality)) AS locations
94+
FROM Location
95+
LEFT JOIN LocationFeed ON LocationFeed.location_id = Location.id
96+
GROUP BY LocationFeed.feed_id
97+
) AS FeedLocationJoin ON FeedLocationJoin.feed_id = Feed.id
98+
LEFT JOIN (
99+
SELECT
100+
feed_id,
101+
array_agg(entity_name) AS entities
102+
FROM EntityTypeFeed
103+
GROUP BY feed_id
104+
) AS EntityTypeFeedJoin ON EntityTypeFeedJoin.feed_id = Feed.id AND Feed.data_type = 'gtfs_rt'
105+
LEFT JOIN (
106+
WITH locations_per_group AS (
107+
SELECT
108+
fog.feed_id,
109+
olg.group_name,
110+
jsonb_agg(
111+
DISTINCT jsonb_build_object(
112+
'admin_level', gp.admin_level,
113+
'name', gp.name
114+
)
115+
) AS locations
116+
FROM FeedOsmLocationGroup fog
117+
JOIN OsmLocationGroup olg ON olg.group_id = fog.group_id
118+
JOIN OsmLocationGroupGeopolygon olgg ON olgg.group_id = olg.group_id
119+
JOIN Geopolygon gp ON gp.osm_id = olgg.osm_id
120+
GROUP BY fog.feed_id, olg.group_name
121+
)
122+
SELECT
123+
feed_id,
124+
jsonb_agg(
125+
jsonb_build_object(
126+
'group_name', group_name,
127+
'locations', locations
128+
)
129+
)::json AS osm_locations
130+
FROM locations_per_group
131+
GROUP BY feed_id
132+
) AS OsmLocationJoin ON OsmLocationJoin.feed_id = Feed.id
133+
LEFT JOIN (
134+
SELECT
135+
fog.feed_id,
136+
string_agg(DISTINCT gp.name, ' ') AS osm_location_names
137+
FROM FeedOsmLocationGroup fog
138+
JOIN OsmLocationGroup olg ON olg.group_id = fog.group_id
139+
JOIN OsmLocationGroupGeopolygon olgg ON olgg.group_id = olg.group_id
140+
JOIN Geopolygon gp ON gp.osm_id = olgg.osm_id
141+
WHERE gp.name IS NOT NULL
142+
GROUP BY fog.feed_id
143+
) AS OsmLocationNamesJoin ON OsmLocationNamesJoin.feed_id = Feed.id;
144+
145+
146+
-- This index allows concurrent refresh on the materialized view avoiding table locks
147+
CREATE UNIQUE INDEX idx_unique_feed_id ON FeedSearch(feed_id);
148+
149+
-- Indices for feedsearch view optimization
150+
CREATE INDEX feedsearch_document_idx ON FeedSearch USING GIN(document);
151+
CREATE INDEX feedsearch_feed_stable_id ON FeedSearch(feed_stable_id);
152+
CREATE INDEX feedsearch_data_type ON FeedSearch(data_type);
153+
CREATE INDEX feedsearch_status ON FeedSearch(status);
154+
155+
DROP VIEW IF EXISTS location_with_translations_en;
156+
DROP TABLE IF EXISTS translation;

liquibase/changes/feat_1083_2.sql

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
-- Updating the FeedSearch materialized view to include location extraction details; this revision also exposes Latest_dataset.agency_timezone as latest_dataset_agency_timezone
2+
-- 1. Added osm_locations as a column which is a json list of OsmLocationGroup names and their locations
3+
-- 2. Added the names of the locations to the document for full-text search
4+
DROP MATERIALIZED VIEW IF EXISTS FeedSearch;
5+
CREATE MATERIALIZED VIEW FeedSearch AS
6+
SELECT
7+
-- feed
8+
Feed.stable_id AS feed_stable_id,
9+
Feed.id AS feed_id,
10+
Feed.data_type,
11+
Feed.status,
12+
Feed.feed_name,
13+
Feed.note,
14+
Feed.feed_contact_email,
15+
-- source
16+
Feed.producer_url,
17+
Feed.authentication_info_url,
18+
Feed.authentication_type,
19+
Feed.api_key_parameter_name,
20+
Feed.license_url,
21+
Feed.provider,
22+
Feed.operational_status,
23+
-- official status
24+
Feed.official AS official,
25+
-- latest_dataset
26+
Latest_dataset.id AS latest_dataset_id,
27+
Latest_dataset.hosted_url AS latest_dataset_hosted_url,
28+
Latest_dataset.downloaded_at AS latest_dataset_downloaded_at,
29+
Latest_dataset.bounding_box AS latest_dataset_bounding_box,
30+
Latest_dataset.hash AS latest_dataset_hash,
31+
Latest_dataset.agency_timezone AS latest_dataset_agency_timezone,
32+
Latest_dataset.service_date_range_start AS latest_dataset_service_date_range_start,
33+
Latest_dataset.service_date_range_end AS latest_dataset_service_date_range_end,
34+
-- external_ids
35+
ExternalIdJoin.external_ids,
36+
-- redirect_ids
37+
RedirectingIdJoin.redirect_ids,
38+
-- feed gtfs_rt references
39+
FeedReferenceJoin.feed_reference_ids,
40+
-- feed gtfs_rt entities
41+
EntityTypeFeedJoin.entities,
42+
-- locations
43+
FeedLocationJoin.locations,
44+
-- osm locations grouped
45+
OsmLocationJoin.osm_locations,
46+
47+
-- full-text searchable document
48+
setweight(to_tsvector('english', coalesce(unaccent(Feed.feed_name), '')), 'C') ||
49+
setweight(to_tsvector('english', coalesce(unaccent(Feed.provider), '')), 'C') ||
50+
setweight(to_tsvector('english', coalesce(unaccent((
51+
SELECT string_agg(
52+
coalesce(location->>'country_code', '') || ' ' ||
53+
coalesce(location->>'country', '') || ' ' ||
54+
coalesce(location->>'subdivision_name', '') || ' ' ||
55+
coalesce(location->>'municipality', ''),
56+
' '
57+
)
58+
FROM json_array_elements(FeedLocationJoin.locations) AS location
59+
)), '')), 'A') ||
60+
setweight(to_tsvector('english', coalesce(unaccent(OsmLocationNamesJoin.osm_location_names), '')), 'A')
61+
AS document
62+
FROM Feed
63+
LEFT JOIN (
64+
SELECT *
65+
FROM gtfsdataset
66+
WHERE latest = true
67+
) AS Latest_dataset ON Latest_dataset.feed_id = Feed.id AND Feed.data_type = 'gtfs'
68+
LEFT JOIN (
69+
SELECT
70+
feed_id,
71+
json_agg(json_build_object('external_id', associated_id, 'source', source)) AS external_ids
72+
FROM externalid
73+
GROUP BY feed_id
74+
) AS ExternalIdJoin ON ExternalIdJoin.feed_id = Feed.id
75+
LEFT JOIN (
76+
SELECT
77+
gtfs_rt_feed_id,
78+
array_agg(FeedReferenceJoinInnerQuery.stable_id) AS feed_reference_ids
79+
FROM FeedReference
80+
LEFT JOIN Feed AS FeedReferenceJoinInnerQuery ON FeedReferenceJoinInnerQuery.id = FeedReference.gtfs_feed_id
81+
GROUP BY gtfs_rt_feed_id
82+
) AS FeedReferenceJoin ON FeedReferenceJoin.gtfs_rt_feed_id = Feed.id AND Feed.data_type = 'gtfs_rt'
83+
LEFT JOIN (
84+
SELECT
85+
target_id,
86+
json_agg(json_build_object('target_id', target_id, 'comment', redirect_comment)) AS redirect_ids
87+
FROM RedirectingId
88+
GROUP BY target_id
89+
) AS RedirectingIdJoin ON RedirectingIdJoin.target_id = Feed.id
90+
LEFT JOIN (
91+
SELECT
92+
LocationFeed.feed_id,
93+
json_agg(json_build_object('country', country, 'country_code', country_code, 'subdivision_name',
94+
subdivision_name, 'municipality', municipality)) AS locations
95+
FROM Location
96+
LEFT JOIN LocationFeed ON LocationFeed.location_id = Location.id
97+
GROUP BY LocationFeed.feed_id
98+
) AS FeedLocationJoin ON FeedLocationJoin.feed_id = Feed.id
99+
LEFT JOIN (
100+
SELECT
101+
feed_id,
102+
array_agg(entity_name) AS entities
103+
FROM EntityTypeFeed
104+
GROUP BY feed_id
105+
) AS EntityTypeFeedJoin ON EntityTypeFeedJoin.feed_id = Feed.id AND Feed.data_type = 'gtfs_rt'
106+
LEFT JOIN (
107+
WITH locations_per_group AS (
108+
SELECT
109+
fog.feed_id,
110+
olg.group_name,
111+
jsonb_agg(
112+
DISTINCT jsonb_build_object(
113+
'admin_level', gp.admin_level,
114+
'name', gp.name
115+
)
116+
) AS locations
117+
FROM FeedOsmLocationGroup fog
118+
JOIN OsmLocationGroup olg ON olg.group_id = fog.group_id
119+
JOIN OsmLocationGroupGeopolygon olgg ON olgg.group_id = olg.group_id
120+
JOIN Geopolygon gp ON gp.osm_id = olgg.osm_id
121+
GROUP BY fog.feed_id, olg.group_name
122+
)
123+
SELECT
124+
feed_id,
125+
jsonb_agg(
126+
jsonb_build_object(
127+
'group_name', group_name,
128+
'locations', locations
129+
)
130+
)::json AS osm_locations
131+
FROM locations_per_group
132+
GROUP BY feed_id
133+
) AS OsmLocationJoin ON OsmLocationJoin.feed_id = Feed.id
134+
LEFT JOIN (
135+
SELECT
136+
fog.feed_id,
137+
string_agg(DISTINCT gp.name, ' ') AS osm_location_names
138+
FROM FeedOsmLocationGroup fog
139+
JOIN OsmLocationGroup olg ON olg.group_id = fog.group_id
140+
JOIN OsmLocationGroupGeopolygon olgg ON olgg.group_id = olg.group_id
141+
JOIN Geopolygon gp ON gp.osm_id = olgg.osm_id
142+
WHERE gp.name IS NOT NULL
143+
GROUP BY fog.feed_id
144+
) AS OsmLocationNamesJoin ON OsmLocationNamesJoin.feed_id = Feed.id;
145+
146+
147+
-- This index allows concurrent refresh on the materialized view avoiding table locks
148+
CREATE UNIQUE INDEX idx_unique_feed_id ON FeedSearch(feed_id);
149+
150+
-- Indices for feedsearch view optimization
151+
CREATE INDEX feedsearch_document_idx ON FeedSearch USING GIN(document);
152+
CREATE INDEX feedsearch_feed_stable_id ON FeedSearch(feed_stable_id);
153+
CREATE INDEX feedsearch_data_type ON FeedSearch(data_type);
154+
CREATE INDEX feedsearch_status ON FeedSearch(status);
155+
156+
DROP VIEW IF EXISTS location_with_translations_en;
157+
DROP TABLE IF EXISTS translation;

0 commit comments

Comments
 (0)