1+ import json
2+ import os
3+ import uuid
4+ from concurrent import futures
15from datetime import datetime
6+ from typing import List , Dict
27
38import pandas as pd
49import pycountry
510import pytz
11+ from google .cloud import pubsub_v1
612
713from scripts .gbfs_utils .comparison import generate_system_csv_from_db , compare_db_to_csv
814from scripts .gbfs_utils .fetching import fetch_data , get_data_content
1117from shared .database .database import generate_unique_id , configure_polymorphic_mappers
1218from shared .database_gen .sqlacodegen_models import Gbfsfeed , Location , Externalid
1319
20+ GBFS_PUBSUB_TOPIC_NAME = "validate-gbfs-feed"
21+
1422
1523class GBFSDatabasePopulateHelper (DatabasePopulateHelper ):
1624 def __init__ (self , filepaths , test_mode = False ):
1725 super ().__init__ (filepaths , test_mode )
26+ self .added_feeds : List [Dict ] = []
1827
1928 def filter_data (self ):
2029 """Filter out rows with Authentication Info and duplicate System IDs"""
@@ -73,6 +82,7 @@ def populate_db(self, session, fetch_url=True):
7382 else :
7483 fetched_data = dict ()
7584 # If the feed already exists, update it. Otherwise, create a new feed.
85+ is_new_feed = gbfs_feed is None
7686 if gbfs_feed :
7787 self .logger .info (f"Updating feed { stable_id } - { row ['Name' ]} " )
7888 else :
@@ -116,6 +126,14 @@ def populate_db(self, session, fetch_url=True):
116126 gbfs_feed .locations = [location ]
117127
118128 session .flush ()
129+ if is_new_feed :
130+ self .added_feeds .append (
131+ {
132+ "feed_id" : gbfs_feed .id ,
133+ "stable_id" : gbfs_feed .stable_id ,
134+ "url" : gbfs_feed .auto_discovery_url ,
135+ }
136+ )
119137 self .logger .info (80 * "-" )
120138
121139 # self.db.session.commit()
@@ -125,6 +143,42 @@ def populate_db(self, session, fetch_url=True):
125143 self .logger .error (f"Error populating the database: { e } " )
126144 raise e
127145
146+ def trigger_downstream_tasks (self ):
147+ """Trigger GBFS version extraction, validation, and geolocation for newly added feeds."""
148+ self .logger .info (
149+ f"Triggering downstream tasks for { len (self .added_feeds )} newly added feed(s): "
150+ f"{ ', ' .join (f ['stable_id' ] for f in self .added_feeds )} "
151+ )
152+ if os .getenv ("ENV" , "local" ) == "local" :
153+ self .logger .info ("Skipping downstream tasks in local environment." )
154+ return
155+ if not self .added_feeds :
156+ self .logger .info ("No feeds to trigger downstream tasks for." )
157+ return
158+
159+ env = os .getenv ("ENV" , "dev" )
160+ project_id = f"mobility-feeds-{ env } "
161+ publisher = pubsub_v1 .PublisherClient ()
162+ topic_path = publisher .topic_path (project_id , GBFS_PUBSUB_TOPIC_NAME )
163+ execution_id = str (uuid .uuid4 ())
164+
165+ publish_futures = []
166+ for feed_data in self .added_feeds :
167+ message = {
168+ "execution_id" : execution_id ,
169+ "stable_id" : feed_data ["stable_id" ],
170+ "feed_id" : feed_data ["feed_id" ],
171+ "url" : feed_data ["url" ],
172+ "extract_geolocation" : True ,
173+ }
174+ data = json .dumps (message ).encode ("utf-8" )
175+ self .logger .info (f"Publishing feed { feed_data ['stable_id' ]} to { topic_path } " )
176+ future = publisher .publish (topic_path , data )
177+ publish_futures .append (future )
178+
179+ futures .wait (publish_futures , return_when = futures .ALL_COMPLETED )
180+ self .logger .info (f"Published { len (self .added_feeds )} feed(s) to { topic_path } ." )
181+
128182
129183if __name__ == "__main__" :
130- GBFSDatabasePopulateHelper (set_up_configs ()).initialize (trigger_downstream_tasks = False )
184+ GBFSDatabasePopulateHelper (set_up_configs ()).initialize (trigger_downstream_tasks = True )
0 commit comments