55
66def generate_system_csv_from_db (df , db_session ):
77 """Generate a DataFrame from the database with the same columns as the CSV file."""
8- stable_ids = "gbfs-" + df ["System ID" ]
98 query = db_session .query (Gbfsfeed )
10- query = query .filter (Gbfsfeed .stable_id .in_ (stable_ids .to_list ()))
119 query = query .options (
1210 joinedload (Gbfsfeed .locations ), joinedload (Gbfsfeed .gbfsversions ), joinedload (Gbfsfeed .externalids )
1311 )
@@ -49,8 +47,9 @@ def compare_db_to_csv(df_from_db, df_from_csv, logger):
4947 return None , None
5048
5149 # Align both DataFrames by "System ID"
52- df_from_db .set_index ("System ID" , inplace = True )
53- df_from_csv .set_index ("System ID" , inplace = True )
50+ # Keep the System ID column because it's used later in the code
51+ df_from_db .set_index ("System ID" , inplace = True , drop = False )
52+ df_from_csv .set_index ("System ID" , inplace = True , drop = False )
5453
5554 # Find rows that are in the CSV but not in the DB (new feeds)
5655 missing_in_db = df_from_csv [~ df_from_csv .index .isin (df_from_db .index )]
@@ -68,7 +67,11 @@ def compare_db_to_csv(df_from_db, df_from_csv, logger):
6867 common_ids = df_from_db .index .intersection (df_from_csv .index )
6968 df_db_common = df_from_db .loc [common_ids ]
7069 df_csv_common = df_from_csv .loc [common_ids ]
71- differences = df_db_common != df_csv_common
70+
71+ # Exclude 'Location' from comparison because the DB values might have been changed in the
72+ # python function that calculates the location.
73+ columns_to_compare = [col for col in df_db_common .columns if col != "Location" ]
74+ differences = df_db_common [columns_to_compare ] != df_csv_common [columns_to_compare ]
7275 differing_rows = df_csv_common [differences .any (axis = 1 )]
7376
7477 if not differing_rows .empty :
@@ -83,6 +86,7 @@ def compare_db_to_csv(df_from_db, df_from_csv, logger):
8386 logger .info (80 * "-" )
8487
8588 # Merge differing rows with missing_in_db to capture all new or updated feeds
86- all_differing_or_new_rows = pd .concat ([differing_rows , missing_in_db ]).reset_index ()
89+ # Drop the index because we have it as the System ID column.
90+ all_differing_or_new_rows = pd .concat ([differing_rows , missing_in_db ]).reset_index (drop = True )
8791
8892 return all_differing_or_new_rows , missing_in_csv
0 commit comments