MobilityData
diff --git a/api/src/scripts/gbfs_utils/comparison.py b/api/src/scripts/gbfs_utils/comparison.py
Lines changed: 10 additions & 6 deletions
diff --git a/api/src/scripts/populate_db_gbfs.py b/api/src/scripts/populate_db_gbfs.py
Lines changed: 9 additions & 3 deletions
diff --git a/api/src/shared/database/database.py b/api/src/shared/database/database.py
Lines changed: 34 additions & 5 deletions
diff --git a/api/tests/integration/cascade_delete/conftest.py b/api/tests/integration/cascade_delete/conftest.py
Lines changed: 52 additions & 0 deletions
@@ -5,9 +5,7 @@
 
 def generate_system_csv_from_db(df, db_session):
     """Generate a DataFrame from the database with the same columns as the CSV file."""
-    stable_ids = "gbfs-" + df["System ID"]
     query = db_session.query(Gbfsfeed)
-    query = query.filter(Gbfsfeed.stable_id.in_(stable_ids.to_list()))
     query = query.options(
         joinedload(Gbfsfeed.locations), joinedload(Gbfsfeed.gbfsversions), joinedload(Gbfsfeed.externalids)
     )
@@ -49,8 +47,9 @@ def compare_db_to_csv(df_from_db, df_from_csv, logger):
         return None, None
 
     # Align both DataFrames by "System ID"
-    df_from_db.set_index("System ID", inplace=True)
-    df_from_csv.set_index("System ID", inplace=True)
+    # Keep the System ID column because it's used later in the code
+    df_from_db.set_index("System ID", inplace=True, drop=False)
+    df_from_csv.set_index("System ID", inplace=True, drop=False)
 
     # Find rows that are in the CSV but not in the DB (new feeds)
     missing_in_db = df_from_csv[~df_from_csv.index.isin(df_from_db.index)]
@@ -68,7 +67,11 @@ def compare_db_to_csv(df_from_db, df_from_csv, logger):
     common_ids = df_from_db.index.intersection(df_from_csv.index)
     df_db_common = df_from_db.loc[common_ids]
     df_csv_common = df_from_csv.loc[common_ids]
-    differences = df_db_common != df_csv_common
+
+    # Exclude 'Location' from the comparison: the DB value may have been rewritten by the Python function
+    # that calculates the location, so it can differ from the CSV without the feed having changed.
+    columns_to_compare = [col for col in df_db_common.columns if col != "Location"]
+    differences = df_db_common[columns_to_compare] != df_csv_common[columns_to_compare]
     differing_rows = df_csv_common[differences.any(axis=1)]
 
     if not differing_rows.empty:
@@ -83,6 +86,7 @@ def compare_db_to_csv(df_from_db, df_from_csv, logger):
             logger.info(80 * "-")
 
     # Merge differing rows with missing_in_db to capture all new or updated feeds
-    all_differing_or_new_rows = pd.concat([differing_rows, missing_in_db]).reset_index()
+    # Drop the index because we have it as the System ID column.
+    all_differing_or_new_rows = pd.concat([differing_rows, missing_in_db]).reset_index(drop=True)
 
     return all_differing_or_new_rows, missing_in_csv
@@ -36,14 +36,20 @@ def deprecate_feeds(self, deprecated_feeds):
             self.logger.info("No feeds to deprecate.")
             return
 
-        self.logger.info(f"Deprecating {len(deprecated_feeds)} feed(s).")
+        self.logger.info(f"Deleting {len(deprecated_feeds)} feed(s).")
         with self.db.start_db_session() as session:
             for index, row in deprecated_feeds.iterrows():
                 stable_id = self.get_stable_id(row)
                 gbfs_feed = self.query_feed_by_stable_id(session, stable_id, "gbfs")
                 if gbfs_feed:
-                    self.logger.info(f"Deprecating feed with stable_id={stable_id}")
-                    gbfs_feed.status = "deprecated"
+                    # A note about the deletion done here:
+                    # Some other tables have a foreign key pointing to the feed, and these cannot be null
+                    # (e.g. gbfsversion). So the delete will fail, unless we cascade the deletion of the
+                    # gbfs_feed to the deletion of the entry in gbfsversion, which is done in the DB
+                    # schema. It's also the case for other tables and other foreign keys.
+                    self.logger.info(f"Deleting feed with stable_id={stable_id}")
+                    session.delete(gbfs_feed)
+                    session.flush()
 
     def populate_db(self, session, fetch_url=True):
         """Populate the database with the GBFS feeds"""
 
@@ -15,6 +15,8 @@
     Gbfsversion,
     Gbfsfeed,
     Gbfsvalidationreport,
+    Osmlocationgroup,
+    Validationreport,
 )
 from sqlalchemy.orm import sessionmaker
 import logging
@@ -52,12 +54,39 @@ def configure_polymorphic_mappers():
     gbfsfeed_mapper.polymorphic_identity = Gbfsfeed.__tablename__.lower()
 
 
+# The `cascade_entities` dictionary maps SQLAlchemy models to the relationship attributes that should cascade
+# deletes to their children. When a parent entity (such as `Feed` or `Gbfsfeed`) is deleted, the related child
+# rows listed here are deleted with it; a child that is detached from its parent (an orphan) is deleted as well.
+# The `set_cascade` function applies this configuration by setting the `cascade` property to "all, delete-orphan"
+# and enabling `passive_deletes` for each specified relationship. This leverages the database's ON DELETE CASCADE
+# constraints and ensures that related records are cleaned up automatically when a parent is removed.
 cascade_entities = {
-    Gtfsfeed: [Gtfsfeed.redirectingids, Gtfsfeed.redirectingids_, Gtfsfeed.externalids],
-    Gbfsversion: [Gbfsversion.gbfsendpoints, Gbfsversion.gbfsvalidationreports],
-    Gbfsfeed: [Gbfsfeed.gbfsversions],
-    Gbfsvalidationreport: [Gbfsvalidationreport.gbfsnotices],
-    Feed: [Feed.feedosmlocationgroups],
+    Feed: [
+        Feed.externalids,  # externalid_feed_id_fkey
+        Feed.feedlocationgrouppoints,  # NOTE(review): FK constraint name not listed, unlike siblings - confirm
+        Feed.feedosmlocationgroups,  # feedosmlocation_feed_id_fkey
+        Feed.gtfsdatasets,  # gtfsdataset_feed_id_fkey
+        Feed.officialstatushistories,  # officialstatushistory_feed_id_fkey
+        Feed.redirectingids,  # redirectingid_source_id_fkey
+        Feed.redirectingids_,  # redirectingid_target_id_fkey
+    ],
+    Gbfsfeed: [
+        Gbfsfeed.gbfsversions,  # gbfsversion_feed_id_fkey
+    ],
+    Gbfsvalidationreport: [
+        Gbfsvalidationreport.gbfsnotices,  # gbfsnotice_validation_report_id_fkey
+    ],
+    Gbfsversion: [
+        Gbfsversion.gbfsendpoints,  # gbfsendpoint_gbfs_version_id_fkey
+        Gbfsversion.gbfsvalidationreports,  # gbfsvalidationreport_gbfs_version_id_fkey
+    ],
+    Osmlocationgroup: [
+        Osmlocationgroup.feedlocationgrouppoints,  # NOTE(review): FK constraint name not listed - confirm
+        Osmlocationgroup.feedosmlocationgroups,  # feedosmlocation_group_id_fkey
+    ],
+    Validationreport: [
+        Validationreport.notices,  # notice_validation_report_id_fkey
+    ],
 }
 
 
 
@@ -0,0 +1,52 @@
+import os
+
+import pytest
+from fastapi import FastAPI
+from fastapi.testclient import TestClient
+from sqlalchemy import text
+
+from shared.database.database import Database
+from main import app as application
+from tests.test_utils.database import populate_database
+
+
+@pytest.fixture(scope="package")
+def app() -> FastAPI:
+    application.dependency_overrides = {}  # reset the imported app's overrides before handing it to tests
+    return application  # the same `main.app` object, not a copy
+
+
+@pytest.fixture(scope="package")
+def test_database():
+    """Yield the database handle produced by populate_database, with no data directories, for this package."""
+    # Restrict the tests to the test database
+    os.environ["FEEDS_DATABASE_URL"] = "postgresql://postgres:postgres@localhost:54320/MobilityDatabaseTest"
+
+    # Both data-directory arguments are empty lists.
+    with populate_database(Database(), [], []) as populated_db:
+        yield populated_db
+
+
+@pytest.fixture(scope="package")
+def client(app, test_database) -> TestClient:
+    return TestClient(app)  # test_database is unused here; requesting it makes that fixture run first
+
+
+@pytest.fixture(autouse=True)
+def clean_database(test_database, request):
+    """Truncate every public table after each passing test so tests need not coordinate DB ids."""
+    yield
+    # rep_call is only set once the call phase has run (absent if setup failed), so read it defensively.
+    if getattr(getattr(request.node, "rep_call", None), "outcome", None) == "passed":
+        with test_database.start_db_session() as session:
+            for table in session.execute(text("SELECT tablename FROM pg_tables WHERE schemaname = 'public'")):
+                session.execute(text(f'TRUNCATE "{table[0]}" CASCADE'))  # quoted: name may be case-sensitive
+            session.commit()
+
+
+@pytest.hookimpl(tryfirst=True, hookwrapper=True)
+def pytest_runtest_makereport(item, call):
+    """Store each phase's report on the test item as ``rep_<call.when>``."""
+    hook_outcome = yield
+    # Resolve the wrapped hook's result and attach it under a phase-specific attribute name.
+    setattr(item, f"rep_{call.when}", hook_outcome.get_result())