feat: add weekly cleanup workflow for stale CI schemas #943
Changes from 4 commits
@@ -0,0 +1,71 @@

```yaml
name: Cleanup stale CI schemas

on:
  schedule:
    # Every Sunday at 03:00 UTC
    - cron: "0 3 * * 0"
  workflow_dispatch:
    inputs:
      max-age-hours:
        type: string
        required: false
        default: "24"
        description: Drop schemas older than this many hours

env:
  TESTS_DIR: ${{ github.workspace }}/dbt-data-reliability/integration_tests

jobs:
  cleanup:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        warehouse-type:
          - snowflake
          - bigquery
          - redshift
          - databricks_catalog
          - athena
    steps:
      - name: Checkout dbt package
        uses: actions/checkout@v4
        with:
          path: dbt-data-reliability

      - name: Setup Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.10"
          cache: "pip"

      - name: Install dbt
        run: >
          pip install
          "dbt-core"
          "dbt-${{ (matrix.warehouse-type == 'databricks_catalog' && 'databricks') || (matrix.warehouse-type == 'athena' && 'athena-community') || matrix.warehouse-type }}"

      - name: Write dbt profiles
        env:
          CI_WAREHOUSE_SECRETS: ${{ secrets.CI_WAREHOUSE_SECRETS || '' }}
        run: |
          # The cleanup job doesn't create schemas, but generate_profiles.py
          # requires --schema-name. Use a dummy value.
          python "${{ github.workspace }}/dbt-data-reliability/integration_tests/profiles/generate_profiles.py" \
            --template "${{ github.workspace }}/dbt-data-reliability/integration_tests/profiles/profiles.yml.j2" \
            --output ~/.dbt/profiles.yml \
            --schema-name "cleanup_placeholder"

      - name: Install dbt deps
        working-directory: ${{ env.TESTS_DIR }}/dbt_project
        run: dbt deps

      - name: Symlink local elementary package
        run: ln -sfn ${{ github.workspace }}/dbt-data-reliability ${{ env.TESTS_DIR }}/dbt_project/dbt_packages/elementary

      - name: Drop stale CI schemas
        working-directory: ${{ env.TESTS_DIR }}/dbt_project
        run: >
          dbt run-operation elementary.drop_stale_ci_schemas
          --args '{prefixes: ["dbt_"], max_age_hours: ${{ inputs.max-age-hours || '24' }}}'
          -t "${{ matrix.warehouse-type }}"
```
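The conditional expression in the "Install dbt" step maps each matrix value to a pip package name, special-casing the two warehouses whose adapter package doesn't follow the `dbt-<warehouse>` pattern. As a Python sketch (the function name is mine, not part of the workflow):

```python
def dbt_adapter_package(warehouse_type: str) -> str:
    """Mirror the workflow's ${{ ... }} expression: special-case the matrix
    values whose pip package name differs from the warehouse name."""
    special = {
        "databricks_catalog": "dbt-databricks",
        "athena": "dbt-athena-community",
    }
    return special.get(warehouse_type, f"dbt-{warehouse_type}")


print(dbt_adapter_package("snowflake"))           # dbt-snowflake
print(dbt_adapter_package("databricks_catalog"))  # dbt-databricks
print(dbt_adapter_package("athena"))              # dbt-athena-community
```

GitHub Actions expressions have no ternary operator, which is why the workflow chains `&&`/`||`: each `(cond && value)` term yields `value` when the condition holds, and the final `|| matrix.warehouse-type` is the fallback.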
@@ -0,0 +1,73 @@

```jinja
{#
  Integration-test helper for elementary.drop_stale_ci_schemas.

  Creates two CI-style schemas (one with an old timestamp, one recent),
  runs the cleanup macro, checks which schemas survived, cleans up,
  and returns a JSON result dict.
#}

{% macro test_drop_stale_ci_schemas() %}
  {% set database = elementary.target_database() %}
  {% set now = modules.datetime.datetime.utcnow() %}

  {# Old schema: timestamp in the past (2020-01-01 00:00:00) #}
  {% set old_schema = 'dbt_200101_000000_citest_00000000' %}
  {# Recent schema: timestamp = now #}
  {% set recent_ts = now.strftime('%y%m%d_%H%M%S') %}
  {% set recent_schema = 'dbt_' ~ recent_ts ~ '_citest_11111111' %}

  {{ log("TEST: creating old schema: " ~ old_schema, info=true) }}
  {{ log("TEST: creating recent schema: " ~ recent_schema, info=true) }}

  {# ── Create both schemas ───────────────────────────────────────────── #}
  {% do elementary_tests.edr_create_schema(database, old_schema) %}
  {% do elementary_tests.edr_create_schema(database, recent_schema) %}

  {# ── Verify both exist before running cleanup ──────────────────────── #}
  {% set old_exists_before = adapter.check_schema_exists(database, old_schema) %}
  {% set recent_exists_before = adapter.check_schema_exists(database, recent_schema) %}
  {{ log("TEST: old_exists_before=" ~ old_exists_before ~ ", recent_exists_before=" ~ recent_exists_before, info=true) }}

  {# ── Run cleanup with 1-hour threshold ─────────────────────────────── #}
  {% do elementary.drop_stale_ci_schemas(prefixes=['dbt_'], max_age_hours=1) %}

  {# ── Check which schemas survived ──────────────────────────────────── #}
  {% set old_exists_after = adapter.check_schema_exists(database, old_schema) %}
  {% set recent_exists_after = adapter.check_schema_exists(database, recent_schema) %}
  {{ log("TEST: old_exists_after=" ~ old_exists_after ~ ", recent_exists_after=" ~ recent_exists_after, info=true) }}

  {# ── Cleanup: drop any remaining test schemas ──────────────────────── #}
  {% if old_exists_after %}
    {% do elementary.drop_ci_schema(database, old_schema) %}
  {% endif %}
  {% if recent_exists_after %}
    {% do elementary.drop_ci_schema(database, recent_schema) %}
  {% endif %}

  {# ── Return results ────────────────────────────────────────────────── #}
  {% set results = {
    "old_exists_before": old_exists_before,
    "recent_exists_before": recent_exists_before,
    "old_dropped": not old_exists_after,
    "recent_kept": recent_exists_after
  } %}
  {% do return(tojson(results)) %}
{% endmacro %}


{# ── Per-adapter schema creation ─────────────────────────────────────── #}

{% macro edr_create_schema(database, schema_name) %}
  {% do return(adapter.dispatch('edr_create_schema', 'elementary_tests')(database, schema_name)) %}
{% endmacro %}

{% macro default__edr_create_schema(database, schema_name) %}
  {% set schema_relation = api.Relation.create(database=database, schema=schema_name) %}
  {% do dbt.create_schema(schema_relation) %}
  {% do adapter.commit() %}
{% endmacro %}

{% macro clickhouse__edr_create_schema(database, schema_name) %}
  {% do run_query("CREATE DATABASE IF NOT EXISTS " ~ schema_name) %}
  {% do adapter.commit() %}
{% endmacro %}
```
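The test macro builds its fixture names following the CI convention described elsewhere in this PR (`<prefix><YYMMDD_HHMMSS>_<label>_<hash>`). A hypothetical Python helper showing the same construction (names and signature are mine):

```python
from datetime import datetime


def make_ci_schema_name(ts: datetime, label: str, suffix: str, prefix: str = "dbt_") -> str:
    # Same format the test macro concatenates: prefix + YYMMDD_HHMMSS + label + suffix
    return f"{prefix}{ts.strftime('%y%m%d_%H%M%S')}_{label}_{suffix}"


old = make_ci_schema_name(datetime(2020, 1, 1), "citest", "00000000")
print(old)  # dbt_200101_000000_citest_00000000
```

This makes explicit why the macro hard-codes `dbt_200101_000000_citest_00000000` as the "old" fixture: the embedded timestamp, 2020-01-01 00:00:00, is guaranteed to be older than any reasonable `max_age_hours` threshold.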
@@ -0,0 +1,23 @@

```python
"""Integration test for elementary.drop_stale_ci_schemas macro."""

import json

import pytest
from dbt_project import DbtProject


# Dremio does not support DROP SCHEMA reliably; skip it.
# ClickHouse uses a custom dispatch (CREATE/DROP DATABASE) which is tested implicitly.
@pytest.mark.skip_targets(["dremio"])
def test_drop_stale_ci_schemas(dbt_project: DbtProject):
    """Verify that old CI schemas are dropped and recent ones are kept."""
    result = dbt_project.dbt_runner.run_operation(
        "elementary_tests.test_drop_stale_ci_schemas",
    )
    assert result, "run_operation returned no output"
    data = json.loads(result[0])

    assert data["old_exists_before"], "Setup failed: old schema was not created"
    assert data["recent_exists_before"], "Setup failed: recent schema was not created"
    assert data["old_dropped"], "Old schema should have been dropped by cleanup"
    assert data["recent_kept"], "Recent schema should have been kept by cleanup"
```

Review discussion on the Dremio skip:

**Collaborator:** I see that Dremio has "DROP FOLDER", should we perhaps use that?

**Author:** Good point! I looked into it — Dremio's […] Looking at the existing codebase, […] The skip was a conservative choice since I wasn't sure if Dremio could create arbitrary schemas via […]

**Collaborator:** Yes, let's try.

**Author:** Done — removed the Dremio skip in fe9ff60. CI will tell us if it works. Also fixed two other issues in the same push:
- Double-encoding bug (test macro was returning […])
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,80 @@ | ||||||||||||||||
| {# | ||||||||||||||||
| drop_stale_ci_schemas – clean up timestamped CI schemas. | ||||||||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could this macro be in the integration tests project?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done — moved the entire macro to |
||||||||||||||||
|
|
||||||||||||||||
| Schema naming convention produced by CI: | ||||||||||||||||
| <prefix><YYMMDD_HHMMSS>_<branch>_<hash> | ||||||||||||||||
| Examples: | ||||||||||||||||
| dbt_260228_112345_master_abcd1234 | ||||||||||||||||
| py_260228_112345_master_abcd1234 | ||||||||||||||||
| dbt_260228_112345_master_abcd1234_elementary (suffixed variant) | ||||||||||||||||
|
|
||||||||||||||||
| Call from a GitHub Actions workflow via: | ||||||||||||||||
| dbt run-operation elementary.drop_stale_ci_schemas \ | ||||||||||||||||
| --args '{prefixes: ["dbt_", "py_"], max_age_hours: 24}' | ||||||||||||||||
| #} | ||||||||||||||||
|
|
||||||||||||||||
| {% macro drop_stale_ci_schemas(prefixes=None, max_age_hours=24) %} | ||||||||||||||||
|
haritamar marked this conversation as resolved.
Outdated
|
||||||||||||||||
| {% if prefixes is none %} | ||||||||||||||||
| {% set prefixes = ['dbt_', 'py_'] %} | ||||||||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. let's make prefixes a mandatory input, and fail if it is not specified
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done — |
||||||||||||||||
| {% endif %} | ||||||||||||||||
|
|
||||||||||||||||
| {% set database = elementary.target_database() %} | ||||||||||||||||
| {% set all_schemas = adapter.list_schemas(database) %} | ||||||||||||||||
| {% set now = modules.datetime.datetime.utcnow() %} | ||||||||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🌐 Web query:
💡 Result: In Python 3.12, Replacementfrom datetime import datetime, UTC
now_utc = datetime.now(UTC)If you need compatibility with older Python versionsfrom datetime import datetime, timezone
now_utc = datetime.now(timezone.utc)Related:
|
||||||||||||||||
| {% set max_age_seconds = max_age_hours * 3600 %} | ||||||||||||||||
| {% set ns = namespace(dropped=0) %} | ||||||||||||||||
|
|
||||||||||||||||
| {{ log("CI schema cleanup: scanning " ~ all_schemas | length ~ " schema(s) in database '" ~ database ~ "' for prefixes " ~ prefixes | string, info=true) }} | ||||||||||||||||
|
|
||||||||||||||||
| {% for schema_name in all_schemas | sort %} | ||||||||||||||||
| {% set schema_lower = schema_name.lower() %} | ||||||||||||||||
| {% for prefix in prefixes %} | ||||||||||||||||
| {% if schema_lower.startswith(prefix.lower()) %} | ||||||||||||||||
| {% set remainder = schema_lower[prefix | length :] %} | ||||||||||||||||
| {# Timestamp format: YYMMDD_HHMMSS (13 chars) followed by _ #} | ||||||||||||||||
| {% if remainder | length >= 14 and remainder[6:7] == '_' and remainder[13:14] == '_' %} | ||||||||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done — replaced the manual character-by-character validation with Named it |
||||||||||||||||
| {% set ts_str = remainder[:13] %} | ||||||||||||||||
| {# Validate: positions 0-5 and 7-12 must be digits #} | ||||||||||||||||
| {% set digits = ts_str[:6] ~ ts_str[7:] %} | ||||||||||||||||
| {% set ns_valid = namespace(ok=true) %} | ||||||||||||||||
| {% for c in digits %} | ||||||||||||||||
| {% if c not in '0123456789' %} | ||||||||||||||||
| {% set ns_valid.ok = false %} | ||||||||||||||||
| {% endif %} | ||||||||||||||||
| {% endfor %} | ||||||||||||||||
| {% if ns_valid.ok %} | ||||||||||||||||
| {% set schema_ts = modules.datetime.datetime.strptime(ts_str, '%y%m%d_%H%M%S') %} | ||||||||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The digit validation ensures characters are numeric but doesn't validate actual date ranges. For example, Consider wrapping in a try/except to skip malformed schemas gracefully: 🛡️ Suggested defensive handling {% if ns_valid.ok %}
- {% set schema_ts = modules.datetime.datetime.strptime(ts_str, '%y%m%d_%H%M%S') %}
- {% set age_seconds = (now - schema_ts).total_seconds() %}
- {% if age_seconds > max_age_seconds %}
+ {% set schema_ts = none %}
+ {% try %}
+ {% set schema_ts = modules.datetime.datetime.strptime(ts_str, '%y%m%d_%H%M%S') %}
+ {% except %}
+ {{ log(" skip " ~ schema_name ~ " (invalid timestamp: " ~ ts_str ~ ")", info=true) }}
+ {% endtry %}
+ {% if schema_ts is not none %}
+ {% set age_seconds = (now - schema_ts).total_seconds() %}
+ {% if age_seconds > max_age_seconds %}Note: Jinja2 doesn't have native try/except. You may need to validate month (01-12) and day (01-31) ranges explicitly, or accept the current behavior given CI schemas are auto-generated with valid timestamps. 🤖 Prompt for AI Agents |
||||||||||||||||
| {% set age_seconds = (now - schema_ts).total_seconds() %} | ||||||||||||||||
| {% if age_seconds > max_age_seconds %} | ||||||||||||||||
| {{ log(" DROP " ~ schema_name ~ " (age: " ~ (age_seconds / 3600) | round(1) ~ " h)", info=true) }} | ||||||||||||||||
| {% do elementary.drop_ci_schema(database, schema_name) %} | ||||||||||||||||
| {% set ns.dropped = ns.dropped + 1 %} | ||||||||||||||||
| {% else %} | ||||||||||||||||
| {{ log(" keep " ~ schema_name ~ " (age: " ~ (age_seconds / 3600) | round(1) ~ " h)", info=true) }} | ||||||||||||||||
| {% endif %} | ||||||||||||||||
| {% endif %} | ||||||||||||||||
| {% endif %} | ||||||||||||||||
| {% endif %} | ||||||||||||||||
| {% endfor %} | ||||||||||||||||
| {% endfor %} | ||||||||||||||||
|
|
||||||||||||||||
| {{ log("CI schema cleanup complete. Dropped " ~ ns.dropped ~ " stale schema(s).", info=true) }} | ||||||||||||||||
| {% endmacro %} | ||||||||||||||||
|
|
||||||||||||||||
|
|
||||||||||||||||
| {# ── Per-adapter schema drop ─────────────────────────────────────────── #} | ||||||||||||||||
|
|
||||||||||||||||
| {% macro drop_ci_schema(database, schema_name) %} | ||||||||||||||||
| {% do return(adapter.dispatch('drop_ci_schema', 'elementary')(database, schema_name)) %} | ||||||||||||||||
| {% endmacro %} | ||||||||||||||||
|
|
||||||||||||||||
| {% macro default__drop_ci_schema(database, schema_name) %} | ||||||||||||||||
| {% set schema_relation = api.Relation.create(database=database, schema=schema_name) %} | ||||||||||||||||
| {% do dbt.drop_schema(schema_relation) %} | ||||||||||||||||
| {% do adapter.commit() %} | ||||||||||||||||
| {% endmacro %} | ||||||||||||||||
|
|
||||||||||||||||
| {% macro clickhouse__drop_ci_schema(database, schema_name) %} | ||||||||||||||||
| {% do run_query("DROP DATABASE IF EXISTS " ~ schema_name) %} | ||||||||||||||||
| {% do adapter.commit() %} | ||||||||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Quote the ClickHouse database identifier before executing Line 100 concatenates ♻️ Proposed fix {% macro clickhouse__drop_ci_schema(database, schema_name) %}
- {% do run_query("DROP DATABASE IF EXISTS " ~ schema_name) %}
+ {% do run_query("DROP DATABASE IF EXISTS " ~ adapter.quote(schema_name)) %}
{% do adapter.commit() %}
{% endmacro %}📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||||
| {% endmacro %} | ||||||||||||||||
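The macro's parse-and-compare logic can be sketched in plain Python (helper name and signature are mine). Note, per the bot comment about date ranges, that `strptime` raises `ValueError` for digit strings that don't encode a real date:

```python
from datetime import datetime


def is_stale(ts_str: str, now: datetime, max_age_hours: float) -> bool:
    """Parse a YYMMDD_HHMMSS schema timestamp and report whether it is
    older than the cutoff. Raises ValueError for impossible dates."""
    ts = datetime.strptime(ts_str, "%y%m%d_%H%M%S")
    return (now - ts).total_seconds() > max_age_hours * 3600


now = datetime(2026, 2, 28, 12, 0, 0)
print(is_stale("200101_000000", now, 24))  # True  (years old)
print(is_stale("260228_110000", now, 24))  # False (one hour old)

try:
    is_stale("261301_000000", now, 24)  # month 13: all digits, not a date
except ValueError:
    print("invalid timestamp rejected")
```

This is why the Jinja digit check alone is not sufficient to guarantee `strptime` succeeds: `261301_000000` passes the digit validation but still raises.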
**Bot (coderabbitai):** Use timezone-aware UTC instead of `utcnow()` in the test macro as well. `datetime.utcnow()` is deprecated since Python 3.12 (it still exists but emits a `DeprecationWarning` and is scheduled for removal). The replacement is `datetime.now(timezone.utc)`, or `datetime.now(datetime.UTC)` on Python 3.11+. dbt's Jinja `modules` object exposes the full stdlib `datetime` module, so `modules.datetime.timezone.utc` is available in macros across the Python versions dbt supports.
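Following the review note above, a minimal comparison of the deprecated call and its timezone-aware replacement. One subtlety for the macros: naive and aware datetimes cannot be mixed in arithmetic, so the "now" value and the parsed schema timestamp must be switched together:

```python
from datetime import datetime, timezone

# Deprecated since Python 3.12: returns a *naive* UTC timestamp.
naive_now = datetime.utcnow()

# Replacement: timezone-aware UTC timestamp.
aware_now = datetime.now(timezone.utc)
print(aware_now.tzinfo)  # UTC

# strptime also returns a naive datetime; it must be made aware before
# subtracting it from aware_now, or Python raises TypeError.
parsed = datetime.strptime("260228_110000", "%y%m%d_%H%M%S").replace(tzinfo=timezone.utc)
age = aware_now - parsed  # timedelta; safe because both sides are aware
```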