From 41c5055530b317c45066264a3fff75e4c3f94832 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 27 Feb 2026 19:15:47 +0000 Subject: [PATCH 1/5] fix: use py_<date>_<branch>_<hash> schema naming to prevent CI collisions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the old truncation-based schema naming with a hash-based approach that prevents cross-branch collisions when concurrent CI jobs share the same warehouse. Uses py_ prefix to identify the Python package CI (matching dbt_ prefix in dbt-data-reliability). Format: py_<YYMMDD>_<branch>_<8-char-hash> The hash is derived from the concurrency group key. Co-Authored-By: Itamar Hartstein --- .github/workflows/test-warehouse.yml | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index fe52d9c7f..b5373e210 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -65,7 +65,8 @@ jobs: run: working-directory: elementary concurrency: - # This is what eventually defines the schema name in the data platform. + # Serialises runs for the same warehouse × dbt-version × branch. + # The schema name is derived from a hash of this group (see "Write dbt profiles"). 
group: tests_${{ inputs.warehouse-type }}_dbt_${{ inputs.dbt-version }}_${{ github.head_ref || github.ref_name }} cancel-in-progress: true steps: @@ -116,13 +117,25 @@ jobs: env: CI_WAREHOUSE_SECRETS: ${{ secrets.CI_WAREHOUSE_SECRETS || '' }} run: | - DBT_VERSION=$(pip show dbt-core | grep -i version | awk '{print $2}' | sed 's/\.//g') - UNDERSCORED_REF_NAME=$(echo "${{ inputs.warehouse-type }}_dbt_${DBT_VERSION}_${BRANCH_NAME}" | awk '{print tolower($0)}' | head -c 40 | sed "s/[-\/]/_/g") + # Schema name = py_<YYMMDD>_<branch>_<8-char hash> + # The hash prevents collisions across concurrent jobs; the branch + # keeps it human-readable; the date helps with stale schema cleanup. + # + # Budget (PostgreSQL 63-char limit): + # py_(3) + date(6) + _(1) + branch(≤29) + _(1) + hash(8) = 48 + # + _elementary(11) + _gw7(4) = 63 + CONCURRENCY_GROUP="tests_${{ inputs.warehouse-type }}_dbt_${{ inputs.dbt-version }}_${BRANCH_NAME}" + SHORT_HASH=$(echo -n "$CONCURRENCY_GROUP" | sha256sum | head -c 8) + SAFE_BRANCH=$(echo "${BRANCH_NAME}" | awk '{print tolower($0)}' | sed "s/[^a-z0-9]/_/g" | head -c 29) + DATE_STAMP=$(date +%y%m%d) + SCHEMA_NAME="py_${DATE_STAMP}_${SAFE_BRANCH}_${SHORT_HASH}" + + echo "Schema name: $SCHEMA_NAME (branch='${BRANCH_NAME}', date=${DATE_STAMP}, hash of concurrency group)" python "${{ github.workspace }}/elementary/tests/profiles/generate_profiles.py" \ --template "${{ github.workspace }}/elementary/tests/profiles/profiles.yml.j2" \ --output ~/.dbt/profiles.yml \ - --schema-name "py_$UNDERSCORED_REF_NAME" + --schema-name "$SCHEMA_NAME" - name: Run Python package unit tests run: pytest -vv tests/unit --warehouse-type ${{ inputs.warehouse-type }} From 3f540f6861477074e59a87e6c106b099a9d09394 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 27 Feb 2026 19:20:49 +0000 Subject: [PATCH 2/5] style: collapse consecutive underscores in SAFE_BRANCH (CodeRabbit nitpick) Co-Authored-By: Itamar Hartstein --- 
.github/workflows/test-warehouse.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index b5373e210..b166009ff 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -126,7 +126,7 @@ jobs: # + _elementary(11) + _gw7(4) = 63 CONCURRENCY_GROUP="tests_${{ inputs.warehouse-type }}_dbt_${{ inputs.dbt-version }}_${BRANCH_NAME}" SHORT_HASH=$(echo -n "$CONCURRENCY_GROUP" | sha256sum | head -c 8) - SAFE_BRANCH=$(echo "${BRANCH_NAME}" | awk '{print tolower($0)}' | sed "s/[^a-z0-9]/_/g" | head -c 29) + SAFE_BRANCH=$(echo "${BRANCH_NAME}" | awk '{print tolower($0)}' | sed "s/[^a-z0-9]/_/g; s/__*/_/g" | head -c 29) DATE_STAMP=$(date +%y%m%d) SCHEMA_NAME="py_${DATE_STAMP}_${SAFE_BRANCH}_${SHORT_HASH}" From 415b35b86cd1c288b547f54fd4437b91f960b02b Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 27 Feb 2026 21:09:45 +0000 Subject: [PATCH 3/5] feat: add HHMM to schema timestamp for per-run uniqueness Co-Authored-By: Itamar Hartstein --- .github/workflows/test-warehouse.yml | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index b166009ff..0139e557d 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -117,20 +117,21 @@ jobs: env: CI_WAREHOUSE_SECRETS: ${{ secrets.CI_WAREHOUSE_SECRETS || '' }} run: | - # Schema name = py_<YYMMDD>_<branch>_<8-char hash> + # Schema name = py_<YYMMDD_HHMM>_<branch>_<8-char hash> # The hash prevents collisions across concurrent jobs; the branch - # keeps it human-readable; the date helps with stale schema cleanup. + # keeps it human-readable; the timestamp helps with stale schema + # cleanup and ensures each CI run gets a unique schema. 
# # Budget (PostgreSQL 63-char limit): - # py_(3) + date(6) + _(1) + branch(≤29) + _(1) + hash(8) = 48 - # + _elementary(11) + _gw7(4) = 63 + # py_(3) + timestamp(11) + _(1) + branch(≤21) + _(1) + hash(8) = 45 + # + _elementary(11) + _gw7(4) = 60 CONCURRENCY_GROUP="tests_${{ inputs.warehouse-type }}_dbt_${{ inputs.dbt-version }}_${BRANCH_NAME}" SHORT_HASH=$(echo -n "$CONCURRENCY_GROUP" | sha256sum | head -c 8) - SAFE_BRANCH=$(echo "${BRANCH_NAME}" | awk '{print tolower($0)}' | sed "s/[^a-z0-9]/_/g; s/__*/_/g" | head -c 29) - DATE_STAMP=$(date +%y%m%d) + SAFE_BRANCH=$(echo "${BRANCH_NAME}" | awk '{print tolower($0)}' | sed "s/[^a-z0-9]/_/g; s/__*/_/g" | head -c 21) + DATE_STAMP=$(date +%y%m%d_%H%M) SCHEMA_NAME="py_${DATE_STAMP}_${SAFE_BRANCH}_${SHORT_HASH}" - echo "Schema name: $SCHEMA_NAME (branch='${BRANCH_NAME}', date=${DATE_STAMP}, hash of concurrency group)" + echo "Schema name: $SCHEMA_NAME (branch='${BRANCH_NAME}', timestamp=${DATE_STAMP}, hash of concurrency group)" python "${{ github.workspace }}/elementary/tests/profiles/generate_profiles.py" \ --template "${{ github.workspace }}/elementary/tests/profiles/profiles.yml.j2" \ From 4a3f43bf186b434ffb17325d891e95f3ea501373 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 27 Feb 2026 21:13:31 +0000 Subject: [PATCH 4/5] style: use explicit UTC for timestamp (date -u) Co-Authored-By: Itamar Hartstein --- .github/workflows/test-warehouse.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index 0139e557d..5a720c475 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -128,7 +128,7 @@ jobs: CONCURRENCY_GROUP="tests_${{ inputs.warehouse-type }}_dbt_${{ inputs.dbt-version }}_${BRANCH_NAME}" SHORT_HASH=$(echo -n "$CONCURRENCY_GROUP" | sha256sum | head -c 8) SAFE_BRANCH=$(echo "${BRANCH_NAME}" | awk '{print tolower($0)}' 
| sed "s/[^a-z0-9]/_/g; s/__*/_/g" | head -c 21) - DATE_STAMP=$(date +%y%m%d_%H%M) + DATE_STAMP=$(date -u +%y%m%d_%H%M) SCHEMA_NAME="py_${DATE_STAMP}_${SAFE_BRANCH}_${SHORT_HASH}" echo "Schema name: $SCHEMA_NAME (branch='${BRANCH_NAME}', timestamp=${DATE_STAMP}, hash of concurrency group)" From 649af0b212a01274fab69d54d3b3e718b98f1c90 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 27 Feb 2026 21:56:41 +0000 Subject: [PATCH 5/5] style: add seconds to timestamp (YYMMDD_HHMMSS) per maintainer request Co-Authored-By: Itamar Hartstein --- .github/workflows/test-warehouse.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index 5a720c475..73e1a2331 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -117,18 +117,18 @@ jobs: env: CI_WAREHOUSE_SECRETS: ${{ secrets.CI_WAREHOUSE_SECRETS || '' }} run: | - # Schema name = py_<YYMMDD_HHMM>_<branch>_<8-char hash> + # Schema name = py_<YYMMDD_HHMMSS>_<branch>_<8-char hash> # The hash prevents collisions across concurrent jobs; the branch # keeps it human-readable; the timestamp helps with stale schema # cleanup and ensures each CI run gets a unique schema. 
# # Budget (PostgreSQL 63-char limit): - # py_(3) + timestamp(11) + _(1) + branch(≤21) + _(1) + hash(8) = 45 + # py_(3) + timestamp(13) + _(1) + branch(≤19) + _(1) + hash(8) = 45 # + _elementary(11) + _gw7(4) = 60 CONCURRENCY_GROUP="tests_${{ inputs.warehouse-type }}_dbt_${{ inputs.dbt-version }}_${BRANCH_NAME}" SHORT_HASH=$(echo -n "$CONCURRENCY_GROUP" | sha256sum | head -c 8) - SAFE_BRANCH=$(echo "${BRANCH_NAME}" | awk '{print tolower($0)}' | sed "s/[^a-z0-9]/_/g; s/__*/_/g" | head -c 21) - DATE_STAMP=$(date -u +%y%m%d_%H%M) + SAFE_BRANCH=$(echo "${BRANCH_NAME}" | awk '{print tolower($0)}' | sed "s/[^a-z0-9]/_/g; s/__*/_/g" | head -c 19) + DATE_STAMP=$(date -u +%y%m%d_%H%M%S) SCHEMA_NAME="py_${DATE_STAMP}_${SAFE_BRANCH}_${SHORT_HASH}" echo "Schema name: $SCHEMA_NAME (branch='${BRANCH_NAME}', timestamp=${DATE_STAMP}, hash of concurrency group)"