Skip to content

Integration Tests #2451

Integration Tests

Integration Tests #2451

Workflow file for this run

# Integration Tests for dbt-databricks
#
# This workflow runs integration tests that require Databricks secrets. It is
# never triggered by PR events directly — every PR-based run is the result of
# a maintainer's explicit decision.
#
# Triggering:
#   1. On a PR (internal OR fork): a maintainer comments `/integration-test`.
#      The integration-trigger workflow validates the comment author and
#      dispatches this workflow with the PR number. The run posts a result
#      comment back to the PR when the matrix completes.
#   2. Manually from the Actions tab (workflow_dispatch):
#      - One PR (e.g. "100") OR a comma-separated list ("100,200,300") in pr_numbers.
#      - Or a git_ref for ad-hoc testing.
#   3. Nightly on `main` at 03:00 IST (21:30 UTC). The `prepare` job short-circuits
#      if the current main SHA has already had a successful integration run,
#      emitting an empty targets array so the matrix jobs skip cleanly.
#
# Security: PR-triggered runs are gated on maintainer comment authorization;
# fork-PR code runs in the main repo context (access to secrets) only because
# a maintainer explicitly approved it via the slash command.
name: Integration Tests

on:
  # Dispatch entry point. Both inputs are optional; `git_ref` is consulted
  # only when `pr_numbers` is empty.
  workflow_dispatch:
    inputs:
      pr_numbers:
        description: "PR number(s) to test — single PR or comma-separated for batch (e.g. '100' or '100,200,300')"
        required: false
        type: string
      git_ref:
        description: "Git ref (branch/tag/commit) to test — used only when pr_numbers is empty"
        required: false
        type: string
  # Nightly run: 21:30 UTC = 03:00 IST.
  schedule:
    - cron: '30 21 * * *'

# Workflow-wide token scopes; a job that declares its own `permissions`
# replaces this set for that job.
permissions:
  contents: read
  id-token: write

# Concurrency is keyed on the run's target:
#   - Runs for different PRs / batches land in different groups and do not
#     cancel each other.
#   - Dispatching the same PR / batch again cancels the stale in-progress run.
#   - Scheduled runs carry no inputs, so they fall through to `github.ref`
#     and share the main-ref group.
concurrency:
  group: >-
    ${{ github.workflow }}-${{ github.event.inputs.pr_numbers || github.event.inputs.git_ref || github.ref }}
  cancel-in-progress: true
jobs:
# Resolves what this run should test and publishes it as the `targets` output:
# a compact JSON array of {"pr": <label>, "ref": <git ref>} objects.
#
# Resolution order:
#   - schedule event        -> one "nightly" entry for the default ref, or an
#                              empty array when this SHA already has a
#                              successful run of integration.yml on main.
#   - pr_numbers non-blank  -> one entry per distinct PR number, pointing at
#                              refs/pull/<n>/head. Any non-numeric item fails
#                              the job.
#   - git_ref non-blank     -> one "manual" entry for that ref.
#   - otherwise             -> one "manual" entry for the ref the workflow was
#                              dispatched on.
prepare:
  runs-on:
    group: databricks-protected-runner-group
    labels: linux-ubuntu-latest
  # This job checks out nothing and only queries the Actions REST API, so it
  # asks for `actions: read` explicitly instead of inheriting the
  # workflow-level scopes (which do not include it).
  permissions:
    actions: read
  outputs:
    targets: ${{ steps.parse.outputs.targets }}
  steps:
    - name: Parse targets
      id: parse
      shell: bash
      env:
        EVENT_NAME: ${{ github.event_name }}
        INPUT_PR_NUMBERS: ${{ github.event.inputs.pr_numbers }}
        INPUT_GIT_REF: ${{ github.event.inputs.git_ref }}
        DEFAULT_REF: ${{ github.ref }}
        GH_TOKEN: ${{ github.token }}
      run: |
        set -euo pipefail

        # Emit one compact {"pr": ..., "ref": ...} object. jq performs the JSON
        # string escaping, so a ref containing a quote or backslash cannot
        # corrupt the matrix payload.
        entry() { jq -cn --arg pr "$1" --arg ref "$2" '{pr: $pr, ref: $ref}'; }

        # Strip leading/trailing whitespace only; inner characters are kept.
        trim() {
          local s="$1"
          s="${s#"${s%%[![:space:]]*}"}"
          s="${s%"${s##*[![:space:]]}"}"
          printf '%s' "$s"
        }

        entries=()
        if [[ "$EVENT_NAME" == "schedule" ]]; then
          # Nightly skip-if-unchanged: if this main SHA already has a green
          # integration run, leave the list empty so the matrix jobs skip.
          already_tested=$(curl -sfS \
            -H "Authorization: Bearer $GH_TOKEN" \
            -H "Accept: application/vnd.github+json" \
            "https://api.github.com/repos/$GITHUB_REPOSITORY/actions/workflows/integration.yml/runs?branch=main&status=success&head_sha=$GITHUB_SHA" \
            | jq -r '.total_count // 0')
          if [[ "$already_tested" -gt 0 ]]; then
            echo "Nightly skip: main @ $GITHUB_SHA already has $already_tested successful run(s)."
          else
            entries+=("$(entry "nightly" "$DEFAULT_REF")")
          fi
        elif [[ -n "${INPUT_PR_NUMBERS//[[:space:]]/}" ]]; then
          # `seen` drops repeated PR numbers: a repeated number would create a
          # second matrix cell with the same log-artifact name.
          declare -A seen=()
          IFS=',' read -ra prs <<< "$INPUT_PR_NUMBERS"
          for pr in "${prs[@]}"; do
            pr_trimmed="${pr//[[:space:]]/}"
            if [[ -z "$pr_trimmed" ]]; then
              continue
            fi
            if [[ ! "$pr_trimmed" =~ ^[0-9]+$ ]]; then
              echo "::error::Invalid PR number '$pr_trimmed' in pr_numbers='$INPUT_PR_NUMBERS' — expected digits, comma-separated."
              exit 1
            fi
            if [[ -n "${seen[$pr_trimmed]:-}" ]]; then
              continue
            fi
            seen[$pr_trimmed]=1
            entries+=("$(entry "$pr_trimmed" "refs/pull/$pr_trimmed/head")")
          done
        elif [[ -n "${INPUT_GIT_REF//[[:space:]]/}" ]]; then
          # The emptiness check above ignores whitespace, so hand checkout the
          # ref without stray surrounding blanks as well.
          entries+=("$(entry "manual" "$(trim "$INPUT_GIT_REF")")")
        else
          entries+=("$(entry "manual" "$DEFAULT_REF")")
        fi

        # Slurp the per-target objects into one compact array. The empty case
        # is spelled out so the output is always the literal `[]`.
        if (( ${#entries[@]} > 0 )); then
          targets=$(printf '%s\n' "${entries[@]}" | jq -cs '.')
        else
          targets='[]'
        fi
        echo "targets=$targets" >> "$GITHUB_OUTPUT"
        echo "Parsed targets: $targets"
# Runs the `uc-cluster-e2e` hatch environment once per target emitted by
# `prepare`. Matrix shape contract: {pr, ref} — defined in the `prepare` job.
run-uc-cluster-e2e-tests:
  needs: prepare
  # Skip when `prepare` produced no targets (e.g. the nightly
  # skip-if-unchanged path): GitHub rejects a matrix vector with no values
  # and fails the run instead of skipping it.
  #
  # Do not add `if: always()` / `if: !cancelled()` here or on sibling test
  # jobs. Without a status function this condition keeps its implicit
  # `success()`, so the job is skipped whenever `prepare` did not succeed;
  # forcing evaluation would make `fromJSON(needs.prepare.outputs.targets)`
  # fail on an empty output.
  if: needs.prepare.outputs.targets != '[]'
  strategy:
    fail-fast: false
    max-parallel: 2
    matrix:
      target: ${{ fromJSON(needs.prepare.outputs.targets) }}
  runs-on:
    group: databricks-protected-runner-group
    labels: linux-ubuntu-latest
  environment: azure-prod
  env:
    DBT_DATABRICKS_HOST_NAME: ${{ secrets.DATABRICKS_HOST }}
    DBT_DATABRICKS_CLIENT_ID: ${{ secrets.TEST_PECO_SP_ID }}
    DBT_DATABRICKS_CLIENT_SECRET: ${{ secrets.TEST_PECO_SP_SECRET }}
    DBT_DATABRICKS_UC_INITIAL_CATALOG: peco
    DBT_DATABRICKS_LOCATION_ROOT: ${{ secrets.TEST_PECO_EXTERNAL_LOCATION }}test
    TEST_PECO_UC_CLUSTER_ID: ${{ secrets.TEST_PECO_UC_CLUSTER_ID }}
    UV_FROZEN: "1"
  steps:
    # Checks out the target's ref, which for PR targets is refs/pull/<n>/head.
    # NOTE(review): later steps execute code from that checkout; consider
    # `persist-credentials: false` here — confirm no later step needs git auth.
    - name: Check out repository
      uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
      with:
        ref: ${{ matrix.target.ref }}
    - name: Setup Python Dependencies
      id: deps
      uses: ./.github/actions/setup-python-deps
    # Only runs when the dependency step did not report a cache hit.
    - name: Setup JFrog PyPI Proxy (fallback)
      if: steps.deps.outputs.cache-hit != 'true'
      uses: ./.github/actions/setup-jfrog-pypi
    - name: Set up python
      id: setup-python
      uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
      with:
        python-version: "3.10"
    # Presumably derives the *_HTTP_PATH variables used by the test step from
    # the cluster id(s) in the job env — verify against the script.
    - name: Get http path from environment
      run: python .github/workflows/build_cluster_http_path.py
      shell: sh
    - name: Install uv
      uses: astral-sh/setup-uv@38f3f104447c67c051c4a08e39b64a148898af3a # v4
      with:
        cache-local-path: ~/.cache/uv
    - name: Install Hatch
      id: install-dependencies
      uses: pypa/hatch@257e27e51a6a5616ed08a39a408a21c35c9931bc # install
    # Folded scalar: the lines below are joined with single spaces into the
    # same one-line command as before.
    - name: Run UC Cluster Functional Tests
      run: >-
        DBT_TEST_USER=notnecessaryformosttests@example.com
        DBT_DATABRICKS_LOCATION_ROOT=$DBT_DATABRICKS_LOCATION_ROOT
        DBT_DATABRICKS_HOST_NAME=$DBT_DATABRICKS_HOST_NAME
        DBT_DATABRICKS_UC_CLUSTER_HTTP_PATH=$DBT_DATABRICKS_UC_CLUSTER_HTTP_PATH
        DBT_DATABRICKS_CLIENT_ID=$DBT_DATABRICKS_CLIENT_ID
        DBT_DATABRICKS_CLIENT_SECRET=$DBT_DATABRICKS_CLIENT_SECRET
        hatch -v run uc-cluster-e2e
    # Uploaded even when the tests fail; the artifact name carries the target
    # label so matrix cells do not collide.
    - name: Upload UC Cluster Test Logs
      if: always()
      uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
      with:
        name: uc-cluster-test-logs-${{ matrix.target.pr }}
        path: logs/
        retention-days: 5
# Runs the `sqlw-e2e` hatch environment once per target emitted by `prepare`.
# Matrix shape contract: {pr, ref} — defined in the `prepare` job.
run-sqlwarehouse-e2e-tests:
  needs: prepare
  # Skip when `prepare` produced no targets (e.g. the nightly
  # skip-if-unchanged path): GitHub rejects a matrix vector with no values
  # and fails the run instead of skipping it. The condition deliberately has
  # no status function, so the implicit `success()` still skips this job when
  # `prepare` did not succeed.
  if: needs.prepare.outputs.targets != '[]'
  strategy:
    fail-fast: false
    max-parallel: 2
    matrix:
      target: ${{ fromJSON(needs.prepare.outputs.targets) }}
  runs-on:
    group: databricks-protected-runner-group
    labels: linux-ubuntu-latest
  environment: azure-prod
  env:
    DBT_DATABRICKS_HOST_NAME: ${{ secrets.DATABRICKS_HOST }}
    DBT_DATABRICKS_CLIENT_ID: ${{ secrets.TEST_PECO_SP_ID }}
    DBT_DATABRICKS_CLIENT_SECRET: ${{ secrets.TEST_PECO_SP_SECRET }}
    DBT_DATABRICKS_HTTP_PATH: ${{ secrets.TEST_PECO_WAREHOUSE_HTTP_PATH }}
    DBT_DATABRICKS_UC_INITIAL_CATALOG: peco
    DBT_DATABRICKS_LOCATION_ROOT: ${{ secrets.TEST_PECO_EXTERNAL_LOCATION }}test
    TEST_PECO_UC_CLUSTER_ID: ${{ secrets.TEST_PECO_UC_CLUSTER_ID }}
    UV_FROZEN: "1"
  steps:
    # Checks out the target's ref, which for PR targets is refs/pull/<n>/head.
    # NOTE(review): later steps execute code from that checkout; consider
    # `persist-credentials: false` here — confirm no later step needs git auth.
    - name: Check out repository
      uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
      with:
        ref: ${{ matrix.target.ref }}
    - name: Setup Python Dependencies
      id: deps
      uses: ./.github/actions/setup-python-deps
    # Only runs when the dependency step did not report a cache hit.
    - name: Setup JFrog PyPI Proxy (fallback)
      if: steps.deps.outputs.cache-hit != 'true'
      uses: ./.github/actions/setup-jfrog-pypi
    - name: Set up python
      id: setup-python
      uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
      with:
        python-version: "3.10"
    # Presumably derives the *_HTTP_PATH variables used by the test step from
    # the cluster id(s) in the job env — verify against the script.
    - name: Get http path from environment
      run: python .github/workflows/build_cluster_http_path.py
      shell: sh
    - name: Install uv
      uses: astral-sh/setup-uv@38f3f104447c67c051c4a08e39b64a148898af3a # v4
      with:
        cache-local-path: ~/.cache/uv
    - name: Install Hatch
      id: install-dependencies
      uses: pypa/hatch@257e27e51a6a5616ed08a39a408a21c35c9931bc # install
    # Folded scalar: the lines below are joined with single spaces into the
    # same one-line command as before. The warehouse path itself comes from
    # the job-level DBT_DATABRICKS_HTTP_PATH.
    - name: Run Sql Endpoint Functional Tests
      run: >-
        DBT_TEST_USER=notnecessaryformosttests@example.com
        DBT_DATABRICKS_LOCATION_ROOT=$DBT_DATABRICKS_LOCATION_ROOT
        DBT_DATABRICKS_HOST_NAME=$DBT_DATABRICKS_HOST_NAME
        DBT_DATABRICKS_UC_CLUSTER_HTTP_PATH=$DBT_DATABRICKS_UC_CLUSTER_HTTP_PATH
        DBT_DATABRICKS_CLIENT_ID=$DBT_DATABRICKS_CLIENT_ID
        DBT_DATABRICKS_CLIENT_SECRET=$DBT_DATABRICKS_CLIENT_SECRET
        hatch -v run sqlw-e2e
    # Uploaded even when the tests fail; the artifact name carries the target
    # label so matrix cells do not collide.
    - name: Upload SQL Endpoint Test Logs
      if: always()
      uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
      with:
        name: sql-endpoint-test-logs-${{ matrix.target.pr }}
        path: logs/
        retention-days: 5
# Runs the `cluster-e2e` hatch environment once per target emitted by
# `prepare`. Matrix shape contract: {pr, ref} — defined in the `prepare` job.
run-cluster-e2e-tests:
  needs: prepare
  # Skip when `prepare` produced no targets (e.g. the nightly
  # skip-if-unchanged path): GitHub rejects a matrix vector with no values
  # and fails the run instead of skipping it. The condition deliberately has
  # no status function, so the implicit `success()` still skips this job when
  # `prepare` did not succeed.
  if: needs.prepare.outputs.targets != '[]'
  strategy:
    fail-fast: false
    max-parallel: 2
    matrix:
      target: ${{ fromJSON(needs.prepare.outputs.targets) }}
  runs-on:
    group: databricks-protected-runner-group
    labels: linux-ubuntu-latest
  environment: azure-prod
  env:
    DBT_DATABRICKS_HOST_NAME: ${{ secrets.DATABRICKS_HOST }}
    DBT_DATABRICKS_CLIENT_ID: ${{ secrets.TEST_PECO_SP_ID }}
    DBT_DATABRICKS_CLIENT_SECRET: ${{ secrets.TEST_PECO_SP_SECRET }}
    TEST_PECO_CLUSTER_ID: ${{ secrets.TEST_PECO_CLUSTER_ID }}
    DBT_DATABRICKS_LOCATION_ROOT: ${{ secrets.TEST_PECO_EXTERNAL_LOCATION }}test
    UV_FROZEN: "1"
  steps:
    # Checks out the target's ref, which for PR targets is refs/pull/<n>/head.
    # NOTE(review): later steps execute code from that checkout; consider
    # `persist-credentials: false` here — confirm no later step needs git auth.
    - name: Check out repository
      uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
      with:
        ref: ${{ matrix.target.ref }}
    - name: Setup Python Dependencies
      id: deps
      uses: ./.github/actions/setup-python-deps
    # Only runs when the dependency step did not report a cache hit.
    - name: Setup JFrog PyPI Proxy (fallback)
      if: steps.deps.outputs.cache-hit != 'true'
      uses: ./.github/actions/setup-jfrog-pypi
    - name: Set up python
      id: setup-python
      uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
      with:
        python-version: "3.10"
    # Presumably derives DBT_DATABRICKS_CLUSTER_HTTP_PATH (read by the test
    # step) from TEST_PECO_CLUSTER_ID — verify against the script.
    - name: Get http path from environment
      run: python .github/workflows/build_cluster_http_path.py
      shell: sh
    - name: Install uv
      uses: astral-sh/setup-uv@38f3f104447c67c051c4a08e39b64a148898af3a # v4
      with:
        cache-local-path: ~/.cache/uv
    - name: Install Hatch
      id: install-dependencies
      uses: pypa/hatch@257e27e51a6a5616ed08a39a408a21c35c9931bc # install
    # Folded scalar: the lines below are joined with single spaces into the
    # same one-line command as before. Unlike the sibling jobs, this one
    # passes the cluster path to the tests as DBT_DATABRICKS_HTTP_PATH.
    - name: Run Cluster Functional Tests
      run: >-
        DBT_TEST_USER=notnecessaryformosttests@example.com
        DBT_DATABRICKS_LOCATION_ROOT=$DBT_DATABRICKS_LOCATION_ROOT
        DBT_DATABRICKS_HOST_NAME=$DBT_DATABRICKS_HOST_NAME
        DBT_DATABRICKS_HTTP_PATH=$DBT_DATABRICKS_CLUSTER_HTTP_PATH
        DBT_DATABRICKS_CLIENT_ID=$DBT_DATABRICKS_CLIENT_ID
        DBT_DATABRICKS_CLIENT_SECRET=$DBT_DATABRICKS_CLIENT_SECRET
        hatch -v run cluster-e2e
    # Uploaded even when the tests fail; the artifact name carries the target
    # label so matrix cells do not collide.
    - name: Upload Cluster Test Logs
      if: always()
      uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
      with:
        name: cluster-test-logs-${{ matrix.target.pr }}
        path: logs/
        retention-days: 5
# Posts a per-job pass/fail summary comment back to the PR when dispatched
# with a single PR number (the slash-command path). Skipped for batch
# dispatches (pr_numbers contains a comma) and for schedule / git_ref runs.
# Matrix jobs' result fields are aggregated across cells, which is why this
# only runs for single-PR dispatches.
report-status:
  needs:
    - run-uc-cluster-e2e-tests
    - run-sqlwarehouse-e2e-tests
    - run-cluster-e2e-tests
  # `always()` lets the summary be posted when test jobs failed or were
  # skipped; the remaining clauses restrict it to single-PR dispatches.
  if: |
    always() &&
    github.event_name == 'workflow_dispatch' &&
    inputs.pr_numbers != '' &&
    !contains(inputs.pr_numbers, ',')
  runs-on:
    group: databricks-protected-runner-group
    labels: linux-ubuntu-latest
  # Job-level permissions replace the workflow-level set; commenting on the
  # PR is the only thing this job does.
  permissions:
    pull-requests: write
  steps:
    - name: Post result comment
      uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7
      env:
        PR_NUMBER: ${{ inputs.pr_numbers }}
        UC_RESULT: ${{ needs.run-uc-cluster-e2e-tests.result }}
        SQLW_RESULT: ${{ needs.run-sqlwarehouse-e2e-tests.result }}
        CLUSTER_RESULT: ${{ needs.run-cluster-e2e-tests.result }}
      with:
        script: |
          // Because this job runs under always(), it can be reached with a
          // pr_numbers value that is whitespace-only or not purely numeric.
          // parseInt would turn those into NaN or a truncated number
          // ("12abc" -> 12), so require plain digits before touching the API.
          const prNumber = (process.env.PR_NUMBER || '').trim();
          if (!/^[0-9]+$/.test(prNumber)) {
            core.warning(
              `Skipping result comment: pr_numbers='${prNumber}' is not a single PR number.`
            );
            return;
          }

          // Any result without an explicit icon (failure, cancelled, ...) is
          // rendered with :x:.
          const ICONS = { success: ':white_check_mark:', skipped: ':fast_forward:' };
          const results = {
            'UC cluster': process.env.UC_RESULT,
            'SQL warehouse': process.env.SQLW_RESULT,
            'All-purpose cluster': process.env.CLUSTER_RESULT,
          };
          const line = Object.entries(results)
            .map(([name, r]) => `${name} ${ICONS[r] || ':x:'} ${r}`)
            .join(' · ');
          const runUrl =
            `https://github.com/${context.repo.owner}/${context.repo.repo}` +
            `/actions/runs/${context.runId}`;
          await github.rest.issues.createComment({
            owner: context.repo.owner,
            repo: context.repo.repo,
            issue_number: parseInt(prNumber, 10),
            body: `Integration results for PR #${prNumber} — ${line}\n\n[Run details](${runUrl}).`,
          });