Merged

98 commits
388c791
feat: add DuckDB, Trino, Dremio & Spark support to CI and CLI (part 1)
devin-ai-integration[bot] Mar 1, 2026
63a6bc1
feat: add Docker startup steps for Trino, Dremio, Spark in CI workflow
devin-ai-integration[bot] Mar 1, 2026
04b80ad
feat: add DuckDB, Trino, Dremio, Spark profile targets
devin-ai-integration[bot] Mar 1, 2026
16665b8
feat: add Trino Iceberg catalog config for CI testing
devin-ai-integration[bot] Mar 1, 2026
6e520fb
feat: add Spark Hive metastore config for CI testing
devin-ai-integration[bot] Mar 1, 2026
9351fbc
feat: add Dremio setup script for CI testing
devin-ai-integration[bot] Mar 1, 2026
0836bbf
feat: add Trino, Dremio, Spark Docker services to docker-compose.yml
devin-ai-integration[bot] Mar 1, 2026
40b2c74
fix: address DuckDB, Spark, and Dremio CI test failures
devin-ai-integration[bot] Mar 1, 2026
bac36f9
fix: use Nessie catalog source for Dremio instead of plain S3
devin-ai-integration[bot] Mar 1, 2026
3682f1c
feat: add seed caching for Docker-based adapters in CI
devin-ai-integration[bot] Mar 1, 2026
7969949
fix: move seed cache restore before Docker service startup
devin-ai-integration[bot] Mar 1, 2026
0d00cfa
fix: add docker-compose.yml to seed cache key and fail-fast readiness…
devin-ai-integration[bot] Mar 1, 2026
7213534
fix: convert ClickHouse bind mount to named Docker volume for seed ca…
devin-ai-integration[bot] Mar 1, 2026
25b81ff
ci: temporarily use dbt-data-reliability fix branch for Trino/Spark s…
devin-ai-integration[bot] Mar 1, 2026
c40da20
fix: stop Docker containers before archiving seed cache volumes
devin-ai-integration[bot] Mar 1, 2026
03c8c60
fix: add readiness wait after restarting Docker containers for seed c…
devin-ai-integration[bot] Mar 1, 2026
2b1c7c5
fix: use Trino starting:false check for proper readiness detection
devin-ai-integration[bot] Mar 1, 2026
6459984
fix: add Hive Metastore readiness check after container restart for T…
devin-ai-integration[bot] Mar 1, 2026
af20d30
fix: Dremio CI - batched seed materialization, single-threaded seedin…
devin-ai-integration[bot] Mar 1, 2026
dfa315e
fix: Dremio CI - single-threaded dbt run/test, fix cross-schema seed …
devin-ai-integration[bot] Mar 1, 2026
74368de
fix: revert reserved word quoting, use Dremio-specific expected failu…
devin-ai-integration[bot] Mar 1, 2026
7586f6e
fix: rename reserved word columns (min/max/sum/one) to avoid Dremio S…
devin-ai-integration[bot] Mar 1, 2026
b6f51e3
fix: always run seed step for all adapters (cloud adapters need fresh…
devin-ai-integration[bot] Mar 1, 2026
0165356
fix: Dremio generate_schema_name - use default_schema instead of root…
devin-ai-integration[bot] Mar 1, 2026
71fbaf7
fix: Dremio - put seeds in default schema to avoid cross-schema refer…
devin-ai-integration[bot] Mar 1, 2026
a1c8be2
feat: external seed loading for Dremio and Spark via MinIO/CSV instea…
devin-ai-integration[bot] Mar 1, 2026
2db8cc9
fix: format load_seeds_external.py with black, remove unused imports
devin-ai-integration[bot] Mar 1, 2026
99945c8
fix: use --entrypoint /bin/sh for minio/mc docker container to enable…
devin-ai-integration[bot] Mar 1, 2026
dfac673
refactor: extract external seeders into classes with click CLI
devin-ai-integration[bot] Mar 2, 2026
13936c1
fix: black/isort formatting in dremio.py
devin-ai-integration[bot] Mar 2, 2026
6b835c9
fix: read Dremio credentials from dremio-setup.sh for external seeder
devin-ai-integration[bot] Mar 2, 2026
094841b
fix: regex to handle escaped quotes in dremio-setup.sh for credential…
devin-ai-integration[bot] Mar 2, 2026
fd4738b
fix: use COPY INTO for Dremio seeds, skip Spark seed caching
devin-ai-integration[bot] Mar 2, 2026
eaca0ab
fix: add file_format delta for Spark models in e2e dbt_project.yml
devin-ai-integration[bot] Mar 2, 2026
039281c
fix: Dremio S3 source - use compatibilityMode, rootPath=/, v3 Catalog…
devin-ai-integration[bot] Mar 2, 2026
6b77609
fix: Dremio root_path double-nesting + Spark CLI file_format delta
devin-ai-integration[bot] Mar 2, 2026
11379cf
fix: Dremio Space architecture - views in Space, seeds in Nessie data…
devin-ai-integration[bot] Mar 2, 2026
a5449ad
style: apply black formatting to dremio.py
devin-ai-integration[bot] Mar 2, 2026
703d183
fix: restore dremio.py credential extraction from dremio-setup.sh
devin-ai-integration[bot] Mar 2, 2026
b7e3ee0
fix: use enterprise_catalog_namespace for Dremio to avoid Nessie vers…
devin-ai-integration[bot] Mar 2, 2026
b8790d6
fix: restore dremio__generate_schema_name delegation for correct Ness…
devin-ai-integration[bot] Mar 2, 2026
692b494
fix: flatten Dremio seed schema to single-level Nessie namespace
devin-ai-integration[bot] Mar 2, 2026
6cbc6a8
fix: avoid typos pre-commit false positive on SOURCE plural
devin-ai-integration[bot] Mar 2, 2026
2bead60
fix: create Nessie namespace via REST API + refresh source metadata
devin-ai-integration[bot] Mar 2, 2026
9b6bb1c
style: apply black formatting to Nessie namespace methods
devin-ai-integration[bot] Mar 2, 2026
a325dcc
fix: improve Nessie namespace creation + force Dremio catalog discovery
devin-ai-integration[bot] Mar 2, 2026
3a0f781
fix: force NessieSource metadata re-scan via Catalog API policy update
devin-ai-integration[bot] Mar 2, 2026
9752026
fix: use USE BRANCH main for Dremio Nessie version context resolution
devin-ai-integration[bot] Mar 2, 2026
25576cb
fix: put Dremio seeds in same Nessie namespace as models to fix VDS v…
devin-ai-integration[bot] Mar 2, 2026
9f5623e
fix: use CREATE FOLDER + ALTER SOURCE REFRESH for Dremio metadata vis…
devin-ai-integration[bot] Mar 2, 2026
149f7b8
fix: skip Docker restart for Dremio to preserve Nessie metadata cache
devin-ai-integration[bot] Mar 2, 2026
0415db5
fix: resolve Dremio edr monitor duplicate keys + exclude ephemeral mo…
devin-ai-integration[bot] Mar 2, 2026
196be41
fix: add continue-on-error for Dremio edr steps (dbt-core 1.11 compat)
devin-ai-integration[bot] Mar 2, 2026
4fba3c4
fix: revert temporary dbt-data-reliability branch pin (PR #948 merged)
devin-ai-integration[bot] Mar 2, 2026
dcfa5e6
refactor: address PR review - ref syntax, healthchecks, external scripts
devin-ai-integration[bot] Mar 2, 2026
36fb6ff
refactor: parameterize Docker credentials via environment variables
devin-ai-integration[bot] Mar 2, 2026
34f9160
style: fix prettier formatting for docker-compose.yml healthchecks
devin-ai-integration[bot] Mar 2, 2026
f489915
fix: increase Docker healthcheck timeouts for CI and fix Spark volume…
devin-ai-integration[bot] Mar 2, 2026
849c017
fix: increase dremio-minio healthcheck retries to 60 with start_perio…
devin-ai-integration[bot] Mar 2, 2026
e7de065
fix: use bash TCP check for healthchecks (curl/nc missing in MinIO 20…
devin-ai-integration[bot] Mar 2, 2026
04aa215
fix: align dremio-setup.sh default password with docker-compose (drem…
devin-ai-integration[bot] Mar 2, 2026
0fad79f
fix: resolve dremio.py credential extraction from shell variable defa…
devin-ai-integration[bot] Mar 2, 2026
6c0b313
fix: increase hive-metastore healthcheck retries to 60 with 60s start…
devin-ai-integration[bot] Mar 2, 2026
a8214bd
fix: wait for dremio-setup to complete before proceeding (use --exit-…
devin-ai-integration[bot] Mar 2, 2026
4518e15
fix: add nessie dependency to dremio-setup so NessieSource creation s…
devin-ai-integration[bot] Mar 2, 2026
a790a88
fix: use ghcr.io registry for nessie image (no longer on Docker Hub)
devin-ai-integration[bot] Mar 2, 2026
d06c1e3
fix: add continue-on-error for Dremio edr steps (dbt-core 1.11 ref() …
devin-ai-integration[bot] Mar 2, 2026
e450aa3
Merge remote-tracking branch 'origin/master' into devin/ELE-5266-1772…
devin-ai-integration[bot] Mar 2, 2026
7328c48
fix: remove continue-on-error for Dremio edr steps (ref() override no…
devin-ai-integration[bot] Mar 2, 2026
1182be3
fix: use dot-separated Nessie namespace for Dremio elementary profile
devin-ai-integration[bot] Mar 2, 2026
454d7be
fix: rename 'snapshots' CTE to avoid Dremio reserved keyword conflict
devin-ai-integration[bot] Mar 2, 2026
88b87c8
fix: quote 'filter' column to avoid Dremio reserved keyword conflict
devin-ai-integration[bot] Mar 2, 2026
820fd32
fix: make 'filter' column quoting Dremio-specific to avoid Snowflake …
devin-ai-integration[bot] Mar 2, 2026
0a934d3
fix: override dbt-dremio dateadd to handle integer interval parameter
devin-ai-integration[bot] Mar 2, 2026
897bf50
fix: remove 'select' prefix from dateadd override to avoid $SCALAR_QU…
devin-ai-integration[bot] Mar 2, 2026
abd09de
fix: strip Z timezone suffix from Dremio timestamps to avoid GandivaE…
devin-ai-integration[bot] Mar 2, 2026
7351b30
fix: use double quotes in dbt_project.yml for prettier compatibility
devin-ai-integration[bot] Mar 2, 2026
db901e5
fix: also replace T separator with space in Dremio timestamp cast
devin-ai-integration[bot] Mar 2, 2026
f0b3b09
fix: use targeted regex for T separator to avoid replacing T in non-t…
devin-ai-integration[bot] Mar 2, 2026
b4f7a3e
fix: quote 'filter' reserved keyword in get_source_freshness_results …
devin-ai-integration[bot] Mar 3, 2026
0e132ee
fix: quote Dremio reserved keywords row_number and count in SQL aliases
devin-ai-integration[bot] Mar 3, 2026
3158fe6
refactor: use elementary.escape_reserved_keywords() for Dremio reserv…
devin-ai-integration[bot] Mar 3, 2026
8a9596d
chore: revert temporary dbt-data-reliability branch pin (PR #955 merged)
devin-ai-integration[bot] Mar 3, 2026
5e97cec
fix: resolve 'Column unique_id is ambiguous' error in Dremio joins
devin-ai-integration[bot] Mar 3, 2026
00afc47
fix: qualify invocation_id column reference to resolve ambiguity in O…
devin-ai-integration[bot] Mar 3, 2026
d48a335
Merge branch 'master' into devin/ELE-5266-1772368864-code-ci
haritamar Mar 3, 2026
efaab28
fix: address CodeRabbit review comments
devin-ai-integration[bot] Mar 3, 2026
19652db
Merge branch 'devin/ELE-5266-1772368864-code-ci' of https://git-manag…
devin-ai-integration[bot] Mar 3, 2026
a63ce6b
style: fix black formatting in dremio.py and spark.py
devin-ai-integration[bot] Mar 3, 2026
2263199
fix: address CodeRabbit bugs - 409 fallback and stale empty tables
devin-ai-integration[bot] Mar 3, 2026
becc9a1
fix: address remaining CodeRabbit CI comments
devin-ai-integration[bot] Mar 3, 2026
a5dcef9
fix: clarify Spark seeder pyhive dependency
devin-ai-integration[bot] Mar 3, 2026
a50645c
fix: address CodeRabbit review round 3 - cleanup and hardening
devin-ai-integration[bot] Mar 3, 2026
05c57a3
fix: correct isort import order in dremio.py
devin-ai-integration[bot] Mar 3, 2026
114dd3f
fix: restore continue-on-error on dbt test step (many e2e tests are d…
devin-ai-integration[bot] Mar 3, 2026
856ddfa
Merge remote-tracking branch 'origin/master' into devin/ELE-5266-1772…
devin-ai-integration[bot] Mar 3, 2026
71d11e7
fix: remove Dremio dateadd and cast_column overrides now handled by d…
devin-ai-integration[bot] Mar 3, 2026
3ecb6f4
fix: remove dremio_target_database override now handled by dbt-data-r…
devin-ai-integration[bot] Mar 3, 2026
4 changes: 4 additions & 0 deletions .github/workflows/test-all-warehouses.yml
@@ -96,6 +96,10 @@ jobs:
databricks_catalog,
athena,
clickhouse,
duckdb,
trino,
dremio,
spark,
]
uses: ./.github/workflows/test-warehouse.yml
with:
148 changes: 126 additions & 22 deletions .github/workflows/test-warehouse.yml
@@ -15,6 +15,9 @@ on:
- spark
- athena
- clickhouse
- duckdb
- trino
- dremio
elementary-ref:
type: string
required: false
@@ -83,6 +86,46 @@ jobs:
path: dbt-data-reliability
ref: ${{ inputs.dbt-data-reliability-ref }}

# ── Seed cache: compute key & restore volumes BEFORE starting services ──
# This ensures Docker volumes are populated before containers initialize.
- name: Compute seed cache key
id: seed-cache-key
if: inputs.warehouse-type == 'postgres' || inputs.warehouse-type == 'clickhouse' || inputs.warehouse-type == 'duckdb'
working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
run: |
# Cache key is a hash of seed-related files so that cache busts when
# the data generation script, dbt project config, or seed schemas change.
SEED_HASH=$(
{
cat generate_data.py \
dbt_project.yml \
docker-compose.yml \
${{ github.workspace }}/elementary/tests/profiles/profiles.yml.j2
echo "dbt_version=${{ inputs.dbt-version || '' }}"
} | sha256sum | head -c 16
)
echo "seed-hash=$SEED_HASH" >> "$GITHUB_OUTPUT"

- name: Restore seed cache
id: seed-cache
if: steps.seed-cache-key.outputs.seed-hash
uses: actions/cache@v4
with:
path: /tmp/seed-cache-${{ inputs.warehouse-type }}
key: seed-${{ inputs.warehouse-type }}-${{ steps.seed-cache-key.outputs.seed-hash }}

- name: Restore cached seed data into Docker volumes
if: steps.seed-cache.outputs.cache-hit == 'true' && inputs.warehouse-type != 'duckdb'
working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
run: bash ci/restore_seed_cache.sh "${{ inputs.warehouse-type }}"

- name: Restore cached DuckDB seed
if: steps.seed-cache.outputs.cache-hit == 'true' && inputs.warehouse-type == 'duckdb'
run: |
cp /tmp/seed-cache-duckdb/elementary_test.duckdb /tmp/elementary_test.duckdb
echo "DuckDB seed cache restored."

# ── Start warehouse services ──────────────────────────────────────────
- name: Start Postgres
if: inputs.warehouse-type == 'postgres'
working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
@@ -93,20 +136,43 @@ jobs:
working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
run: docker compose up -d clickhouse

- name: Start Trino
if: inputs.warehouse-type == 'trino'
working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
run: |
docker compose up -d --wait trino

- name: Start Dremio
if: inputs.warehouse-type == 'dremio'
working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
run: |
# Start Dremio services in detached mode with healthchecks, then
# run the setup container separately. Using --exit-code-from would
# imply --abort-on-container-exit, killing all services when the
# setup container finishes.
docker compose up -d --wait dremio dremio-minio nessie
docker compose run --rm dremio-setup

- name: Start Spark
if: inputs.warehouse-type == 'spark'
working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
run: |
docker compose up -d --build --wait spark-thrift

- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: "3.10"

- name: Install Spark requirements
if: inputs.warehouse-type == 'spark'
run: sudo apt-get install python-dev libsasl2-dev gcc
run: sudo apt-get install -y python3-dev libsasl2-dev gcc

- name: Install dbt
run: >
pip install
"dbt-core${{ inputs.dbt-version && format('=={0}', inputs.dbt-version) }}"
"dbt-${{ (inputs.warehouse-type == 'databricks_catalog' && 'databricks') || (inputs.warehouse-type == 'athena' && 'athena-community') || inputs.warehouse-type }}${{ inputs.dbt-version && format('~={0}', inputs.dbt-version) }}"
"dbt-${{ (inputs.warehouse-type == 'databricks_catalog' && 'databricks') || (inputs.warehouse-type == 'athena' && 'athena-community') || (inputs.warehouse-type == 'dremio' && 'dremio') || inputs.warehouse-type }}${{ (inputs.warehouse-type == 'spark' && '[PyHive]') || '' }}${{ inputs.dbt-version && format('~={0}', inputs.dbt-version) }}"

- name: Install Elementary
run: |
@@ -117,21 +183,29 @@
env:
CI_WAREHOUSE_SECRETS: ${{ secrets.CI_WAREHOUSE_SECRETS || '' }}
run: |
# Schema name = py_<YYMMDD_HHMMSS>_<branch≤19>_<8-char hash>
# The hash prevents collisions across concurrent jobs; the branch
# keeps it human-readable; the timestamp helps with stale schema
# cleanup and ensures each CI run gets a unique schema.
#
# Budget (PostgreSQL 63-char limit):
# py_(3) + timestamp(13) + _(1) + branch(≤19) + _(1) + hash(8) = 45
# + _elementary(11) + _gw7(4) = 60
CONCURRENCY_GROUP="tests_${{ inputs.warehouse-type }}_dbt_${{ inputs.dbt-version }}_${BRANCH_NAME}"
SHORT_HASH=$(echo -n "$CONCURRENCY_GROUP" | sha256sum | head -c 8)
SAFE_BRANCH=$(echo "${BRANCH_NAME}" | awk '{print tolower($0)}' | sed "s/[^a-z0-9]/_/g; s/__*/_/g" | head -c 19)
DATE_STAMP=$(date -u +%y%m%d_%H%M%S)
SCHEMA_NAME="py_${DATE_STAMP}_${SAFE_BRANCH}_${SHORT_HASH}"

echo "Schema name: $SCHEMA_NAME (branch='${BRANCH_NAME}', timestamp=${DATE_STAMP}, hash of concurrency group)"
# Docker-based adapters use ephemeral containers, so a fixed schema
# name is safe (the concurrency group prevents parallel collisions).
# This enables caching the seeded database state between runs.
IS_DOCKER=false
case "${{ inputs.warehouse-type }}" in
postgres|clickhouse|trino|dremio|duckdb|spark) IS_DOCKER=true ;;
esac

if [ "$IS_DOCKER" = "true" ]; then
SCHEMA_NAME="elementary_tests"
echo "Schema name: $SCHEMA_NAME (fixed for Docker adapter '${{ inputs.warehouse-type }}')"
else
# Cloud adapters: unique schema per run to avoid collisions.
# Schema name = py_<YYMMDD_HHMMSS>_<branch≤19>_<8-char hash>
CONCURRENCY_GROUP="tests_${{ inputs.warehouse-type }}_dbt_${{ inputs.dbt-version }}_${BRANCH_NAME}"
SHORT_HASH=$(echo -n "$CONCURRENCY_GROUP" | sha256sum | head -c 8)
SAFE_BRANCH=$(echo "${BRANCH_NAME}" | awk '{print tolower($0)}' | sed "s/[^a-z0-9]/_/g; s/__*/_/g" | head -c 19)
DATE_STAMP=$(date -u +%y%m%d_%H%M%S)
SCHEMA_NAME="py_${DATE_STAMP}_${SAFE_BRANCH}_${SHORT_HASH}"
echo "Schema name: $SCHEMA_NAME (branch='${BRANCH_NAME}', timestamp=${DATE_STAMP}, hash of concurrency group)"
fi

echo "SCHEMA_NAME=$SCHEMA_NAME" >> "$GITHUB_ENV"

python "${{ github.workspace }}/elementary/tests/profiles/generate_profiles.py" \
--template "${{ github.workspace }}/elementary/tests/profiles/profiles.yml.j2" \
@@ -160,17 +234,42 @@ jobs:
run: |
dbt deps

- name: Generate seed data
working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
if: steps.seed-cache.outputs.cache-hit != 'true'
run: python generate_data.py

- name: Seed e2e dbt project (external)
working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
if: steps.seed-cache.outputs.cache-hit != 'true' && (inputs.warehouse-type == 'dremio' || inputs.warehouse-type == 'spark')
run: python load_seeds_external.py "${{ inputs.warehouse-type }}" "$SCHEMA_NAME" data

- name: Seed e2e dbt project
working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
if: inputs.warehouse-type == 'postgres' || inputs.warehouse-type == 'clickhouse' || inputs.generate-data
if: steps.seed-cache.outputs.cache-hit != 'true' && inputs.warehouse-type != 'dremio' && inputs.warehouse-type != 'spark'
run: dbt seed -f --target "${{ inputs.warehouse-type }}"

- name: Save seed cache from Docker volumes
if: steps.seed-cache.outputs.cache-hit != 'true' && (inputs.warehouse-type == 'postgres' || inputs.warehouse-type == 'clickhouse')
working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
run: bash ci/save_seed_cache.sh "${{ inputs.warehouse-type }}"

- name: Save DuckDB seed cache
if: steps.seed-cache.outputs.cache-hit != 'true' && inputs.warehouse-type == 'duckdb'
run: |
python generate_data.py
dbt seed -f --target "${{ inputs.warehouse-type }}"
mkdir -p /tmp/seed-cache-duckdb
cp /tmp/elementary_test.duckdb /tmp/seed-cache-duckdb/elementary_test.duckdb
echo "DuckDB seed cache saved."

- name: Run e2e dbt project
working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
run: |
dbt run --target "${{ inputs.warehouse-type }}" || true
# Dremio needs single-threaded execution to avoid Nessie catalog race conditions
EXTRA_ARGS=()
if [ "${{ inputs.warehouse-type }}" = "dremio" ]; then
EXTRA_ARGS+=(--threads 1)
fi
dbt run --target "${{ inputs.warehouse-type }}" "${EXTRA_ARGS[@]}" || true

# Validate run_results.json: only error_model should be non-success
jq -e '
@@ -192,7 +291,12 @@
working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
continue-on-error: true
run: |
dbt test --target "${{ inputs.warehouse-type }}"
# Dremio needs single-threaded execution to avoid Nessie catalog race conditions
EXTRA_ARGS=()
if [ "${{ inputs.warehouse-type }}" = "dremio" ]; then
EXTRA_ARGS+=(--threads 1 --exclude tag:ephemeral_model)
fi
dbt test --target "${{ inputs.warehouse-type }}" "${EXTRA_ARGS[@]}"

- name: Run help
run: edr --help
9 changes: 9 additions & 0 deletions elementary/clients/dbt/transient_errors.py
@@ -100,6 +100,15 @@
"connection timed out",
"broken pipe",
),
"spark": (
"thrift transport is closed",
"could not connect to any thrift server",
"connection refused",
),
"duckdb": (
# DuckDB runs in-process; transient errors are rare.
# Common patterns (connection reset, broken pipe) are in _COMMON.
),
}

# Pre-computed union of all adapter-specific patterns for the fallback path
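The tuples added above are lower-case substrings that are presumably matched against dbt error output to decide whether a failure is transient and worth retrying. The matching helper itself is outside this diff; the sketch below only illustrates the general lookup shape and uses assumed names:

```python
# Illustrative only: the real matching logic lives in
# elementary/clients/dbt/transient_errors.py and is not shown in this diff.
_COMMON = ("connection reset", "broken pipe")
ADAPTER_PATTERNS = {
    "spark": (
        "thrift transport is closed",
        "could not connect to any thrift server",
        "connection refused",
    ),
    "duckdb": (),  # in-process engine; relies on the common patterns
}

def is_transient_error(adapter: str, error_output: str) -> bool:
    text = error_output.lower()
    patterns = _COMMON + ADAPTER_PATTERNS.get(adapter, ())
    return any(pattern in text for pattern in patterns)
```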
8 changes: 8 additions & 0 deletions elementary/monitor/dbt_project/dbt_project.yml
@@ -28,9 +28,17 @@ clean-targets: # directories to be removed by `dbt clean`

# Configuring models
# Full documentation: https://docs.getdbt.com/docs/configuring-models
dispatch:
- macro_namespace: elementary
search_order: ["elementary_cli", "elementary"]

vars:
edr_cli_run: true

models:
elementary_cli:
+file_format: "{{ 'delta' if target.type == 'spark' else none }}"

quoting:
database: "{{ env_var('DATABASE_QUOTING', 'None') | as_native }}"
schema: "{{ env_var('SCHEMA_QUOTING', 'None') | as_native }}"
Original file line number Diff line number Diff line change
@@ -52,7 +52,7 @@
select * from {{ ref('elementary', 'dbt_models') }}
),

snapshots as (
snapshots_data as (
select * from {{ ref('elementary', 'dbt_snapshots') }}
),

@@ -71,7 +71,7 @@
artifacts_meta as (
select unique_id, meta from models
union all
select unique_id, meta from snapshots
select unique_id, meta from snapshots_data
union all
select unique_id, meta from seeds
),
Original file line number Diff line number Diff line change
@@ -97,7 +97,7 @@
{% if error_after_column_exists %}
results.error_after,
results.warn_after,
results.filter,
results.{{ elementary.escape_reserved_keywords('filter') }},
{% endif %}
results.error,
sources.database_name,
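Several macro changes in this PR wrap aliases such as filter, count, and row_number in elementary.escape_reserved_keywords() so Dremio accepts them. That macro's implementation is not part of this diff; the sketch below is only a rough illustration of the idea, and the keyword set and function name are assumptions:

```python
# Illustrative sketch of reserved-keyword escaping; the actual
# elementary.escape_reserved_keywords() macro is not shown in this PR.
RESERVED_KEYWORDS = {"filter", "count", "row_number", "min", "max", "sum"}

def escape_reserved_keyword(identifier: str) -> str:
    # Quote the identifier only when it collides with a reserved word,
    # so generated SQL stays unchanged for ordinary column names.
    if identifier.lower() in RESERVED_KEYWORDS:
        return f'"{identifier}"'
    return identifier
```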
Original file line number Diff line number Diff line change
@@ -2,10 +2,10 @@
{% set counter_query %}
with invocations as (
select invocation_id
from {{ ref("elementary", "dbt_source_freshness_results") }}
from {{ ref("dbt_source_freshness_results", package="elementary") }}
where {{ elementary.edr_datediff(elementary.edr_cast_as_timestamp('generated_at'), elementary.edr_current_timestamp(), 'day') }} < {{ days_back }}
)
select count(*) as count
select count(*) as {{ elementary.escape_reserved_keywords('count') }}
from invocations
where invocation_id = {{ elementary.edr_quote(invocation_id) }}
{% endset %}
Original file line number Diff line number Diff line change
@@ -17,3 +17,7 @@
{% macro athena__get_adapter_unique_id() %}
{{ return(target.s3_staging_dir) }}
{% endmacro %}

{% macro duckdb__get_adapter_unique_id() %}
{{ return(target.path) }}
{% endmacro %}
Original file line number Diff line number Diff line change
@@ -2,16 +2,17 @@
{% set query %}
with ordered_run_results as (
select
*,
row_number() over (partition by unique_id order by run_results.generated_at desc) as row_number
from {{ ref("elementary", "dbt_run_results") }} run_results
join {{ ref("elementary", "dbt_models") }} using (unique_id)
run_results.unique_id,
run_results.invocation_id,
row_number() over (partition by run_results.unique_id order by run_results.generated_at desc) as {{ elementary.escape_reserved_keywords('row_number') }}
from {{ ref("dbt_run_results", package="elementary") }} run_results
join {{ ref("dbt_models", package="elementary") }} models on run_results.unique_id = models.unique_id
),

latest_run_results as (
select *
select unique_id, invocation_id
from ordered_run_results
where row_number = 1
where {{ elementary.escape_reserved_keywords('row_number') }} = 1
)

select unique_id, invocation_id from latest_run_results
Original file line number Diff line number Diff line change
@@ -1,35 +1,35 @@
{% macro get_models_latest_invocations_data() %}
{% set invocations_relation = ref("elementary", "dbt_invocations") %}
{% set invocations_relation = ref("dbt_invocations", package="elementary") %}
{% set column_exists = elementary.column_exists_in_relation(invocations_relation, 'job_url') %}

{% set query %}
with ordered_run_results as (
select
*,
row_number() over (partition by unique_id order by run_results.generated_at desc) as row_number
from {{ ref("elementary", "dbt_run_results") }} run_results
join {{ ref("elementary", "dbt_models") }} using (unique_id)
run_results.invocation_id,
row_number() over (partition by run_results.unique_id order by run_results.generated_at desc) as {{ elementary.escape_reserved_keywords('row_number') }}
from {{ ref("dbt_run_results", package="elementary") }} run_results
join {{ ref("dbt_models", package="elementary") }} models on run_results.unique_id = models.unique_id
),

latest_models_invocations as (
select distinct invocation_id
from ordered_run_results
where row_number = 1
where {{ elementary.escape_reserved_keywords('row_number') }} = 1
)

select
invocation_id,
command,
selected,
full_refresh,
invocations.invocation_id,
invocations.command,
invocations.selected,
invocations.full_refresh,
{% if column_exists %}
job_url,
invocations.job_url,
{% endif %}
job_name,
job_id,
orchestrator
invocations.job_name,
invocations.job_id,
invocations.orchestrator
from {{ invocations_relation }} invocations
join latest_models_invocations using (invocation_id)
join latest_models_invocations on invocations.invocation_id = latest_models_invocations.invocation_id
{% endset %}
{% set result = elementary.run_query(query) %}
{% do return(elementary.agate_to_dicts(result)) %}
Original file line number Diff line number Diff line change
@@ -7,7 +7,7 @@
select
elementary_test_results_id,
result_row
from {{ ref("elementary", "test_result_rows") }}
from {{ ref("test_result_rows", package="elementary") }}
where {{ elementary.edr_datediff(elementary.edr_cast_as_timestamp('detected_at'), elementary.edr_current_timestamp(), 'day') }} < {{ days_back }}
{% if valid_ids_query %}
and elementary_test_results_id in ({{ valid_ids_query }})
@@ -25,7 +25,7 @@
select
elementary_test_results_id,
result_row
from {{ ref("elementary", "test_result_rows") }}
from {{ ref("test_result_rows", package="elementary") }}
where detected_at > {{ elementary.edr_timeadd('day', -1 * days_back, elementary.edr_current_timestamp()) }}
{% if valid_ids_query %}
and elementary_test_results_id in ({{ valid_ids_query }})