Skip to content

Commit dcfa5e6

Browse files
refactor: address PR review - ref syntax, healthchecks, external scripts
- Fix dbt-core 1.11 compat: convert ref('elementary', 'model') to ref('model', package='elementary')
- Remove continue-on-error for Dremio edr steps (root cause fixed)
- Simplify workflow Start steps to use docker compose up -d --wait
- Move seed cache save/restore to external ci/*.sh scripts
- Fix schema quoting in drop_test_schemas.sql for duckdb and spark
- Add non-root user to Spark Dockerfile
- Remove unused dremio_seed.sql (seeds now load via external S3)

Co-Authored-By: Itamar Hartstein <haritamar@gmail.com>
1 parent 4fba3c4 commit dcfa5e6

32 files changed

Lines changed: 154 additions & 291 deletions

.github/workflows/test-warehouse.yml

Lines changed: 5 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -117,21 +117,7 @@ jobs:
117117
- name: Restore cached seed data into Docker volumes
118118
if: steps.seed-cache.outputs.cache-hit == 'true' && inputs.warehouse-type != 'duckdb'
119119
working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
120-
run: |
121-
echo "Restoring seed cache for ${{ inputs.warehouse-type }}..."
122-
CACHE_DIR="/tmp/seed-cache-${{ inputs.warehouse-type }}"
123-
124-
# Restore each Docker volume from the cached tarballs.
125-
# This runs BEFORE services start so containers initialise with cached data.
126-
for archive in "$CACHE_DIR"/*.tar.gz; do
127-
[ -f "$archive" ] || continue
128-
VOLUME_NAME=$(basename "$archive" .tar.gz)
129-
echo "Restoring volume $VOLUME_NAME from $archive..."
130-
docker volume create "$VOLUME_NAME" 2>/dev/null || true
131-
docker run --rm -v "$VOLUME_NAME:/data" -v "$CACHE_DIR:/cache:ro" \
132-
alpine sh -c "cd /data && tar xzf /cache/${VOLUME_NAME}.tar.gz"
133-
done
134-
echo "Seed cache restored."
120+
run: bash ci/restore_seed_cache.sh "${{ inputs.warehouse-type }}"
135121

136122
- name: Restore cached DuckDB seed
137123
if: steps.seed-cache.outputs.cache-hit == 'true' && inputs.warehouse-type == 'duckdb'
@@ -154,73 +140,19 @@ jobs:
154140
if: inputs.warehouse-type == 'trino'
155141
working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
156142
run: |
157-
docker compose up -d trino-minio trino-metastore-db
158-
sleep 5
159-
docker compose up -d hive-metastore trino-mc-job
160-
sleep 10
161-
docker compose up -d trino
162-
# Wait for Trino to be fully ready (not just HTTP responding, but initialized)
163-
ready=0
164-
for i in $(seq 1 60); do
165-
if curl -sf http://localhost:8086/v1/info 2>/dev/null | grep -q '"starting":false'; then
166-
ready=1
167-
break
168-
fi
169-
echo "Waiting for Trino... ($i/60)"
170-
sleep 5
171-
done
172-
if [ "$ready" -ne 1 ]; then
173-
echo "Timed out waiting for Trino"
174-
exit 1
175-
fi
143+
docker compose up -d --wait trino
176144
177145
- name: Start Dremio
178146
if: inputs.warehouse-type == 'dremio'
179147
working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
180148
run: |
181-
docker compose up -d nessie dremio-minio
182-
sleep 5
183-
docker compose up -d dremio-minio-setup
184-
docker compose up -d dremio
185-
# Wait for Dremio healthcheck
186-
ready=0
187-
for i in $(seq 1 60); do
188-
if curl -sf http://localhost:9047 > /dev/null; then
189-
ready=1
190-
break
191-
fi
192-
echo "Waiting for Dremio... ($i/60)"
193-
sleep 5
194-
done
195-
if [ "$ready" -ne 1 ]; then
196-
echo "Timed out waiting for Dremio"
197-
exit 1
198-
fi
199-
docker compose up -d dremio-setup
200-
# Wait for setup to complete
201-
sleep 15
149+
docker compose up -d --wait dremio-setup
202150
203151
- name: Start Spark
204152
if: inputs.warehouse-type == 'spark'
205153
working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
206154
run: |
207-
docker compose up -d spark-hive-metastore
208-
sleep 5
209-
docker compose up -d --build spark-thrift
210-
# Wait for Spark Thrift Server to be ready
211-
ready=0
212-
for i in $(seq 1 60); do
213-
if nc -z 127.0.0.1 10000; then
214-
ready=1
215-
break
216-
fi
217-
echo "Waiting for Spark Thrift Server... ($i/60)"
218-
sleep 5
219-
done
220-
if [ "$ready" -ne 1 ]; then
221-
echo "Timed out waiting for Spark Thrift Server"
222-
exit 1
223-
fi
155+
docker compose up -d --build --wait spark-thrift
224156
225157
- name: Setup Python
226158
uses: actions/setup-python@v5
@@ -319,51 +251,7 @@ jobs:
319251
- name: Save seed cache from Docker volumes
320252
if: steps.seed-cache.outputs.cache-hit != 'true' && inputs.warehouse-type != 'duckdb' && inputs.warehouse-type != 'trino' && inputs.warehouse-type != 'spark' && inputs.warehouse-type != 'dremio' && (inputs.warehouse-type == 'postgres' || inputs.warehouse-type == 'clickhouse')
321253
working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
322-
run: |
323-
CACHE_DIR="/tmp/seed-cache-${{ inputs.warehouse-type }}"
324-
mkdir -p "$CACHE_DIR"
325-
326-
# Get the Docker Compose project name and list volumes
327-
COMPOSE_VOLUMES=$(docker compose config --volumes 2>/dev/null || true)
328-
PROJECT=$(docker compose config 2>/dev/null | grep '^name:' | awk '{print $2}' || echo "e2e_dbt_project")
329-
330-
# Stop running containers so no files change while archiving
331-
docker compose stop || true
332-
333-
for vol in $COMPOSE_VOLUMES; do
334-
FULL_VOL="${PROJECT}_${vol}"
335-
if docker volume inspect "$FULL_VOL" >/dev/null 2>&1; then
336-
echo "Saving volume $FULL_VOL..."
337-
docker run --rm -v "$FULL_VOL:/data:ro" -v "$CACHE_DIR:/cache" \
338-
alpine sh -c "cd /data && tar czf /cache/${FULL_VOL}.tar.gz ."
339-
fi
340-
done
341-
342-
# Restart containers for the rest of the pipeline
343-
docker compose start || true
344-
345-
# Wait for services to be ready after restart
346-
case "${{ inputs.warehouse-type }}" in
347-
clickhouse)
348-
for i in $(seq 1 30); do
349-
curl -sf http://localhost:8123/ping > /dev/null && break
350-
echo "Waiting for ClickHouse after restart... ($i/30)"; sleep 2
351-
done
352-
;;
353-
spark)
354-
for i in $(seq 1 60); do
355-
nc -z 127.0.0.1 10000 && break
356-
echo "Waiting for Spark after restart... ($i/60)"; sleep 5
357-
done
358-
;;
359-
postgres)
360-
for i in $(seq 1 30); do
361-
pg_isready -h localhost -p 5432 > /dev/null 2>&1 && break
362-
echo "Waiting for Postgres after restart... ($i/30)"; sleep 2
363-
done
364-
;;
365-
esac
366-
echo "Seed cache saved."
254+
run: bash ci/save_seed_cache.sh "${{ inputs.warehouse-type }}"
367255

368256
- name: Save DuckDB seed cache
369257
if: steps.seed-cache.outputs.cache-hit != 'true' && inputs.warehouse-type == 'duckdb'
@@ -413,7 +301,6 @@ jobs:
413301
run: edr --help
414302

415303
- name: Run monitor
416-
continue-on-error: ${{ inputs.warehouse-type == 'dremio' }}
417304
env:
418305
SLACK_WEBHOOK: ${{ secrets.CI_SLACK_WEBHOOK }}
419306
run: >
@@ -425,14 +312,12 @@ jobs:
425312
--slack-webhook "$SLACK_WEBHOOK"
426313
427314
- name: Validate alerts statuses were updated
428-
continue-on-error: ${{ inputs.warehouse-type == 'dremio' }}
429315
working-directory: ${{ env.CLI_INTERNAL_DBT_PKG_DIR }}
430316
run: |
431317
dbt deps
432318
dbt run-operation validate_alert_statuses_are_updated -t "${{ inputs.warehouse-type }}"
433319
434320
- name: Run report
435-
continue-on-error: ${{ inputs.warehouse-type == 'dremio' }}
436321
run: >
437322
edr monitor report
438323
-t "${{ inputs.warehouse-type }}"
@@ -457,7 +342,6 @@ jobs:
457342
run: echo "$GCS_KEYFILE" | base64 -d > /tmp/gcs_keyfile.json
458343

459344
- name: Run send report
460-
continue-on-error: ${{ inputs.warehouse-type == 'dremio' }}
461345
env:
462346
SLACK_TOKEN: ${{ secrets.CI_SLACK_TOKEN }}
463347
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
@@ -495,7 +379,6 @@ jobs:
495379
path: elementary/edr_target/edr.log
496380

497381
- name: Run Python package e2e tests
498-
continue-on-error: ${{ inputs.warehouse-type == 'dremio' }}
499382
run: pytest -vv tests/e2e --warehouse-type ${{ inputs.warehouse-type }}
500383

501384
- name: Drop test schemas

elementary/monitor/dbt_project/macros/alerts/population/model_alerts.sql

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,23 +49,23 @@
4949
{% set seed_run_results_relation = elementary.get_elementary_relation('seed_run_results') -%}
5050

5151
with models as (
52-
select * from {{ ref('elementary', 'dbt_models') }}
52+
select * from {{ ref('dbt_models', package='elementary') }}
5353
),
5454

5555
snapshots as (
56-
select * from {{ ref('elementary', 'dbt_snapshots') }}
56+
select * from {{ ref('dbt_snapshots', package='elementary') }}
5757
),
5858

5959
seeds as (
60-
select * from {{ ref('elementary', 'dbt_seeds') }}
60+
select * from {{ ref('dbt_seeds', package='elementary') }}
6161
),
6262

6363
dbt_invocations as (
64-
select * from {{ ref('elementary', 'dbt_invocations') }}
64+
select * from {{ ref('dbt_invocations', package='elementary') }}
6565
),
6666

6767
dbt_run_results as (
68-
select * from {{ ref('elementary', 'dbt_run_results') }}
68+
select * from {{ ref('dbt_run_results', package='elementary') }}
6969
),
7070

7171
artifacts_meta as (

elementary/monitor/dbt_project/macros/alerts/population/source_freshness_alerts.sql

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,10 @@
5757

5858

5959
{% macro populate_source_freshness_alerts_query(days_back=1) %}
60-
{% set source_freshness_results_relation = ref('elementary', 'dbt_source_freshness_results') %}
60+
{% set source_freshness_results_relation = ref('dbt_source_freshness_results', package='elementary') %}
6161
{% set error_after_column_exists = elementary.column_exists_in_relation(source_freshness_results_relation, 'error_after') %}
6262

63-
{% set sources_relation = ref('elementary', 'dbt_sources') %}
63+
{% set sources_relation = ref('dbt_sources', package='elementary') %}
6464
{% set freshness_description_column_exists = elementary.column_exists_in_relation(sources_relation, 'freshness_description') %}
6565

6666
with dbt_source_freshness_results as (
@@ -72,11 +72,11 @@
7272
),
7373

7474
dbt_invocations as (
75-
select * from {{ ref('elementary', 'dbt_invocations') }}
75+
select * from {{ ref('dbt_invocations', package='elementary') }}
7676
),
7777

7878
dbt_run_results as (
79-
select * from {{ ref('elementary', 'dbt_run_results') }}
79+
select * from {{ ref('dbt_run_results', package='elementary') }}
8080
),
8181

8282
source_freshness_alerts as (

elementary/monitor/dbt_project/macros/alerts/population/test_alerts.sql

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,23 +70,23 @@
7070
),
7171

7272
models as (
73-
select * from {{ ref('elementary', 'dbt_models') }}
73+
select * from {{ ref('dbt_models', package='elementary') }}
7474
),
7575

7676
sources as (
77-
select * from {{ ref('elementary', 'dbt_sources') }}
77+
select * from {{ ref('dbt_sources', package='elementary') }}
7878
),
7979

8080
tests as (
81-
select * from {{ ref('elementary', 'dbt_tests') }}
81+
select * from {{ ref('dbt_tests', package='elementary') }}
8282
),
8383

8484
dbt_invocations as (
85-
select * from {{ ref('elementary', 'dbt_invocations') }}
85+
select * from {{ ref('dbt_invocations', package='elementary') }}
8686
),
8787

8888
dbt_run_results as (
89-
select * from {{ ref('elementary', 'dbt_run_results') }}
89+
select * from {{ ref('dbt_run_results', package='elementary') }}
9090
),
9191

9292
artifacts_meta as (

elementary/monitor/dbt_project/macros/base_queries/current_tests_run_results_query.sql

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,28 @@
11
{% macro current_tests_run_results_query(days_back = none, invocation_id = none) %}
22
with elementary_test_results as (
3-
select * from {{ ref('elementary', 'elementary_test_results') }}
3+
select * from {{ ref('elementary_test_results', package='elementary') }}
44
{% if days_back %}
55
where {{ elementary.edr_datediff(elementary.edr_cast_as_timestamp('detected_at'), elementary.edr_current_timestamp(), 'day') }} < {{ days_back }}
66
{% endif %}
77
),
88

99
dbt_run_results as (
10-
select * from {{ ref('elementary', 'dbt_run_results') }}
10+
select * from {{ ref('dbt_run_results', package='elementary') }}
1111
{% if days_back %}
1212
where {{ elementary.edr_datediff(elementary.edr_cast_as_timestamp('execute_completed_at'), elementary.edr_current_timestamp(), 'day') }} < {{ days_back }}
1313
{% endif %}
1414
),
1515

1616
dbt_tests as (
17-
select * from {{ ref('elementary', 'dbt_tests') }}
17+
select * from {{ ref('dbt_tests', package='elementary') }}
1818
),
1919

2020
dbt_models as (
21-
select * from {{ ref('elementary', 'dbt_models') }}
21+
select * from {{ ref('dbt_models', package='elementary') }}
2222
),
2323

2424
dbt_sources as (
25-
select * from {{ ref('elementary', 'dbt_sources') }}
25+
select * from {{ ref('dbt_sources', package='elementary') }}
2626
),
2727

2828
dbt_artifacts as (

elementary/monitor/dbt_project/macros/base_queries/owners.sql

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
11
{% macro get_project_owners() %}
22
{% set project_owners_query %}
33
with dbt_models as (
4-
select * from {{ ref('elementary', 'dbt_models') }}
4+
select * from {{ ref('dbt_models', package='elementary') }}
55
),
66

77
dbt_sources as (
8-
select * from {{ ref('elementary', 'dbt_sources') }}
8+
select * from {{ ref('dbt_sources', package='elementary') }}
99
),
1010

1111
dbt_seeds as (
12-
select * from {{ ref('elementary', 'dbt_seeds') }}
12+
select * from {{ ref('dbt_seeds', package='elementary') }}
1313
),
1414

1515
dbt_tests as (
16-
select * from {{ ref('elementary', 'dbt_tests') }}
16+
select * from {{ ref('dbt_tests', package='elementary') }}
1717
)
1818

1919
select model_owners as owner from dbt_tests

elementary/monitor/dbt_project/macros/base_queries/resources.sql

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{% macro get_model_resources(exclude_elementary=true) %}
22
{% set model_resources_query %}
33
with dbt_models as (
4-
select * from {{ ref('elementary', 'dbt_models') }}
4+
select * from {{ ref('dbt_models', package='elementary') }}
55
)
66

77
select
@@ -24,7 +24,7 @@
2424
{% macro get_source_resources(exclude_elementary=true) %}
2525
{% set source_resources_query %}
2626
with dbt_sources as (
27-
select * from {{ ref('elementary', 'dbt_sources') }}
27+
select * from {{ ref('dbt_sources', package='elementary') }}
2828
)
2929

3030
select
@@ -62,19 +62,19 @@
6262
{% macro get_resources_meta() %}
6363
{% set resources_meta_query %}
6464
with dbt_models as (
65-
select * from {{ ref('elementary', 'dbt_models') }}
65+
select * from {{ ref('dbt_models', package='elementary') }}
6666
),
6767

6868
dbt_sources as (
69-
select * from {{ ref('elementary', 'dbt_sources') }}
69+
select * from {{ ref('dbt_sources', package='elementary') }}
7070
),
7171

7272
dbt_seeds as (
73-
select * from {{ ref('elementary', 'dbt_seeds') }}
73+
select * from {{ ref('dbt_seeds', package='elementary') }}
7474
),
7575

7676
dbt_tests as (
77-
select * from {{ ref('elementary', 'dbt_tests') }}
77+
select * from {{ ref('dbt_tests', package='elementary') }}
7878
)
7979

8080
select meta from dbt_tests

elementary/monitor/dbt_project/macros/base_queries/tags.sql

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
{% macro get_project_tags() %}
22
{% set project_tags_query %}
33
with dbt_models as (
4-
select * from {{ ref('elementary', 'dbt_models') }}
4+
select * from {{ ref('dbt_models', package='elementary') }}
55
),
66

77
dbt_sources as (
8-
select * from {{ ref('elementary', 'dbt_sources') }}
8+
select * from {{ ref('dbt_sources', package='elementary') }}
99
),
1010

1111
dbt_tests as (
12-
select * from {{ ref('elementary', 'dbt_tests') }}
12+
select * from {{ ref('dbt_tests', package='elementary') }}
1313
)
1414

1515
select tags from dbt_models

0 commit comments

Comments
 (0)