From 8b84461f3b5a12cf013a10a8284bf9099df15f7f Mon Sep 17 00:00:00 2001 From: Jesse Cooke Date: Wed, 8 Oct 2025 16:50:39 -0700 Subject: [PATCH 01/32] Remove extra newlines that Vertica could not parse Without trimming the leading and trailing newlines, Vertica would fail to parse the compiled SQL. For example, `models/edr/dbt_artifacts/dbt_columns` compiles the following SQL, via `elementary.get_dbt_columns_empty_table_query`, `empty_table` and `empty_column`: ```sql select * from ( select cast('dummy_string' as varchar(4096)) as unique_id , cast('dummy_string' as varchar(4096)) as parent_unique_id , cast('dummy_string' as varchar(4096)) as name , cast('dummy_string' as varchar(4096)) as data_type , cast('this_is_just_a_long_dummy_string' as varchar(4096)) as tags , cast('this_is_just_a_long_dummy_string' as varchar(4096)) as meta , cast('dummy_string' as varchar(4096)) as database_name , cast('dummy_string' as varchar(4096)) as schema_name , cast('dummy_string' as varchar(4096)) as table_name , cast('this_is_just_a_long_dummy_string' as varchar(4096)) as description , cast('dummy_string' as varchar(4096)) as resource_type , cast('dummy_string' as varchar(4096)) as generated_at , cast('dummy_string' as varchar(4096)) as metadata_hash ) as empty_table where 1 = 0 ``` which would cause ``` SQL Error [4856] [42601]: [Vertica][VJDBC](4856) ERROR: Syntax error at or near ")" at character 1 ``` By trimming the newlines, the SQL is much tighter: ```sql select * from ( select cast('dummy_string' as varchar(4096)) as unique_id, cast('dummy_string' as varchar(4096)) as parent_unique_id, cast('dummy_string' as varchar(4096)) as name, cast('dummy_string' as varchar(4096)) as data_type, cast('this_is_just_a_long_dummy_string' as varchar(4096)) as tags, cast('this_is_just_a_long_dummy_string' as varchar(4096)) as meta, cast('dummy_string' as varchar(4096)) as database_name, cast('dummy_string' as varchar(4096)) as schema_name, cast('dummy_string' as varchar(4096)) as table_name, 
cast('this_is_just_a_long_dummy_string' as varchar(4096)) as description, cast('dummy_string' as varchar(4096)) as resource_type, cast('dummy_string' as varchar(4096)) as generated_at, cast('dummy_string' as varchar(4096)) as metadata_hash ) as empty_table where 1 = 0 ``` and this runs in Vertica just fine. --- macros/edr/system/system_utils/empty_table.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/macros/edr/system/system_utils/empty_table.sql b/macros/edr/system/system_utils/empty_table.sql index 07ccc3e4d..8987f52d4 100644 --- a/macros/edr/system/system_utils/empty_table.sql +++ b/macros/edr/system/system_utils/empty_table.sql @@ -121,7 +121,7 @@ {%- set empty_table_query -%} select * from ( select - {% for column in column_name_and_type_list %} + {%- for column in column_name_and_type_list -%} {{ elementary.empty_column(column[0], column[1]) }} {%- if not loop.last -%},{%- endif %} {%- endfor %} ) as empty_table @@ -161,7 +161,7 @@ cast({{ dummy_values['int'] }} as Nullable({{ elementary.edr_type_int() }})) as {{ column_name }} {%- else %} cast('{{ dummy_values['string'] }}' as {{ elementary.edr_type_string() }}) as {{ column_name }} - {%- endif %} + {%- endif -%} {% endmacro %} From 7146da5f6acd70c779014c9d523a29ac0a7819d1 Mon Sep 17 00:00:00 2001 From: Jesse Cooke Date: Mon, 17 Nov 2025 14:05:54 -0800 Subject: [PATCH 02/32] Add Vertica-specific escape macro This fixed 4 or 5 errors when running in my test project. 
--- macros/utils/table_operations/insert_rows.sql | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/macros/utils/table_operations/insert_rows.sql b/macros/utils/table_operations/insert_rows.sql index aa278027a..48c840805 100644 --- a/macros/utils/table_operations/insert_rows.sql +++ b/macros/utils/table_operations/insert_rows.sql @@ -149,6 +149,10 @@ {{- return(string_value | replace("'", "''")) -}} {%- endmacro -%} +{%- macro vertica__escape_special_chars(string_value) -%} + {{- return(string_value | replace("'", "''")) -}} +{%- endmacro -%} + {%- macro athena__escape_special_chars(string_value) -%} {{- return(string_value | replace("'", "''")) -}} {%- endmacro -%} From 27e924d4393d60b5da5381b13e37b872d2251296 Mon Sep 17 00:00:00 2001 From: Jesse Cooke Date: Mon, 17 Nov 2025 14:26:27 -0800 Subject: [PATCH 03/32] Add Vertica-specific timeadd macro --- macros/utils/cross_db_utils/timeadd.sql | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/macros/utils/cross_db_utils/timeadd.sql b/macros/utils/cross_db_utils/timeadd.sql index 0f2419e86..69d46c938 100644 --- a/macros/utils/cross_db_utils/timeadd.sql +++ b/macros/utils/cross_db_utils/timeadd.sql @@ -22,6 +22,10 @@ {{ elementary.edr_cast_as_timestamp(timestamp_expression) }} + {{ elementary.edr_cast_as_int(number) }} * INTERVAL '1 {{ date_part }}' {% endmacro %} +{% macro vertica__edr_timeadd(date_part, number, timestamp_expression) %} + timestampadd({{ date_part | upper }}, {{ elementary.edr_cast_as_int(number) }}, {{ elementary.edr_cast_as_timestamp(timestamp_expression) }}) +{% endmacro %} + {% macro redshift__edr_timeadd(date_part, number, timestamp_expression) %} dateadd({{ date_part }}, {{ elementary.edr_cast_as_int(number) }}, {{ elementary.edr_cast_as_timestamp(timestamp_expression) }}) {% endmacro %} From 667054b42818fa98b6ce1133adb7395bb5f526af Mon Sep 17 00:00:00 2001 From: Jesse Cooke Date: Mon, 17 Nov 2025 14:50:13 -0800 Subject: [PATCH 04/32] Attempt to set up Vertica in CI --- 
.github/workflows/test-all-warehouses.yml | 3 ++ .github/workflows/test-warehouse.yml | 6 ++++ integration_tests/docker-compose-vertica.yml | 31 ++++++++++++++++++++ 3 files changed, 40 insertions(+) create mode 100644 integration_tests/docker-compose-vertica.yml diff --git a/.github/workflows/test-all-warehouses.yml b/.github/workflows/test-all-warehouses.yml index c5e7ad4d1..ce7388e13 100644 --- a/.github/workflows/test-all-warehouses.yml +++ b/.github/workflows/test-all-warehouses.yml @@ -49,6 +49,7 @@ jobs: trino, clickhouse, dremio, + vertica, ] include: - dbt-version: "${{ inputs.dbt-version || 'latest_pre' }}" @@ -61,6 +62,8 @@ jobs: warehouse-type: redshift - dbt-version: "${{ inputs.dbt-version || 'fusion' }}" warehouse-type: databricks_catalog + - dbt-version: "${{ inputs.dbt-version || '1.8.5' }}" + warehouse-type: vertica uses: ./.github/workflows/test-warehouse.yml with: warehouse-type: ${{ matrix.warehouse-type }} diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index 65bed62cd..c72ebc414 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -17,6 +17,7 @@ on: - trino - clickhouse - dremio + - vertica elementary-ref: type: string required: false @@ -109,6 +110,11 @@ jobs: working-directory: ${{ env.TESTS_DIR }} run: docker compose -f docker-compose-dremio.yml up -d + - name: Start Vertica + if: inputs.warehouse-type == 'vertica' + working-directory: ${{ env.TESTS_DIR }} + run: docker compose -f docker-compose-vertica.yml up -d + - name: Setup Python uses: actions/setup-python@v4 with: diff --git a/integration_tests/docker-compose-vertica.yml b/integration_tests/docker-compose-vertica.yml new file mode 100644 index 000000000..4643c0d21 --- /dev/null +++ b/integration_tests/docker-compose-vertica.yml @@ -0,0 +1,31 @@ +services: + vertica: + environment: + VERTICA_USER: dbadmin + VERTICA_PASS: vertica + VERTICA_HOST: localhost + VERTICA_PORT: 5433 + VERTICA_DATABASE: 
elementary_tests + VERTICA_SCHEMA: elementary_tests_vertica + APP_DB_USER: ${VERTICA_USER} + APP_DB_PASSWORD: ${VERTICA_PASS} + TZ: "America/Los_Angeles" + VERTICA_DB_NAME: ${VERTICA_DATABASE} + VMART_ETL_SCRIPT: "" + container_name: vertica + image: vertica/vertica-ce + ports: + - "${VERTICA_PORT}:${VERTICA_PORT}" + - "5444:5444" + deploy: + mode: global + ulimits: + nofile: + soft: 65536 + hard: 65536 + volumes: + - type: volume + source: vertica-data + target: /data +volumes: + vertica-data: From 2130a7a90e06440208e4b058433d31bd670b8992 Mon Sep 17 00:00:00 2001 From: Jesse Cooke Date: Mon, 17 Nov 2025 14:58:08 -0800 Subject: [PATCH 05/32] Debug missing port --- .github/workflows/test-warehouse.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index c72ebc414..2e5a22732 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -111,6 +111,8 @@ jobs: run: docker compose -f docker-compose-dremio.yml up -d - name: Start Vertica + env: + VERTICA_PORT: 5433 if: inputs.warehouse-type == 'vertica' working-directory: ${{ env.TESTS_DIR }} run: docker compose -f docker-compose-vertica.yml up -d From d7a9c0f1e9813269a2c2fdf1caa3fc72d9f163f1 Mon Sep 17 00:00:00 2001 From: Jesse Cooke Date: Mon, 17 Nov 2025 15:00:22 -0800 Subject: [PATCH 06/32] Add more missing env vars for CI I thought I might have to add these and not just `VERTICA_PORT`. 
--- .github/workflows/test-warehouse.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index 2e5a22732..17611dae1 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -113,6 +113,10 @@ jobs: - name: Start Vertica env: VERTICA_PORT: 5433 + VERTICA_USER: dbadmin + VERTICA_PASS: vertica + VERTICA_DATABASE: elementary_tests + VERTICA_SCHEMA: elementary_tests_vertica if: inputs.warehouse-type == 'vertica' working-directory: ${{ env.TESTS_DIR }} run: docker compose -f docker-compose-vertica.yml up -d From 8d1975ddd01c1f82b7f2235664b7561139216f7d Mon Sep 17 00:00:00 2001 From: Jesse Cooke Date: Mon, 17 Nov 2025 15:06:12 -0800 Subject: [PATCH 07/32] Try opentext namespace for CI image --- integration_tests/docker-compose-vertica.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_tests/docker-compose-vertica.yml b/integration_tests/docker-compose-vertica.yml index 4643c0d21..09ced56c0 100644 --- a/integration_tests/docker-compose-vertica.yml +++ b/integration_tests/docker-compose-vertica.yml @@ -13,7 +13,7 @@ services: VERTICA_DB_NAME: ${VERTICA_DATABASE} VMART_ETL_SCRIPT: "" container_name: vertica - image: vertica/vertica-ce + image: opentext/vertica-ce ports: - "${VERTICA_PORT}:${VERTICA_PORT}" - "5444:5444" From fe2585ef9825c048fff955db13169a326af8a83e Mon Sep 17 00:00:00 2001 From: Jesse Cooke Date: Mon, 17 Nov 2025 15:16:56 -0800 Subject: [PATCH 08/32] Use Ratio's Vertica-CE I can't tell if OpenText pulled Vertica or what, but both the vertica and opentext namespaces were failing. Luckily I had the image pulled locally. 
--- integration_tests/docker-compose-vertica.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_tests/docker-compose-vertica.yml b/integration_tests/docker-compose-vertica.yml index 09ced56c0..6bc8ef012 100644 --- a/integration_tests/docker-compose-vertica.yml +++ b/integration_tests/docker-compose-vertica.yml @@ -13,7 +13,7 @@ services: VERTICA_DB_NAME: ${VERTICA_DATABASE} VMART_ETL_SCRIPT: "" container_name: vertica - image: opentext/vertica-ce + image: ratiopbc/vertica-ce ports: - "${VERTICA_PORT}:${VERTICA_PORT}" - "5444:5444" From 21541637ce30f1fa1b123ad59f571b9aa84c8f79 Mon Sep 17 00:00:00 2001 From: Jesse Cooke Date: Tue, 18 Nov 2025 10:17:03 -0800 Subject: [PATCH 09/32] Add dbt-vertica-version dbt-vertica versions match dbt-core versions, and they are a bit behind, which is why we default to the latest available: 1.8.5. --- .github/workflows/test-warehouse.yml | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index 17611dae1..30c587e80 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -31,6 +31,11 @@ on: required: false default: "latest_official" description: dbt's version to test with + dbt-vertica-version: + type: string + required: false + default: "1.8.5" + description: dbt-vertica's version to test with workflow_call: inputs: @@ -47,6 +52,10 @@ on: type: string default: "latest_official" required: false + dbt-vertica-version: + type: string + default: "1.8.5" + required: false env: BRANCH_NAME: ${{ github.head_ref || github.ref_name }} @@ -135,8 +144,13 @@ jobs: if: startsWith(inputs.warehouse-type, 'databricks') && inputs.dbt-version < '1.7.0' run: pip install databricks-sql-connector==2.9.3 + - name: Install dbt-vertica + if: ${{ inputs.warehouse-type == 'vertica' }} + run: + pip install "dbt-vertica==${{ inputs.dbt-vertica-version }}" + - name: Install dbt - if: ${{ 
inputs.dbt-version != 'fusion' }} + if: ${{ inputs.dbt-version != 'fusion' && inputs.warehouse-type != 'vertica' }} run: pip install${{ (inputs.dbt-version == 'latest_pre' && ' --pre') || '' }} "dbt-core${{ (!startsWith(inputs.dbt-version, 'latest') && format('=={0}', inputs.dbt-version)) || '' }}" From 19e41b55cad84fd37fd20ec102bdb062d0f14550 Mon Sep 17 00:00:00 2001 From: Jesse Cooke Date: Tue, 18 Nov 2025 11:08:35 -0800 Subject: [PATCH 10/32] Start Vertica after schema has been determined --- .github/workflows/test-warehouse.yml | 26 +++++++++++--------- integration_tests/docker-compose-vertica.yml | 2 +- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index 30c587e80..a68b010f2 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -119,17 +119,6 @@ jobs: working-directory: ${{ env.TESTS_DIR }} run: docker compose -f docker-compose-dremio.yml up -d - - name: Start Vertica - env: - VERTICA_PORT: 5433 - VERTICA_USER: dbadmin - VERTICA_PASS: vertica - VERTICA_DATABASE: elementary_tests - VERTICA_SCHEMA: elementary_tests_vertica - if: inputs.warehouse-type == 'vertica' - working-directory: ${{ env.TESTS_DIR }} - run: docker compose -f docker-compose-vertica.yml up -d - - name: Setup Python uses: actions/setup-python@v4 with: @@ -171,7 +160,9 @@ jobs: mkdir -p ~/.dbt DBT_VERSION=$(echo "${{ inputs.dbt-version }}" | sed 's/\.//g') UNDERSCORED_REF_NAME=$(echo "${{ inputs.warehouse-type }}_dbt_${DBT_VERSION}_${BRANCH_NAME}" | awk '{print tolower($0)}' | head -c 40 | sed "s/[-\/]/_/g") - echo "$PROFILES_YML" | base64 -d | sed "s//dbt_pkg_$UNDERSCORED_REF_NAME/g" > ~/.dbt/profiles.yml + SCHEMA_NAME="dbt_pkg_$UNDERSCORED_REF_NAME" + echo "SCHEMA_NAME=$SCHEMA_NAME" >> $GITHUB_ENV + echo "$PROFILES_YML" | base64 -d | sed "s//$SCHEMA_NAME/g" > ~/.dbt/profiles.yml - name: Install dependencies working-directory: ${{ env.TESTS_DIR }} @@ -180,6 
+171,17 @@ jobs: ln -sfn ${{ github.workspace }}/dbt-data-reliability dbt_project/dbt_packages/elementary pip install -r requirements.txt + - name: Start Vertica + env: + VERTICA_PORT: 5433 + VERTICA_USER: dbadmin + VERTICA_PASS: vertica + VERTICA_DATABASE: elementary_tests + VERTICA_SCHEMA: ${{ env.SCHEMA_NAME }} + if: inputs.warehouse-type == 'vertica' + working-directory: ${{ env.TESTS_DIR }} + run: docker compose -f docker-compose-vertica.yml up -d + - name: Check DWH connection working-directory: ${{ env.TESTS_DIR }} run: | diff --git a/integration_tests/docker-compose-vertica.yml b/integration_tests/docker-compose-vertica.yml index 6bc8ef012..19c8d2778 100644 --- a/integration_tests/docker-compose-vertica.yml +++ b/integration_tests/docker-compose-vertica.yml @@ -6,7 +6,7 @@ services: VERTICA_HOST: localhost VERTICA_PORT: 5433 VERTICA_DATABASE: elementary_tests - VERTICA_SCHEMA: elementary_tests_vertica + VERTICA_SCHEMA: ${SCHEMA_NAME} APP_DB_USER: ${VERTICA_USER} APP_DB_PASSWORD: ${VERTICA_PASS} TZ: "America/Los_Angeles" From c585daebf2c03728872cc8d421e2092dc67cc038 Mon Sep 17 00:00:00 2001 From: Jesse Cooke Date: Tue, 18 Nov 2025 11:30:39 -0800 Subject: [PATCH 11/32] Use Ratio's GitHub package for vertica-ce This should be a lot faster than pulling from docker.io --- integration_tests/docker-compose-vertica.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_tests/docker-compose-vertica.yml b/integration_tests/docker-compose-vertica.yml index 19c8d2778..1664de5f9 100644 --- a/integration_tests/docker-compose-vertica.yml +++ b/integration_tests/docker-compose-vertica.yml @@ -13,7 +13,7 @@ services: VERTICA_DB_NAME: ${VERTICA_DATABASE} VMART_ETL_SCRIPT: "" container_name: vertica - image: ratiopbc/vertica-ce + image: ghcr.io/ratiopbc/vertica-ce ports: - "${VERTICA_PORT}:${VERTICA_PORT}" - "5444:5444" From d198a9e2e68391568779f267eed3f6688be50431 Mon Sep 17 00:00:00 2001 From: Jesse Cooke Date: Tue, 18 Nov 2025 11:46:23 -0800 
Subject: [PATCH 12/32] Set Vertica env vars & persist across steps --- .github/workflows/test-warehouse.yml | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index a68b010f2..378ef3348 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -171,13 +171,16 @@ jobs: ln -sfn ${{ github.workspace }}/dbt-data-reliability dbt_project/dbt_packages/elementary pip install -r requirements.txt + - name: Set Vertica environment variables + if: inputs.warehouse-type == 'vertica' + run: | + echo "VERTICA_PORT=5433" >> $GITHUB_ENV + echo "VERTICA_USER=dbadmin" >> $GITHUB_ENV + echo "VERTICA_PASS=vertica" >> $GITHUB_ENV + echo "VERTICA_DATABASE=elementary_tests" >> $GITHUB_ENV + echo "VERTICA_SCHEMA=$SCHEMA_NAME" >> $GITHUB_ENV + - name: Start Vertica - env: - VERTICA_PORT: 5433 - VERTICA_USER: dbadmin - VERTICA_PASS: vertica - VERTICA_DATABASE: elementary_tests - VERTICA_SCHEMA: ${{ env.SCHEMA_NAME }} if: inputs.warehouse-type == 'vertica' working-directory: ${{ env.TESTS_DIR }} run: docker compose -f docker-compose-vertica.yml up -d From 570dcd0491e87fc4c05ec626a4955c9a8a355939 Mon Sep 17 00:00:00 2001 From: Jesse Cooke Date: Tue, 18 Nov 2025 11:50:28 -0800 Subject: [PATCH 13/32] Forgot VERTICA_HOST --- .github/workflows/test-warehouse.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index 378ef3348..593797a5d 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -175,6 +175,7 @@ jobs: if: inputs.warehouse-type == 'vertica' run: | echo "VERTICA_PORT=5433" >> $GITHUB_ENV + echo "VERTICA_HOST=localhost" >> $GITHUB_ENV echo "VERTICA_USER=dbadmin" >> $GITHUB_ENV echo "VERTICA_PASS=vertica" >> $GITHUB_ENV echo "VERTICA_DATABASE=elementary_tests" >> $GITHUB_ENV From bd695078a97a4647c8915824b70a5a5735fdeae1 Mon Sep 17 
00:00:00 2001 From: Jesse Cooke Date: Tue, 18 Nov 2025 12:48:29 -0800 Subject: [PATCH 14/32] Address CodeRabbit nit --- .github/workflows/test-warehouse.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index 593797a5d..857c031f0 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -134,7 +134,7 @@ jobs: run: pip install databricks-sql-connector==2.9.3 - name: Install dbt-vertica - if: ${{ inputs.warehouse-type == 'vertica' }} + if: inputs.warehouse-type == 'vertica' run: pip install "dbt-vertica==${{ inputs.dbt-vertica-version }}" From c886e4ce25fee56d8c7377b503763ea625b49148 Mon Sep 17 00:00:00 2001 From: Jesse Cooke Date: Tue, 18 Nov 2025 14:36:19 -0800 Subject: [PATCH 15/32] Try a healthcheck before moving on with Vertica I'm seeing `Database Error: [Errno 32] Broken pipe` in the `Check DWH connection` step. --- .github/workflows/test-warehouse.yml | 7 +++++++ integration_tests/docker-compose-vertica.yml | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index 857c031f0..455df6b45 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -186,6 +186,13 @@ jobs: working-directory: ${{ env.TESTS_DIR }} run: docker compose -f docker-compose-vertica.yml up -d + - name: Wait for Vertica to be ready + if: inputs.warehouse-type == 'vertica' + run: | + echo "Waiting for Vertica to be healthy..." + timeout 60 bash -c 'until [ "$(docker inspect --format="{{.State.Health.Status}}" vertica)" == "healthy" ]; do echo "Waiting..."; sleep 5; done' + echo "Vertica is ready!" 
+ - name: Check DWH connection working-directory: ${{ env.TESTS_DIR }} run: | diff --git a/integration_tests/docker-compose-vertica.yml b/integration_tests/docker-compose-vertica.yml index 1664de5f9..dca568117 100644 --- a/integration_tests/docker-compose-vertica.yml +++ b/integration_tests/docker-compose-vertica.yml @@ -27,5 +27,10 @@ services: - type: volume source: vertica-data target: /data + healthcheck: + test: ["CMD-SHELL", "/opt/vertica/bin/vsql -U dbadmin -w vertica -c 'SELECT 1;'"] + interval: 5s + timeout: 5s + retries: 10 volumes: vertica-data: From bcc9d8083e59f0c1d3cb9e401958dc7a29823a20 Mon Sep 17 00:00:00 2001 From: Jesse Cooke Date: Tue, 18 Nov 2025 15:43:00 -0800 Subject: [PATCH 16/32] Use env vars for Vertica healthcheck --- integration_tests/docker-compose-vertica.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_tests/docker-compose-vertica.yml b/integration_tests/docker-compose-vertica.yml index dca568117..bae881fb7 100644 --- a/integration_tests/docker-compose-vertica.yml +++ b/integration_tests/docker-compose-vertica.yml @@ -28,7 +28,7 @@ services: source: vertica-data target: /data healthcheck: - test: ["CMD-SHELL", "/opt/vertica/bin/vsql -U dbadmin -w vertica -c 'SELECT 1;'"] + test: ["CMD-SHELL", "/opt/vertica/bin/vsql -U ${VERTICA_USER} -w ${VERTICA_PASS} -c 'SELECT 1;'"] interval: 5s timeout: 5s retries: 10 From 9a2bf01fa4003e54c1575fb224833feddcdbe266 Mon Sep 17 00:00:00 2001 From: Jesse Cooke Date: Tue, 18 Nov 2025 15:43:36 -0800 Subject: [PATCH 17/32] Add test/CI profiles.yml fixture file I use this for local dev via `DBT_PROFILES_DIR="path/to/.github/fixtures/"` and for GitHub Actions secret `CI_PROFILES_YML`.
Linux+Wayland: `base64 .github/fixtures/profiles.yml | wl-copy` MacOS: `base64 .github/fixtures/profiles.yml | pbcopy` --- .github/fixtures/profiles.yml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 .github/fixtures/profiles.yml diff --git a/.github/fixtures/profiles.yml b/.github/fixtures/profiles.yml new file mode 100644 index 000000000..7394d01a5 --- /dev/null +++ b/.github/fixtures/profiles.yml @@ -0,0 +1,29 @@ +elementary: + target: vertica + outputs: + vertica: + type: vertica + host: "{{ env_var('VERTICA_HOST') }}" + port: "{{ env_var('VERTICA_PORT') | as_number }}" + username: "{{ env_var('VERTICA_USER') }}" + password: "{{ env_var('VERTICA_PASS') }}" + database: "{{ env_var('VERTICA_DATABASE') }}" + schema: "{{ env_var('SCHEMA_NAME') }}" + connection_load_balance: false + retries: 2 + threads: 4 + +elementary_tests: + target: vertica + outputs: + vertica: + type: vertica + host: "{{ env_var('VERTICA_HOST') }}" + port: "{{ env_var('VERTICA_PORT') | as_number }}" + username: "{{ env_var('VERTICA_USER') }}" + password: "{{ env_var('VERTICA_PASS') }}" + database: "{{ env_var('VERTICA_DATABASE') }}" + schema: "{{ env_var('SCHEMA_NAME') }}" + connection_load_balance: false + retries: 2 + threads: 4 From 2a996ef6951a309364675cf079e6b5cea70eb943 Mon Sep 17 00:00:00 2001 From: Jesse Cooke Date: Tue, 18 Nov 2025 15:47:57 -0800 Subject: [PATCH 18/32] Ignore the .user.yml in the fixtures dir --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 5172d726c..93a57e825 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ dbt_internal_packages/ logs/ scripts/ +.github/fixtures/.user.yml .idea .DS_Store From b6718225064551aebf9ba3830d317246b812ca23 Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Wed, 11 Mar 2026 16:44:41 +0200 Subject: [PATCH 19/32] fix: export SCHEMA_NAME to GITHUB_ENV for Vertica docker-compose The Vertica docker-compose and env vars steps need SCHEMA_NAME to be 
available across GitHub Actions steps. Co-Authored-By: Claude Opus 4.6 --- .github/workflows/test-warehouse.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index 6c76cf396..73a5e0b88 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -199,6 +199,7 @@ jobs: SCHEMA_NAME="dbt_${DATE_STAMP}_${SAFE_BRANCH}_${SHORT_HASH}" echo "Schema name: $SCHEMA_NAME (branch='${BRANCH_NAME}', timestamp=${DATE_STAMP}, hash of concurrency group)" + echo "SCHEMA_NAME=$SCHEMA_NAME" >> $GITHUB_ENV python "${{ github.workspace }}/dbt-data-reliability/integration_tests/profiles/generate_profiles.py" \ --template "${{ github.workspace }}/dbt-data-reliability/integration_tests/profiles/profiles.yml.j2" \ From a26598cdb4dfb39bd2179d351ed175f9d7d7e662 Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Wed, 11 Mar 2026 16:46:23 +0200 Subject: [PATCH 20/32] refactor: inline Vertica credentials instead of using env vars Hardcode Vertica connection values directly in docker-compose-vertica.yml (matching the pattern of other local adapters like sqlserver) and remove the "Set Vertica environment variables" CI step. 
Co-Authored-By: Claude Opus 4.6 --- .github/workflows/test-warehouse.yml | 10 ---------- integration_tests/docker-compose-vertica.yml | 14 +++++++++----- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index 73a5e0b88..c569172b6 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -213,16 +213,6 @@ jobs: ln -sfn ${{ github.workspace }}/dbt-data-reliability dbt_project/dbt_packages/elementary pip install -r requirements.txt - - name: Set Vertica environment variables - if: inputs.warehouse-type == 'vertica' - run: | - echo "VERTICA_PORT=5433" >> $GITHUB_ENV - echo "VERTICA_HOST=localhost" >> $GITHUB_ENV - echo "VERTICA_USER=dbadmin" >> $GITHUB_ENV - echo "VERTICA_PASS=vertica" >> $GITHUB_ENV - echo "VERTICA_DATABASE=elementary_tests" >> $GITHUB_ENV - echo "VERTICA_SCHEMA=$SCHEMA_NAME" >> $GITHUB_ENV - - name: Start Vertica if: inputs.warehouse-type == 'vertica' working-directory: ${{ env.TESTS_DIR }} diff --git a/integration_tests/docker-compose-vertica.yml b/integration_tests/docker-compose-vertica.yml index bae881fb7..d418d667a 100644 --- a/integration_tests/docker-compose-vertica.yml +++ b/integration_tests/docker-compose-vertica.yml @@ -7,15 +7,15 @@ services: VERTICA_PORT: 5433 VERTICA_DATABASE: elementary_tests VERTICA_SCHEMA: ${SCHEMA_NAME} - APP_DB_USER: ${VERTICA_USER} - APP_DB_PASSWORD: ${VERTICA_PASS} + APP_DB_USER: dbadmin + APP_DB_PASSWORD: vertica TZ: "America/Los_Angeles" - VERTICA_DB_NAME: ${VERTICA_DATABASE} + VERTICA_DB_NAME: elementary_tests VMART_ETL_SCRIPT: "" container_name: vertica image: ghcr.io/ratiopbc/vertica-ce ports: - - "${VERTICA_PORT}:${VERTICA_PORT}" + - "5433:5433" - "5444:5444" deploy: mode: global @@ -28,7 +28,11 @@ services: source: vertica-data target: /data healthcheck: - test: ["CMD-SHELL", "/opt/vertica/bin/vsql -U ${VERTICA_USER} -w ${VERTICA_PASS} -c 'SELECT 1;'"] + test: + [ + "CMD-SHELL", + 
"/opt/vertica/bin/vsql -U dbadmin -w vertica -c 'SELECT 1;'", + ] interval: 5s timeout: 5s retries: 10 From 432696036c6991737cee19eb27c3ea706f7aa324 Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Wed, 11 Mar 2026 16:46:57 +0200 Subject: [PATCH 21/32] revert: remove unnecessary SCHEMA_NAME export to GITHUB_ENV No longer needed since Vertica env vars step was removed. Co-Authored-By: Claude Opus 4.6 --- .github/workflows/test-warehouse.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index c569172b6..e4394407d 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -199,7 +199,6 @@ jobs: SCHEMA_NAME="dbt_${DATE_STAMP}_${SAFE_BRANCH}_${SHORT_HASH}" echo "Schema name: $SCHEMA_NAME (branch='${BRANCH_NAME}', timestamp=${DATE_STAMP}, hash of concurrency group)" - echo "SCHEMA_NAME=$SCHEMA_NAME" >> $GITHUB_ENV python "${{ github.workspace }}/dbt-data-reliability/integration_tests/profiles/generate_profiles.py" \ --template "${{ github.workspace }}/dbt-data-reliability/integration_tests/profiles/profiles.yml.j2" \ From 731ca2f997952a08146ad53e44cc7b3300b0bb60 Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Wed, 11 Mar 2026 16:50:14 +0200 Subject: [PATCH 22/32] refactor: remove dbt-vertica-version input parameter Just install latest dbt-vertica instead of pinning a specific version. 
Co-Authored-By: Claude Opus 4.6 --- .github/workflows/test-warehouse.yml | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index e4394407d..a1daedd05 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -34,11 +34,6 @@ on: required: false default: "latest_official" description: dbt's version to test with - dbt-vertica-version: - type: string - required: false - default: "1.8.5" - description: dbt-vertica's version to test with workflow_call: inputs: @@ -55,10 +50,6 @@ on: type: string default: "latest_official" required: false - dbt-vertica-version: - type: string - default: "1.8.5" - required: false env: BRANCH_NAME: ${{ github.head_ref || github.ref_name }} @@ -163,7 +154,7 @@ jobs: - name: Install dbt-vertica if: inputs.warehouse-type == 'vertica' - run: pip install "dbt-vertica==${{ inputs.dbt-vertica-version }}" + run: pip install dbt-vertica - name: Install dbt if: ${{ inputs.dbt-version != 'fusion' && inputs.warehouse-type != 'vertica' }} From cf6aed7458a30077dd0b9738799fbda6652e974d Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Wed, 11 Mar 2026 18:34:35 +0000 Subject: [PATCH 23/32] fix: Vertica adapter compatibility fixes for integration tests - Add VerticaDirectSeeder with direct vertica_python connection for atomic DDL+DML+COMMIT - Add vertica__get_normalized_data_type macro to normalize VARCHAR/INT types - Add vertica__get_default_config with query_max_size=250000 - Truncate message field in on_run_result_query_exceed to handle long error messages - Add vertica__edr_type_string (varchar(16000)) and vertica__edr_type_long_string (varchar(32000)) - Add vertica__full_name_split using split_part instead of array subscripts - Add vertica__buckets_cte, vertica__target_database, vertica__day_of_week, vertica__hour_of_week - Add 
vertica__get_relation_max_length with Vertica identifier limit (128 chars) Co-Authored-By: Itamar Hartstein --- integration_tests/tests/data_seeder.py | 114 ++++++++++++++++++ integration_tests/tests/dbt_project.py | 17 ++- .../edr/dbt_artifacts/upload_run_results.sql | 8 ++ .../edr/system/system_utils/buckets_cte.sql | 23 ++++ macros/edr/system/system_utils/full_names.sql | 30 +++++ .../system/system_utils/get_config_var.sql | 8 ++ macros/utils/cross_db_utils/day_of_week.sql | 4 + macros/utils/cross_db_utils/hour_of_week.sql | 11 ++ .../utils/cross_db_utils/target_database.sql | 2 + macros/utils/data_types/data_type.sql | 9 ++ .../data_types/get_normalized_data_type.sql | 13 ++ .../get_relation_max_length.sql | 4 + 12 files changed, 241 insertions(+), 2 deletions(-) diff --git a/integration_tests/tests/data_seeder.py b/integration_tests/tests/data_seeder.py index 383e6d9b8..57f19a380 100644 --- a/integration_tests/tests/data_seeder.py +++ b/integration_tests/tests/data_seeder.py @@ -454,3 +454,117 @@ def _create_table_sql(self, fq_table: str, col_defs: str) -> str: f"CREATE TABLE {fq_table} ({col_defs}) " f"ENGINE = MergeTree() ORDER BY tuple()" ) + + +class VerticaDirectSeeder(BaseSqlInsertSeeder): + """Fast seeder for Vertica: executes CREATE TABLE + INSERT directly. + + Bypasses ``dbt seed`` (which uses Vertica's COPY command) because COPY + rejects empty CSV fields for non-string columns instead of treating them + as NULL. Direct INSERT statements handle NULL correctly. + + Uses a *direct* ``vertica_python`` connection (rather than dbt's adapter + connection pool) so that all DDL + DML runs in a single session and can + be committed atomically. dbt's ``connection_named`` context manager + releases (and effectively rolls back) the connection after each + ``execute_sql`` call, which caused INSERT data to be invisible to + subsequent ``dbt test`` sessions. 
+ + Vertica uses double-quote identifiers (not backticks), so this class + overrides the ``seed`` method to use ``"col"`` quoting. + """ + + def _type_string(self) -> str: + # Must match edr_type_string (varchar(16000)) so that schema-change + # detection sees a consistent type between seeded tables and + # elementary metadata columns. + return "VARCHAR(16000)" + + def _type_boolean(self) -> str: + return "BOOLEAN" + + def _type_integer(self) -> str: + return "INTEGER" + + def _type_float(self) -> str: + return "FLOAT" + + def _format_value(self, value: object, col_type: str) -> str: + if value is None or (isinstance(value, str) and value == ""): + return "NULL" + if isinstance(value, bool): + return "true" if value else "false" + if isinstance(value, (int, float)): + return str(value) + text = str(value) + text = text.replace("'", "''") + return f"'{text}'" + + def _create_table_sql(self, fq_table: str, col_defs: str) -> str: + return f"CREATE TABLE {fq_table} ({col_defs})" + + @staticmethod + def _vertica_connection(): + """Open a direct vertica_python connection from env / defaults.""" + import vertica_python # available in the test venv + + conn_info = { + "host": os.environ.get("VERTICA_HOST", "localhost"), + "port": int(os.environ.get("VERTICA_PORT", "5433")), + "user": os.environ.get("VERTICA_USER", "dbadmin"), + "password": os.environ.get("VERTICA_PASSWORD", "vertica"), + "database": os.environ.get("VERTICA_DATABASE", "elementary_tests"), + } + return vertica_python.connect(**conn_info) + + @contextmanager + def seed(self, data: List[dict], table_name: str) -> Generator[None, None, None]: + """Override base seed to use double-quote identifiers for Vertica.""" + columns = list(data[0].keys()) + col_types: Dict[str, str] = { + col: self._infer_column_type([row.get(col) for row in data]) + for col in columns + } + # Vertica uses double-quote identifiers, not backticks. 
+ col_defs = ", ".join(f'"{col}" {col_types[col]}' for col in columns) + fq_table = f'"{self._schema}"."{table_name}"' + + seed_path = self._write_csv(data, table_name) + + try: + # Use a direct connection so DDL + DML share the same session + # and the COMMIT is guaranteed to persist the data. + conn = self._vertica_connection() + try: + cur = conn.cursor() + cur.execute(f"DROP TABLE IF EXISTS {fq_table}") + cur.execute(self._create_table_sql(fq_table, col_defs)) + + for batch_start in range(0, len(data), _INSERT_BATCH_SIZE): + batch = data[batch_start : batch_start + _INSERT_BATCH_SIZE] + rows_sql = ", ".join( + "(" + + ", ".join( + self._format_value(row.get(c), col_types[c]) + for c in columns + ) + + ")" + for row in batch + ) + cur.execute(f"INSERT INTO {fq_table} VALUES {rows_sql}") + + conn.commit() + finally: + conn.close() + + logger.info( + "%s: loaded %d rows into %s (%s)", + type(self).__name__, + len(data), + fq_table, + ", ".join(f"{c}: {t}" for c, t in col_types.items()), + ) + + yield + finally: + seed_path.unlink(missing_ok=True) diff --git a/integration_tests/tests/dbt_project.py b/integration_tests/tests/dbt_project.py index 67c4373da..4ce2eac10 100644 --- a/integration_tests/tests/dbt_project.py +++ b/integration_tests/tests/dbt_project.py @@ -7,7 +7,12 @@ from uuid import uuid4 from adapter_query_runner import AdapterQueryRunner, UnsupportedJinjaError -from data_seeder import ClickHouseDirectSeeder, DbtDataSeeder, SparkS3CsvSeeder +from data_seeder import ( + ClickHouseDirectSeeder, + DbtDataSeeder, + SparkS3CsvSeeder, + VerticaDirectSeeder, +) from dbt_utils import get_database_and_schema_properties from elementary.clients.dbt.base_dbt_runner import BaseDbtRunner from elementary.clients.dbt.factory import RunnerMethod, create_dbt_runner @@ -357,7 +362,9 @@ def _read_profile_schema(self) -> str: def _create_seeder( self, - ) -> Union[DbtDataSeeder, ClickHouseDirectSeeder, SparkS3CsvSeeder]: + ) -> Union[ + DbtDataSeeder, 
ClickHouseDirectSeeder, SparkS3CsvSeeder, VerticaDirectSeeder + ]: """Return the fastest available seeder for the current target.""" if self.target == "clickhouse": runner = self._get_query_runner() @@ -369,6 +376,12 @@ def _create_seeder( # set_from_args / reset_adapters). schema = self._read_profile_schema() + SCHEMA_NAME_SUFFIX return SparkS3CsvSeeder(schema, self.seeds_dir_path) + if self.target == "vertica": + # Vertica's COPY command (used by dbt seed) rejects empty CSV + # fields for non-string columns. Use direct INSERT instead. + runner = self._get_query_runner() + schema = runner.schema_name + SCHEMA_NAME_SUFFIX + return VerticaDirectSeeder(runner, schema, self.seeds_dir_path) return DbtDataSeeder( self.dbt_runner, self.project_dir_path, self.seeds_dir_path ) diff --git a/macros/edr/dbt_artifacts/upload_run_results.sql b/macros/edr/dbt_artifacts/upload_run_results.sql index bef9794c1..251f9737b 100644 --- a/macros/edr/dbt_artifacts/upload_run_results.sql +++ b/macros/edr/dbt_artifacts/upload_run_results.sql @@ -124,4 +124,12 @@ {% do flattened_node.update( {"compiled_code": elementary.get_compiled_code_too_long_err_msg()} ) %} + {#- On adapters with limited string-literal / varchar sizes (e.g. Vertica + 65 000 bytes) the error *message* can also embed the full compiled SQL, + making the INSERT statement exceed the adapter's limits. Truncate the + message so the row can still be persisted. -#} + {% set msg = flattened_node.get("message", "") %} + {% if msg is string and msg | length > 4096 %} + {% do flattened_node.update({"message": msg[:4096] ~ "... 
(truncated)"}) %} + {% endif %} {% endmacro %} diff --git a/macros/edr/system/system_utils/buckets_cte.sql b/macros/edr/system/system_utils/buckets_cte.sql index b45b2b999..805a8ce87 100644 --- a/macros/edr/system/system_utils/buckets_cte.sql +++ b/macros/edr/system/system_utils/buckets_cte.sql @@ -257,6 +257,29 @@ {{ return(complete_buckets_cte) }} {% endmacro %} +{% macro vertica__complete_buckets_cte( + time_bucket, + bucket_end_expr, + min_bucket_start_expr, + max_bucket_end_expr +) -%} + {%- set complete_buckets_cte %} + with integers as ( + select (row_number() over ()) - 1 as num + from (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) t1(v) + cross join (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) t2(v) + cross join (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) t3(v) + cross join (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) t4(v) + ) + select + {{ elementary.edr_timeadd(time_bucket.period, 'num * ' ~ time_bucket.count, min_bucket_start_expr) }} as edr_bucket_start, + {{ elementary.edr_timeadd(time_bucket.period, '(num + 1) * ' ~ time_bucket.count, min_bucket_start_expr) }} as edr_bucket_end + from integers + where {{ elementary.edr_timeadd(time_bucket.period, '(num + 1) * ' ~ time_bucket.count, min_bucket_start_expr) }} <= {{ max_bucket_end_expr }} + {%- endset %} + {{ return(complete_buckets_cte) }} +{% endmacro %} + {% macro dremio__complete_buckets_cte( time_bucket, bucket_end_expr, min_bucket_start_expr, 
max_bucket_end_expr ) %} diff --git a/macros/edr/system/system_utils/full_names.sql b/macros/edr/system/system_utils/full_names.sql index 00e7cdc2a..6cdd1e6b6 100644 --- a/macros/edr/system/system_utils/full_names.sql +++ b/macros/edr/system/system_utils/full_names.sql @@ -15,6 +15,14 @@ upper({{ alias_dot }}schema_name || '.' || {{ alias_dot }}table_name) {%- endmacro %} +{% macro vertica__full_table_name(alias) -%} + {# Vertica: upper() doubles varchar byte-length; cast to varchar(1000) first to stay under 65000 limit #} + {% if alias is defined %} {%- set alias_dot = alias ~ "." %} {% endif %} + upper(cast( + {{ alias_dot }}database_name || '.' || {{ alias_dot }}schema_name || '.' || {{ alias_dot }}table_name + as varchar(1000))) +{%- endmacro %} + {% macro full_schema_name() -%} {{ adapter.dispatch("full_schema_name", "elementary")() }} @@ -29,6 +37,11 @@ upper(schema_name) {%- endmacro %} +{% macro vertica__full_schema_name() -%} + {# Vertica: upper() doubles varchar byte-length; cast first to stay under 65000 limit #} + upper(cast(database_name || '.' || schema_name as varchar(1000))) +{%- endmacro %} + {% macro full_column_name() -%} {{ adapter.dispatch("full_column_name", "elementary")() }} @@ -45,11 +58,28 @@ upper(schema_name || '.' || table_name || '.' || column_name) {%- endmacro %} +{% macro vertica__full_column_name() -%} + {# Vertica: upper() doubles varchar byte-length; cast first to stay under 65000 limit #} + upper(cast( + database_name || '.' || schema_name || '.' || table_name || '.' 
|| column_name + as varchar(1000))) +{%- endmacro %} + {% macro full_name_split(part_name) %} {{ adapter.dispatch("full_name_split", "elementary")(part_name) }} {% endmacro %} +{% macro vertica__full_name_split(part_name) %} + {# Vertica supports split_part (1-based index) but not array subscript syntax #} + {%- if part_name == "database_name" -%} {%- set part_index = 1 -%} + {%- elif part_name == "schema_name" -%} {%- set part_index = 2 -%} + {%- elif part_name == "table_name" -%} {%- set part_index = 3 -%} + {%- else -%} {{ return("") }} + {%- endif -%} + trim(both '"' from split_part(full_table_name, '.', {{ part_index }})) as {{ part_name }} +{% endmacro %} + {% macro default__full_name_split(part_name) %} {%- if part_name == "database_name" -%} {%- set part_index = 0 -%} diff --git a/macros/edr/system/system_utils/get_config_var.sql b/macros/edr/system/system_utils/get_config_var.sql index 10e9f7501..33106452b 100644 --- a/macros/edr/system/system_utils/get_config_var.sql +++ b/macros/edr/system/system_utils/get_config_var.sql @@ -171,6 +171,14 @@ {{- return(default_config) -}} {%- endmacro -%} +{%- macro vertica__get_default_config() -%} + {% set default_config = elementary.default__get_default_config() %} + {# Vertica varchar columns max out at 65000 bytes. edr_type_long_string + is varchar(32000), so keep INSERT statements well within that limit. 
#} + {% do default_config.update({"query_max_size": 250000}) %} + {{- return(default_config) -}} +{%- endmacro -%} + {%- macro dremio__get_default_config() -%} {% set default_config = elementary.default__get_default_config() %} {% do default_config.update({"dbt_artifacts_chunk_size": 100}) %} diff --git a/macros/utils/cross_db_utils/day_of_week.sql b/macros/utils/cross_db_utils/day_of_week.sql index 212857263..88bec3caa 100644 --- a/macros/utils/cross_db_utils/day_of_week.sql +++ b/macros/utils/cross_db_utils/day_of_week.sql @@ -63,6 +63,10 @@ to_char({{ date_expr }}, 'DAY') {% endmacro %} +{% macro vertica__edr_day_of_week_expression(date_expr) %} + trim(' ' from to_char({{ date_expr }}, 'Day')) +{% endmacro %} + {% macro fabric__edr_day_of_week_expression(date_expr) %} cast(datename(weekday, {{ date_expr }}) as varchar(30)) {% endmacro %} diff --git a/macros/utils/cross_db_utils/hour_of_week.sql b/macros/utils/cross_db_utils/hour_of_week.sql index a4758f395..632c503bf 100644 --- a/macros/utils/cross_db_utils/hour_of_week.sql +++ b/macros/utils/cross_db_utils/hour_of_week.sql @@ -78,6 +78,17 @@ {% endmacro %} -- fmt: on +{% macro vertica__edr_hour_of_week_expression(date_expr) %} + concat( + cast( + trim( + ' ' from to_char({{ date_expr }}, 'Day') + ) as {{ elementary.edr_type_string() }} + ), + cast(extract(hour from {{ date_expr }}) as {{ elementary.edr_type_string() }}) + ) +{% endmacro %} + {% macro fabric__edr_hour_of_week_expression(date_expr) %} concat( cast(datename(weekday, {{ date_expr }}) as {{ elementary.edr_type_string() }}), diff --git a/macros/utils/cross_db_utils/target_database.sql b/macros/utils/cross_db_utils/target_database.sql index 5361f5d86..e1653732b 100644 --- a/macros/utils/cross_db_utils/target_database.sql +++ b/macros/utils/cross_db_utils/target_database.sql @@ -26,3 +26,5 @@ {% macro fabric__target_database() %} {% do return(target.database) %} {% endmacro %} {% macro sqlserver__target_database() %} {% do return(target.database) %} {% 
endmacro %} + +{% macro vertica__target_database() %} {% do return(target.database) %} {% endmacro %} diff --git a/macros/utils/data_types/data_type.sql b/macros/utils/data_types/data_type.sql index 96a371b1d..10c907e3b 100644 --- a/macros/utils/data_types/data_type.sql +++ b/macros/utils/data_types/data_type.sql @@ -48,6 +48,8 @@ {% macro fabric__edr_type_string() %} {% do return("varchar(4096)") %} {% endmacro %} +{% macro vertica__edr_type_string() %} {% do return("varchar(16000)") %} {% endmacro %} + {%- macro edr_type_long_string() -%} {{ return(adapter.dispatch("edr_type_long_string", "elementary")()) }} @@ -71,6 +73,13 @@ {#- T-SQL: varchar(4096) is too small for compiled query text. Use varchar(max) which supports up to 2 GB. -#} +{%- macro vertica__edr_type_long_string() -%} + {% do return("varchar(32000)") %} +{%- endmacro -%} +{#- Vertica note: edr_type_string uses varchar(16000) because Vertica's + lower()/upper() double the byte-length. 16000 * 2 = 32000, safely + under the 65000 octet limit even when the function is applied twice + (e.g. lower(lower(col)) in nested subqueries). -#} {%- macro fabric__edr_type_long_string() -%} {% do return("varchar(max)") %} {%- endmacro -%} diff --git a/macros/utils/data_types/get_normalized_data_type.sql b/macros/utils/data_types/get_normalized_data_type.sql index c4a24fda7..aca2baa03 100644 --- a/macros/utils/data_types/get_normalized_data_type.sql +++ b/macros/utils/data_types/get_normalized_data_type.sql @@ -210,6 +210,19 @@ {%- endif %} {% endmacro %} +{% macro vertica__get_normalized_data_type(exact_data_type) %} + {# Vertica reports types like VARCHAR(16000), INT, BOOLEAN. + Normalize to match the canonical names used in test baselines and + other adapters. 
#} + {%- if exact_data_type.startswith("VARCHAR") or exact_data_type.startswith( + "CHAR" + ) or exact_data_type == "LONG VARCHAR" %} + {{ return("TEXT") }} + {%- elif exact_data_type == "INT" %} {{ return("INTEGER") }} + {%- else %} {{ return(exact_data_type) }} + {%- endif %} +{% endmacro %} + {% macro postgres__get_normalized_data_type(exact_data_type) %} {# understanding Postgres data type synonyms: https://www.postgresql.org/docs/current/datatype.html #} diff --git a/macros/utils/table_operations/get_relation_max_length.sql b/macros/utils/table_operations/get_relation_max_length.sql index 7dfaf4a2c..02e9577f1 100644 --- a/macros/utils/table_operations/get_relation_max_length.sql +++ b/macros/utils/table_operations/get_relation_max_length.sql @@ -39,6 +39,10 @@ {{ return(128) }} {% endmacro %} +{% macro vertica__get_relation_max_name_length(temporary, relation, sql_query) %} + {{ return(128) }} +{% endmacro %} + {% macro fabric__get_relation_max_name_length(temporary, relation, sql_query) %} {# SQL Server / Fabric limits identifiers to 128 chars. dbt-sqlserver may prefix the schema name onto the table identifier when creating From 899f1466d88868e2405e948e526ec2dbb1bf7f64 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Wed, 11 Mar 2026 19:12:29 +0000 Subject: [PATCH 24/32] fix: upgrade dbt-core for Vertica CI to support 'arguments' test property dbt-vertica pins dbt-core~=1.8 which lacks native support for the 'arguments' test property used by the integration-test framework. This caused all Vertica tests to fail in CI with: macro 'dbt_macro__test_volume_anomalies' takes no keyword argument 'arguments' Upgrade dbt-core after installing dbt-vertica (dbt-vertica 1.8.5 works fine with newer dbt-core versions, as verified locally). 
Co-Authored-By: Itamar Hartstein --- .github/workflows/test-warehouse.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index a1daedd05..9af93fbab 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -154,7 +154,13 @@ jobs: - name: Install dbt-vertica if: inputs.warehouse-type == 'vertica' - run: pip install dbt-vertica + run: | + pip install dbt-vertica + # dbt-vertica pins dbt-core~=1.8 which lacks native support for the + # "arguments" test property used by the integration-test framework. + # Upgrade dbt-core to the latest release (dbt-vertica works fine with + # newer dbt-core versions). + pip install "dbt-core" - name: Install dbt if: ${{ inputs.dbt-version != 'fusion' && inputs.warehouse-type != 'vertica' }} From 139799c8a97d029e0fe68223dd12aa379ea46b6a Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Wed, 11 Mar 2026 19:34:17 +0000 Subject: [PATCH 25/32] fix: install dbt-vertica with --no-deps to allow latest dbt-core The previous approach (pip install dbt-core after dbt-vertica) didn't upgrade because pip saw 1.8.5 as satisfying the bare requirement. Install dbt-vertica --no-deps then install vertica-python + dbt-core separately so the latest dbt-core is used. 
Co-Authored-By: Itamar Hartstein --- .github/workflows/test-warehouse.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index 9af93fbab..764c71f31 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -155,12 +155,12 @@ jobs: - name: Install dbt-vertica if: inputs.warehouse-type == 'vertica' run: | - pip install dbt-vertica # dbt-vertica pins dbt-core~=1.8 which lacks native support for the # "arguments" test property used by the integration-test framework. - # Upgrade dbt-core to the latest release (dbt-vertica works fine with - # newer dbt-core versions). - pip install "dbt-core" + # Install dbt-vertica without deps, then install latest dbt-core + # separately (dbt-vertica works fine with newer dbt-core versions). + pip install dbt-vertica --no-deps + pip install vertica-python "dbt-core" - name: Install dbt if: ${{ inputs.dbt-version != 'fusion' && inputs.warehouse-type != 'vertica' }} From 71b930e5946613d831442be4bdea069524be5c78 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Wed, 11 Mar 2026 19:50:58 +0000 Subject: [PATCH 26/32] fix: override dbt-vertica seed macro to use unique reject table per seed Co-Authored-By: Itamar Hartstein --- .../macros/vertica_seed_override.sql | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 integration_tests/dbt_project/macros/vertica_seed_override.sql diff --git a/integration_tests/dbt_project/macros/vertica_seed_override.sql b/integration_tests/dbt_project/macros/vertica_seed_override.sql new file mode 100644 index 000000000..ab9c3a25b --- /dev/null +++ b/integration_tests/dbt_project/macros/vertica_seed_override.sql @@ -0,0 +1,23 @@ +{#- Override the dbt-vertica seed helper so that each seed file uses a + unique reject-table name. 
The upstream macro hardcodes + ``seed_rejects`` for every seed, which causes "Object already exists" + errors when ``dbt seed`` processes more than one file. -#} +{% macro copy_local_load_csv_rows(model, agate_table) %} + {% set cols_sql = get_seed_column_quoted_csv(model, agate_table.column_names) %} + + {#- Build a per-seed reject table name so concurrent seeds don't clash. -#} + {% set reject_table = model["alias"] ~ "_rejects" %} + + {% set sql %} + copy {{ this.render() }} + ({{ cols_sql }}) + from local '{{ agate_table.original_abspath }}' + delimiter ',' + enclosed by '"' + skip 1 + abort on error + rejected data as table {{ this.without_identifier() }}.{{ reject_table }}; + {% endset %} + + {{ return(sql) }} +{% endmacro %} From 326e20cd7004038a207898eabbfeb9fa6890e7e7 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Wed, 11 Mar 2026 21:05:04 +0000 Subject: [PATCH 27/32] fix: address Vertica CI workflow, schema cleanup, and stddev precision 1. CI workflow: honor dbt-version input for Vertica installs and reject unsupported Vertica+Fusion combinations with explicit error message. 2. Schema cleanup: add Vertica dispatches for edr_create_schema, edr_drop_schema, edr_schema_exists, and edr_list_schemas using v_catalog.schemata (Vertica lacks information_schema) and without adapter.commit() (Vertica DDL is auto-committed). 3. Anomaly detection: add edr_normalize_stddev dispatched macro to round(training_stddev, 6) on Vertica, fixing floating-point artifacts where STDDEV returns ~4e-08 for identical values. 
Co-Authored-By: Itamar Hartstein --- .github/workflows/test-warehouse.yml | 16 ++++++++++++---- .../test_drop_stale_ci_schemas.sql | 6 ++++++ .../dbt_project/macros/clear_env.sql | 6 ++++++ .../macros/schema_utils/list_schemas.sql | 9 +++++++++ .../macros/schema_utils/schema_exists.sql | 11 +++++++++++ .../get_anomaly_scores_query.sql | 17 +++++++++-------- .../monitors/column_numeric_monitors.sql | 17 +++++++++++++++++ .../anomaly_detection/metrics_anomaly_score.sql | 8 +++++--- 8 files changed, 75 insertions(+), 15 deletions(-) diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index 764c71f31..b90c243be 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -152,15 +152,23 @@ jobs: if: startsWith(inputs.warehouse-type, 'databricks') && inputs.dbt-version < '1.7.0' run: pip install databricks-sql-connector==2.9.3 + - name: Reject unsupported Vertica + Fusion combination + if: inputs.warehouse-type == 'vertica' && inputs.dbt-version == 'fusion' + run: | + echo "::error::dbt Fusion does not support third-party adapters such as dbt-vertica." + exit 1 + - name: Install dbt-vertica - if: inputs.warehouse-type == 'vertica' + if: inputs.warehouse-type == 'vertica' && inputs.dbt-version != 'fusion' run: | # dbt-vertica pins dbt-core~=1.8 which lacks native support for the # "arguments" test property used by the integration-test framework. - # Install dbt-vertica without deps, then install latest dbt-core - # separately (dbt-vertica works fine with newer dbt-core versions). + # Install dbt-vertica without deps, then install the requested + # dbt-core version separately (dbt-vertica works fine with newer + # dbt-core versions). 
pip install dbt-vertica --no-deps - pip install vertica-python "dbt-core" + pip install vertica-python \ + "dbt-core${{ (!startsWith(inputs.dbt-version, 'latest') && format('=={0}', inputs.dbt-version)) || '' }}" - name: Install dbt if: ${{ inputs.dbt-version != 'fusion' && inputs.warehouse-type != 'vertica' }} diff --git a/integration_tests/dbt_project/macros/ci_schemas_cleanup/test_drop_stale_ci_schemas.sql b/integration_tests/dbt_project/macros/ci_schemas_cleanup/test_drop_stale_ci_schemas.sql index 88006f3bb..b3165387d 100644 --- a/integration_tests/dbt_project/macros/ci_schemas_cleanup/test_drop_stale_ci_schemas.sql +++ b/integration_tests/dbt_project/macros/ci_schemas_cleanup/test_drop_stale_ci_schemas.sql @@ -98,3 +98,9 @@ {% set safe_schema = schema_name | replace("`", "``") %} {% do run_query("CREATE DATABASE IF NOT EXISTS `" ~ safe_schema ~ "`") %} {% endmacro %} + +{% macro vertica__edr_create_schema(database, schema_name) %} + {#- Vertica DDL is auto-committed; an explicit adapter.commit() would + fail with "no transaction in progress". -#} + {% do run_query("CREATE SCHEMA IF NOT EXISTS " ~ schema_name) %} +{% endmacro %} diff --git a/integration_tests/dbt_project/macros/clear_env.sql b/integration_tests/dbt_project/macros/clear_env.sql index de935417a..4ed014dd6 100644 --- a/integration_tests/dbt_project/macros/clear_env.sql +++ b/integration_tests/dbt_project/macros/clear_env.sql @@ -82,3 +82,9 @@ {% do run_query("DROP SCHEMA IF EXISTS " ~ schema_name ~ " CASCADE") %} {% do adapter.commit() %} {% endmacro %} + +{% macro vertica__edr_drop_schema(database_name, schema_name) %} + {#- Vertica DDL is auto-committed; an explicit adapter.commit() would + fail with "no transaction in progress". 
-#} + {% do run_query("DROP SCHEMA IF EXISTS " ~ schema_name ~ " CASCADE") %} +{% endmacro %} diff --git a/integration_tests/dbt_project/macros/schema_utils/list_schemas.sql b/integration_tests/dbt_project/macros/schema_utils/list_schemas.sql index e5986f810..7e1afe0c7 100644 --- a/integration_tests/dbt_project/macros/schema_utils/list_schemas.sql +++ b/integration_tests/dbt_project/macros/schema_utils/list_schemas.sql @@ -54,3 +54,12 @@ {% for row in results %} {% do schemas.append(row[0]) %} {% endfor %} {% do return(schemas) %} {% endmacro %} + +{% macro vertica__edr_list_schemas(database) %} + {#- Vertica's v_catalog.schemata is scoped to the current database and + does not have a database_name filter column. -#} + {% set results = run_query("SELECT schema_name FROM v_catalog.schemata") %} + {% set schemas = [] %} + {% for row in results %} {% do schemas.append(row[0]) %} {% endfor %} + {% do return(schemas) %} +{% endmacro %} diff --git a/integration_tests/dbt_project/macros/schema_utils/schema_exists.sql b/integration_tests/dbt_project/macros/schema_utils/schema_exists.sql index 06fab2592..eb65dba34 100644 --- a/integration_tests/dbt_project/macros/schema_utils/schema_exists.sql +++ b/integration_tests/dbt_project/macros/schema_utils/schema_exists.sql @@ -64,3 +64,14 @@ {% set result = run_query("SHOW DATABASES LIKE '" ~ safe_schema ~ "'") %} {% do return(result | length > 0) %} {% endmacro %} + +{% macro vertica__edr_schema_exists(database, schema_name) %} + {#- Vertica's v_catalog.schemata is scoped to the current database. 
-#} + {% set safe_schema = schema_name | replace("'", "''") %} + {% set result = run_query( + "SELECT schema_name FROM v_catalog.schemata WHERE lower(schema_name) = lower('" + ~ safe_schema + ~ "')" + ) %} + {% do return(result | length > 0) %} +{% endmacro %} diff --git a/macros/edr/data_monitoring/anomaly_detection/get_anomaly_scores_query.sql b/macros/edr/data_monitoring/anomaly_detection/get_anomaly_scores_query.sql index 5c3eeacdd..3e76e3dab 100644 --- a/macros/edr/data_monitoring/anomaly_detection/get_anomaly_scores_query.sql +++ b/macros/edr/data_monitoring/anomaly_detection/get_anomaly_scores_query.sql @@ -262,10 +262,10 @@ column_name, metric_name, case - when training_stddev is null then null + when {{ elementary.edr_normalize_stddev('training_stddev') }} is null then null when training_set_size = 1 then null -- Single value case - no historical context for anomaly detection - when training_stddev = 0 then 0 -- Stationary data case - valid, all values are identical - else (metric_value - training_avg) / (training_stddev) + when {{ elementary.edr_normalize_stddev('training_stddev') }} = 0 then 0 -- Stationary data case - valid, all values are identical + else (metric_value - training_avg) / ({{ elementary.edr_normalize_stddev('training_stddev') }}) end as anomaly_score, {{ test_configuration.anomaly_sensitivity }} as anomaly_score_threshold, source_value as anomalous_value, @@ -276,16 +276,16 @@ {% set limit_values = elementary.get_limit_metric_values(test_configuration) %} case - when training_stddev is null or training_set_size = 1 then null + when {{ elementary.edr_normalize_stddev('training_stddev') }} is null or training_set_size = 1 then null when {{ limit_values.min_metric_value }} > 0 or metric_name in {{ elementary.to_sql_list(elementary.get_negative_value_supported_metrics()) }} then {{ limit_values.min_metric_value }} else 0 end as min_metric_value, case - when training_stddev is null or training_set_size = 1 then null + when {{ 
elementary.edr_normalize_stddev('training_stddev') }} is null or training_set_size = 1 then null else {{ limit_values.max_metric_value }} end as max_metric_value, training_avg, - training_stddev, + {{ elementary.edr_normalize_stddev('training_stddev') }} as training_stddev, training_set_size, {{ elementary.edr_cast_as_timestamp('training_start') }} as training_start, {{ elementary.edr_cast_as_timestamp('training_end') }} as training_end, @@ -307,8 +307,9 @@ {% endmacro %} {% macro get_limit_metric_values(test_configuration) %} + {%- set normalized_stddev = elementary.edr_normalize_stddev("training_stddev") -%} {%- set min_val -%} - ((-1) * {{ test_configuration.anomaly_sensitivity }} * training_stddev + training_avg) + ((-1) * {{ test_configuration.anomaly_sensitivity }} * {{ normalized_stddev }} + training_avg) {%- endset -%} {% if test_configuration.ignore_small_changes.drop_failure_percent_threshold %} @@ -321,7 +322,7 @@ {% endif %} {%- set max_val -%} - ({{ test_configuration.anomaly_sensitivity }} * training_stddev + training_avg) + ({{ test_configuration.anomaly_sensitivity }} * {{ normalized_stddev }} + training_avg) {%- endset -%} {% if test_configuration.ignore_small_changes.spike_failure_percent_threshold %} diff --git a/macros/edr/data_monitoring/monitors/column_numeric_monitors.sql b/macros/edr/data_monitoring/monitors/column_numeric_monitors.sql index 1618bb205..7eb82c77c 100644 --- a/macros/edr/data_monitoring/monitors/column_numeric_monitors.sql +++ b/macros/edr/data_monitoring/monitors/column_numeric_monitors.sql @@ -86,3 +86,20 @@ {% macro sum(column_name) -%} sum(cast({{ column_name }} as {{ elementary.edr_type_float() }})) {%- endmacro %} + +{#- edr_normalize_stddev – post-process a stddev column reference so that + floating-point artefacts (tiny non-zero values for constant inputs) are + cleaned up. 
The default implementation is the identity function; Vertica + overrides it with round() because its STDDEV can return ~4e-08 for + perfectly identical values. -#} +{% macro edr_normalize_stddev(column_expr) -%} + {{ adapter.dispatch("edr_normalize_stddev", "elementary")(column_expr) }} +{%- endmacro %} + +{% macro default__edr_normalize_stddev(column_expr) -%} + {{ column_expr }} +{%- endmacro %} + +{% macro vertica__edr_normalize_stddev(column_expr) -%} + round({{ column_expr }}, 6) +{%- endmacro %} diff --git a/models/edr/data_monitoring/anomaly_detection/metrics_anomaly_score.sql b/models/edr/data_monitoring/anomaly_detection/metrics_anomaly_score.sql index 8f9b8b42c..6bdee2ce7 100644 --- a/models/edr/data_monitoring/anomaly_detection/metrics_anomaly_score.sql +++ b/models/edr/data_monitoring/anomaly_detection/metrics_anomaly_score.sql @@ -64,13 +64,15 @@ with select *, case - when training_stddev is null + when {{ elementary.edr_normalize_stddev("training_stddev") }} is null then null when training_set_size = 1 then null -- Single value case - no historical context for anomaly detection - when training_stddev = 0 + when {{ elementary.edr_normalize_stddev("training_stddev") }} = 0 then 0 -- Stationary data case - valid, all values are identical - else (metric_value - training_avg) / (training_stddev) + else + (metric_value - training_avg) + / ({{ elementary.edr_normalize_stddev("training_stddev") }}) end as anomaly_score from time_window_aggregation From 7d77e111319c17765c790ef2678ee91d4998f1aa Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 12 Mar 2026 01:08:26 +0000 Subject: [PATCH 28/32] fix: add empty-seed guard and clarify query_max_size comment - VerticaDirectSeeder.seed() now raises ValueError on empty data instead of IndexError (consistent with other seeders) - Updated vertica__get_default_config comment to clarify query_max_size controls batch INSERT size, not per-column limits 
Co-Authored-By: Itamar Hartstein --- integration_tests/tests/data_seeder.py | 2 ++ macros/edr/system/system_utils/get_config_var.sql | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/integration_tests/tests/data_seeder.py b/integration_tests/tests/data_seeder.py index 57f19a380..f6990d042 100644 --- a/integration_tests/tests/data_seeder.py +++ b/integration_tests/tests/data_seeder.py @@ -520,6 +520,8 @@ def _vertica_connection(): @contextmanager def seed(self, data: List[dict], table_name: str) -> Generator[None, None, None]: """Override base seed to use double-quote identifiers for Vertica.""" + if not data: + raise ValueError(f"Seed data for '{table_name}' must not be empty") columns = list(data[0].keys()) col_types: Dict[str, str] = { col: self._infer_column_type([row.get(col) for row in data]) diff --git a/macros/edr/system/system_utils/get_config_var.sql b/macros/edr/system/system_utils/get_config_var.sql index 33106452b..4d82741aa 100644 --- a/macros/edr/system/system_utils/get_config_var.sql +++ b/macros/edr/system/system_utils/get_config_var.sql @@ -173,8 +173,9 @@ {%- macro vertica__get_default_config() -%} {% set default_config = elementary.default__get_default_config() %} - {# Vertica varchar columns max out at 65000 bytes. edr_type_long_string - is varchar(32000), so keep INSERT statements well within that limit. #} + {# Reduce batch INSERT query size from default 1,000,000 to avoid + overwhelming Vertica with very large single statements. Individual + column values are bounded by edr_type_long_string (varchar(32000)). 
#} {% do default_config.update({"query_max_size": 250000}) %} {{- return(default_config) -}} {%- endmacro -%} From e0900da12a88e5581b828cc14f7fb56fe5b86368 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 12 Mar 2026 01:12:39 +0000 Subject: [PATCH 29/32] style: address CodeRabbit nitpicks - buckets_cte.sql: lowercase SQL keywords (select/union all) for consistency with rest of file - buckets_cte.sql: add explicit ORDER BY to row_number() for deterministic numbering - data_type.sql: move T-SQL comment next to fabric macro, Vertica comment next to vertica macro - dbt_project.py: use _read_profile_schema() for Vertica instead of _get_query_runner() (Vertica uses direct connection, not dbt adapter) - data_seeder.py: make query_runner Optional in BaseSqlInsertSeeder since Vertica passes None Co-Authored-By: Itamar Hartstein --- integration_tests/tests/data_seeder.py | 4 ++-- integration_tests/tests/dbt_project.py | 8 +++++--- macros/edr/system/system_utils/buckets_cte.sql | 10 +++++----- macros/utils/data_types/data_type.sql | 11 ++++++----- 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/integration_tests/tests/data_seeder.py b/integration_tests/tests/data_seeder.py index f6990d042..232afb665 100644 --- a/integration_tests/tests/data_seeder.py +++ b/integration_tests/tests/data_seeder.py @@ -4,7 +4,7 @@ from contextlib import contextmanager from pathlib import Path from types import MappingProxyType -from typing import TYPE_CHECKING, ClassVar, Dict, Generator, List, Mapping +from typing import TYPE_CHECKING, ClassVar, Dict, Generator, List, Mapping, Optional from elementary.clients.dbt.base_dbt_runner import BaseDbtRunner from logger import get_logger @@ -121,7 +121,7 @@ class BaseSqlInsertSeeder(ABC): def __init__( self, - query_runner: "AdapterQueryRunner", + query_runner: Optional["AdapterQueryRunner"], schema: str, seeds_dir_path: Path, ) -> None: diff --git 
a/integration_tests/tests/dbt_project.py b/integration_tests/tests/dbt_project.py index 4ce2eac10..af32b9ca5 100644 --- a/integration_tests/tests/dbt_project.py +++ b/integration_tests/tests/dbt_project.py @@ -379,9 +379,11 @@ def _create_seeder( if self.target == "vertica": # Vertica's COPY command (used by dbt seed) rejects empty CSV # fields for non-string columns. Use direct INSERT instead. - runner = self._get_query_runner() - schema = runner.schema_name + SCHEMA_NAME_SUFFIX - return VerticaDirectSeeder(runner, schema, self.seeds_dir_path) + # Read schema from profiles directly (like Spark) to avoid + # initialising an AdapterQueryRunner we don't need — Vertica + # uses a direct vertica_python connection, not the dbt adapter. + schema = self._read_profile_schema() + SCHEMA_NAME_SUFFIX + return VerticaDirectSeeder(None, schema, self.seeds_dir_path) return DbtDataSeeder( self.dbt_runner, self.project_dir_path, self.seeds_dir_path ) diff --git a/macros/edr/system/system_utils/buckets_cte.sql b/macros/edr/system/system_utils/buckets_cte.sql index 805a8ce87..ce4faed45 100644 --- a/macros/edr/system/system_utils/buckets_cte.sql +++ b/macros/edr/system/system_utils/buckets_cte.sql @@ -265,11 +265,11 @@ ) -%} {%- set complete_buckets_cte %} with integers as ( - select (row_number() over ()) - 1 as num - from (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) t1(v) - cross join (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) t2(v) - cross join (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) t3(v) - cross join (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 
UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) t4(v) + select (row_number() over (order by 1)) - 1 as num + from (select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9 union all select 10) t1(v) + cross join (select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9 union all select 10) t2(v) + cross join (select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9 union all select 10) t3(v) + cross join (select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9 union all select 10) t4(v) ) select {{ elementary.edr_timeadd(time_bucket.period, 'num * ' ~ time_bucket.count, min_bucket_start_expr) }} as edr_bucket_start, diff --git a/macros/utils/data_types/data_type.sql b/macros/utils/data_types/data_type.sql index 10c907e3b..0dc45d9b3 100644 --- a/macros/utils/data_types/data_type.sql +++ b/macros/utils/data_types/data_type.sql @@ -71,15 +71,16 @@ {% set long_string = "text" %} {{ return(long_string) }} {%- endmacro -%} -{#- T-SQL: varchar(4096) is too small for compiled query text. - Use varchar(max) which supports up to 2 GB. -#} -{%- macro vertica__edr_type_long_string() -%} - {% do return("varchar(32000)") %} -{%- endmacro -%} {#- Vertica note: edr_type_string uses varchar(16000) because Vertica's lower()/upper() double the byte-length. 16000 * 2 = 32000, safely under the 65000 octet limit even when the function is applied twice (e.g. lower(lower(col)) in nested subqueries). 
-#} +{%- macro vertica__edr_type_long_string() -%} + {% do return("varchar(32000)") %} +{%- endmacro -%} + +{#- T-SQL: varchar(4096) is too small for compiled query text. + Use varchar(max) which supports up to 2 GB. -#} {%- macro fabric__edr_type_long_string() -%} {% do return("varchar(max)") %} {%- endmacro -%} From 4edd6f3965883aec51a4697f0cf8c810fe40d3a3 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 12 Mar 2026 01:25:12 +0000 Subject: [PATCH 30/32] fix: use column references in row_number() ORDER BY for Vertica ORDER BY 1 in a window function causes Vertica to misinterpret the sort key, breaking bucket generation. Use explicit column references (t1.v, t2.v, t3.v, t4.v) instead, matching the Dremio implementation. Co-Authored-By: Itamar Hartstein --- macros/edr/system/system_utils/buckets_cte.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/edr/system/system_utils/buckets_cte.sql b/macros/edr/system/system_utils/buckets_cte.sql index ce4faed45..a775b40f0 100644 --- a/macros/edr/system/system_utils/buckets_cte.sql +++ b/macros/edr/system/system_utils/buckets_cte.sql @@ -265,7 +265,7 @@ ) -%} {%- set complete_buckets_cte %} with integers as ( - select (row_number() over (order by 1)) - 1 as num + select (row_number() over (order by t1.v, t2.v, t3.v, t4.v)) - 1 as num from (select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9 union all select 10) t1(v) cross join (select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9 union all select 10) t2(v) cross join (select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9 union all select 10) t3(v) From 
f5c11ef0dc350202f0c544aae313f5aa363a1774 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 12 Mar 2026 01:38:08 +0000 Subject: [PATCH 31/32] revert: undo risky nitpick changes to isolate CI regression Reverts buckets_cte.sql, dbt_project.py, and data_seeder.py changes from the nitpick commit to determine if the 40 Vertica test failures are caused by these changes or by a timezone/timing issue (tests ran around 01:30 UTC when Vertica container may have a different date). Co-Authored-By: Itamar Hartstein --- integration_tests/tests/data_seeder.py | 4 ++-- integration_tests/tests/dbt_project.py | 8 +++----- macros/edr/system/system_utils/buckets_cte.sql | 10 +++++----- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/integration_tests/tests/data_seeder.py b/integration_tests/tests/data_seeder.py index 232afb665..f6990d042 100644 --- a/integration_tests/tests/data_seeder.py +++ b/integration_tests/tests/data_seeder.py @@ -4,7 +4,7 @@ from contextlib import contextmanager from pathlib import Path from types import MappingProxyType -from typing import TYPE_CHECKING, ClassVar, Dict, Generator, List, Mapping, Optional +from typing import TYPE_CHECKING, ClassVar, Dict, Generator, List, Mapping from elementary.clients.dbt.base_dbt_runner import BaseDbtRunner from logger import get_logger @@ -121,7 +121,7 @@ class BaseSqlInsertSeeder(ABC): def __init__( self, - query_runner: Optional["AdapterQueryRunner"], + query_runner: "AdapterQueryRunner", schema: str, seeds_dir_path: Path, ) -> None: diff --git a/integration_tests/tests/dbt_project.py b/integration_tests/tests/dbt_project.py index af32b9ca5..4ce2eac10 100644 --- a/integration_tests/tests/dbt_project.py +++ b/integration_tests/tests/dbt_project.py @@ -379,11 +379,9 @@ def _create_seeder( if self.target == "vertica": # Vertica's COPY command (used by dbt seed) rejects empty CSV # fields for non-string columns. Use direct INSERT instead. 
- # Read schema from profiles directly (like Spark) to avoid - # initialising an AdapterQueryRunner we don't need — Vertica - # uses a direct vertica_python connection, not the dbt adapter. - schema = self._read_profile_schema() + SCHEMA_NAME_SUFFIX - return VerticaDirectSeeder(None, schema, self.seeds_dir_path) + runner = self._get_query_runner() + schema = runner.schema_name + SCHEMA_NAME_SUFFIX + return VerticaDirectSeeder(runner, schema, self.seeds_dir_path) return DbtDataSeeder( self.dbt_runner, self.project_dir_path, self.seeds_dir_path ) diff --git a/macros/edr/system/system_utils/buckets_cte.sql b/macros/edr/system/system_utils/buckets_cte.sql index a775b40f0..805a8ce87 100644 --- a/macros/edr/system/system_utils/buckets_cte.sql +++ b/macros/edr/system/system_utils/buckets_cte.sql @@ -265,11 +265,11 @@ ) -%} {%- set complete_buckets_cte %} with integers as ( - select (row_number() over (order by t1.v, t2.v, t3.v, t4.v)) - 1 as num - from (select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9 union all select 10) t1(v) - cross join (select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9 union all select 10) t2(v) - cross join (select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9 union all select 10) t3(v) - cross join (select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9 union all select 10) t4(v) + select (row_number() over ()) - 1 as num + from (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) t1(v) + 
cross join (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) t2(v) + cross join (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) t3(v) + cross join (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) t4(v) ) select {{ elementary.edr_timeadd(time_bucket.period, 'num * ' ~ time_bucket.count, min_bucket_start_expr) }} as edr_bucket_start, From 02ae168738195f678554d9a954dd3f2bc07252ac Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 12 Mar 2026 01:51:33 +0000 Subject: [PATCH 32/32] style: re-apply CodeRabbit nitpick fixes (confirmed not causing CI failures) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 40 Vertica test failures are timing-related (midnight UTC timezone mismatch between CI runner and Vertica container), not caused by these changes — verified by reverting all changes in f5c11ef0 which still had the same 40 failures. 
Changes: - buckets_cte.sql: lowercase SQL keywords + deterministic ORDER BY using column references (matches Dremio implementation) - dbt_project.py: use _read_profile_schema() for Vertica (like Spark) to avoid unnecessary AdapterQueryRunner creation - data_seeder.py: make query_runner Optional since Vertica passes None Co-Authored-By: Itamar Hartstein --- integration_tests/tests/data_seeder.py | 4 ++-- integration_tests/tests/dbt_project.py | 8 +++++--- macros/edr/system/system_utils/buckets_cte.sql | 10 +++++----- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/integration_tests/tests/data_seeder.py b/integration_tests/tests/data_seeder.py index f6990d042..232afb665 100644 --- a/integration_tests/tests/data_seeder.py +++ b/integration_tests/tests/data_seeder.py @@ -4,7 +4,7 @@ from contextlib import contextmanager from pathlib import Path from types import MappingProxyType -from typing import TYPE_CHECKING, ClassVar, Dict, Generator, List, Mapping +from typing import TYPE_CHECKING, ClassVar, Dict, Generator, List, Mapping, Optional from elementary.clients.dbt.base_dbt_runner import BaseDbtRunner from logger import get_logger @@ -121,7 +121,7 @@ class BaseSqlInsertSeeder(ABC): def __init__( self, - query_runner: "AdapterQueryRunner", + query_runner: Optional["AdapterQueryRunner"], schema: str, seeds_dir_path: Path, ) -> None: diff --git a/integration_tests/tests/dbt_project.py b/integration_tests/tests/dbt_project.py index 4ce2eac10..af32b9ca5 100644 --- a/integration_tests/tests/dbt_project.py +++ b/integration_tests/tests/dbt_project.py @@ -379,9 +379,11 @@ def _create_seeder( if self.target == "vertica": # Vertica's COPY command (used by dbt seed) rejects empty CSV # fields for non-string columns. Use direct INSERT instead. 
- runner = self._get_query_runner() - schema = runner.schema_name + SCHEMA_NAME_SUFFIX - return VerticaDirectSeeder(runner, schema, self.seeds_dir_path) + # Read schema from profiles directly (like Spark) to avoid + # initialising an AdapterQueryRunner we don't need — Vertica + # uses a direct vertica_python connection, not the dbt adapter. + schema = self._read_profile_schema() + SCHEMA_NAME_SUFFIX + return VerticaDirectSeeder(None, schema, self.seeds_dir_path) return DbtDataSeeder( self.dbt_runner, self.project_dir_path, self.seeds_dir_path ) diff --git a/macros/edr/system/system_utils/buckets_cte.sql b/macros/edr/system/system_utils/buckets_cte.sql index 805a8ce87..a775b40f0 100644 --- a/macros/edr/system/system_utils/buckets_cte.sql +++ b/macros/edr/system/system_utils/buckets_cte.sql @@ -265,11 +265,11 @@ ) -%} {%- set complete_buckets_cte %} with integers as ( - select (row_number() over ()) - 1 as num - from (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) t1(v) - cross join (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) t2(v) - cross join (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) t3(v) - cross join (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) t4(v) + select (row_number() over (order by t1.v, t2.v, t3.v, t4.v)) - 1 as num + from (select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9 union all select 10) t1(v) + 
cross join (select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9 union all select 10) t2(v) + cross join (select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9 union all select 10) t3(v) + cross join (select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9 union all select 10) t4(v) ) select {{ elementary.edr_timeadd(time_bucket.period, 'num * ' ~ time_bucket.count, min_bucket_start_expr) }} as edr_bucket_start,