# Patch summary: add Vertica as a supported/tested warehouse target.
#
# Files touched by the hunks below:
#   .github/workflows/test-all-warehouses.yml
#       adds `vertica` to the warehouse list, after `sqlserver`.
#   .github/workflows/test-warehouse.yml
#       - adds `vertica` to the list of accepted values (next to dremio/fabric/sqlserver);
#       - adds a "Start Vertica" step that runs `docker compose up -d --wait vertica`;
#       - skips the generic "Install dbt" step for vertica and adds "Install dbt (Vertica)",
#         which installs dbt-vertica with `--no-deps`, then vertica-python, then dbt-core;
#       - "Install Elementary" installs "." (no adapter extra) when the warehouse is vertica,
#         so pip does not re-resolve dbt-vertica's dbt-core pin;
#       - adds `vertica` to the case pattern that sets IS_DOCKER=true.
#   elementary/clients/dbt/transient_errors.py
#       adds a "vertica" entry with three patterns: "connection timed out",
#       "could not connect to the server", "ssl syscall error".
#   elementary/monitor/dbt_project/package-lock.yml, packages.yml
#       moves the dbt-data-reliability revision from 534afc63... to 2ab66fbe...
#       (package-lock.yml also gets a new sha1_hash).
#   pyproject.toml
#       adds optional dependency dbt-vertica (>=1.7,<2.0.0), a `vertica` extra,
#       and appends "dbt-vertica" to the `all` extra.
#   tests/e2e_dbt_project/docker-compose.yml
#       adds a `vertica` service (image ghcr.io/ratiopbc/vertica-ce, port 127.0.0.1:5433,
#       health check via `vsql ... 'SELECT 1;'`, 5s interval / 5s timeout / 10 retries)
#       and a `vertica-data` volume.
#   tests/e2e_dbt_project/macros/vertica_seed_override.sql (new file)
#       defines `copy_local_load_csv_rows`, naming the reject table `<model alias>_rejects`.
#   tests/profiles/profiles.yml.j2
#       adds a `vertica` output (127.0.0.1:5433, dbadmin, database elementary_tests)
#       and adds 'vertica' to the `targets` list.
#
# NOTE(review): in this copy the line breaks inside the hunks appear to have been
#   collapsed into spaces; restore the original newlines before trying to apply it.
# NOTE(review): the Vertica image is referenced without a tag, so Docker will resolve
#   it to `latest` - consider pinning a tag or digest for reproducible CI.
# NOTE(review): the dbadmin/vertica credentials are repeated in the compose environment,
#   the compose health check and profiles.yml.j2 - keep the three in sync.
# NOTE(review): the health check allows roughly 10 x 5s before the service is reported
#   unhealthy; confirm that is enough for a first-boot Vertica database creation.
diff --git a/.github/workflows/test-all-warehouses.yml b/.github/workflows/test-all-warehouses.yml index de6ba8f80..b5c3eab61 100644 --- a/.github/workflows/test-all-warehouses.yml +++ b/.github/workflows/test-all-warehouses.yml @@ -102,6 +102,7 @@ jobs: spark, fabric, sqlserver, + vertica, ] uses: ./.github/workflows/test-warehouse.yml with: diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index 83fad4337..c43b1969f 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -20,6 +20,7 @@ on: - dremio - fabric - sqlserver + - vertica elementary-ref: type: string required: false @@ -167,6 +168,12 @@ jobs: run: | docker compose up -d --wait sqlserver + - name: Start Vertica + if: inputs.warehouse-type == 'vertica' + working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} + run: | + docker compose up -d --wait vertica + - name: Setup Python uses: actions/setup-python@v5 with: @@ -185,15 +192,34 @@ jobs: sudo ACCEPT_EULA=Y apt-get install -y msodbcsql18 unixodbc-dev - name: Install dbt + if: inputs.warehouse-type != 'vertica' run: > pip install "dbt-core${{ inputs.dbt-version && format('=={0}', inputs.dbt-version) }}" "dbt-${{ (inputs.warehouse-type == 'databricks_catalog' && 'databricks') || (inputs.warehouse-type == 'athena' && 'athena-community') || (inputs.warehouse-type == 'dremio' && 'dremio') || inputs.warehouse-type }}${{ (inputs.warehouse-type == 'spark' && '[PyHive]') || '' }}${{ inputs.dbt-version && format('~={0}', inputs.dbt-version) }}" + # dbt-vertica pins dbt-core~=1.8 which lacks the 'arguments' attribute + # used by newer dbt-core. Install dbt-vertica without deps first, then + # install the latest compatible dbt-core separately. We also install + # vertica-python (dbt-vertica's runtime dep) explicitly. 
+ - name: Install dbt (Vertica) + if: inputs.warehouse-type == 'vertica' + run: | + pip install --no-deps dbt-vertica + pip install vertica-python + pip install "dbt-core${{ inputs.dbt-version && format('=={0}', inputs.dbt-version) }}" + - name: Install Elementary run: | pip install -r dev-requirements.txt - pip install ".[${{ (inputs.warehouse-type == 'databricks_catalog' && 'databricks') || inputs.warehouse-type }}]" + # For Vertica, dbt-vertica is already installed with --no-deps above; + # using ".[vertica]" would re-resolve dbt-vertica's deps and downgrade + # dbt-core to ~=1.8. Install elementary without the adapter extra. + if [ "${{ inputs.warehouse-type }}" = "vertica" ]; then + pip install "." + else + pip install ".[${{ (inputs.warehouse-type == 'databricks_catalog' && 'databricks') || inputs.warehouse-type }}]" + fi - name: Write dbt profiles env: @@ -204,7 +230,7 @@ jobs: # This enables caching the seeded database state between runs. IS_DOCKER=false case "${{ inputs.warehouse-type }}" in - postgres|clickhouse|trino|dremio|duckdb|spark|sqlserver) IS_DOCKER=true ;; + postgres|clickhouse|trino|dremio|duckdb|spark|sqlserver|vertica) IS_DOCKER=true ;; esac if [ "$IS_DOCKER" = "true" ]; then diff --git a/elementary/clients/dbt/transient_errors.py b/elementary/clients/dbt/transient_errors.py index 5f6cedaa2..483a1d97c 100644 --- a/elementary/clients/dbt/transient_errors.py +++ b/elementary/clients/dbt/transient_errors.py @@ -120,6 +120,11 @@ ), "fabric": _TSQL_TRANSIENT, "sqlserver": _TSQL_TRANSIENT, + "vertica": ( + "connection timed out", + "could not connect to the server", + "ssl syscall error", + ), } # Pre-computed union of all adapter-specific patterns for the fallback path diff --git a/elementary/monitor/dbt_project/package-lock.yml b/elementary/monitor/dbt_project/package-lock.yml index 070158da0..a1f260788 100644 --- a/elementary/monitor/dbt_project/package-lock.yml +++ b/elementary/monitor/dbt_project/package-lock.yml @@ -4,5 +4,5 @@ packages: 
version: 0.8.6 - git: https://github.com/elementary-data/dbt-data-reliability.git name: elementary - revision: 534afc63c75d28b87d7cbd3b222dd3ea9a980f7b -sha1_hash: cb18b7df65415901187dcf469dcd377e56c0dc70 + revision: 2ab66fbe7e347c3cbbf2910c91f03cd6db2ef517 +sha1_hash: 7dc83ea83a781be623eea141eca2a0cceb4878e9 diff --git a/elementary/monitor/dbt_project/packages.yml b/elementary/monitor/dbt_project/packages.yml index 3a77430a4..5e394310d 100644 --- a/elementary/monitor/dbt_project/packages.yml +++ b/elementary/monitor/dbt_project/packages.yml @@ -2,7 +2,7 @@ packages: - package: dbt-labs/dbt_utils version: [">=0.8.0", "<0.9.0"] - git: https://github.com/elementary-data/dbt-data-reliability.git - revision: 534afc63c75d28b87d7cbd3b222dd3ea9a980f7b + revision: 2ab66fbe7e347c3cbbf2910c91f03cd6db2ef517 # NOTE - for unreleased CLI versions we often need to update the package version to a commit hash (please leave this # commented, so it will be easy to access) diff --git a/pyproject.toml b/pyproject.toml index 057f208c1..d87d51e68 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,6 +58,7 @@ dbt-duckdb = {version = ">=1.5.0,<2.0.0", optional = true} dbt-dremio = {version = ">=1.5.0,<2.0.0", optional = true} dbt-fabric = {version = ">=1.4,<2.0.0", optional = true} dbt-sqlserver = {version = ">=1.4,<2.0.0", optional = true} +dbt-vertica = {version = ">=1.7,<2.0.0", optional = true} [tool.poetry.extras] snowflake = ["dbt-snowflake"] bigquery = ["dbt-bigquery"] @@ -72,7 +73,8 @@ duckdb = ["dbt-duckdb"] dremio = ["dbt-dremio"] fabric = ["dbt-fabric"] sqlserver = ["dbt-sqlserver"] -all = ["dbt-snowflake", "dbt-bigquery", "dbt-redshift", "dbt-postgres", "dbt-databricks", "dbt-spark", "dbt-clickhouse", "dbt-athena-community", "dbt-trino", "dbt-duckdb", "dbt-dremio", "dbt-fabric", "dbt-sqlserver"] +vertica = ["dbt-vertica"] +all = ["dbt-snowflake", "dbt-bigquery", "dbt-redshift", "dbt-postgres", "dbt-databricks", "dbt-spark", "dbt-clickhouse", "dbt-athena-community", 
"dbt-trino", "dbt-duckdb", "dbt-dremio", "dbt-fabric", "dbt-sqlserver", "dbt-vertica"] [build-system] requires = ["poetry-core>=1.0.0"] diff --git a/tests/e2e_dbt_project/docker-compose.yml b/tests/e2e_dbt_project/docker-compose.yml index 542d31537..7c7c1dc93 100644 --- a/tests/e2e_dbt_project/docker-compose.yml +++ b/tests/e2e_dbt_project/docker-compose.yml @@ -290,6 +290,36 @@ services: timeout: 5s retries: 10 + # ── Vertica CE ──────────────────────────────────────────────────── + vertica: + image: ghcr.io/ratiopbc/vertica-ce + container_name: vertica + ports: + - "127.0.0.1:5433:5433" + environment: + APP_DB_USER: dbadmin + APP_DB_PASSWORD: vertica + TZ: "UTC" + VERTICA_DB_NAME: elementary_tests + VMART_ETL_SCRIPT: "" + deploy: + mode: global + ulimits: + nofile: + soft: 65536 + hard: 65536 + volumes: + - vertica-data:/data + healthcheck: + test: + [ + "CMD-SHELL", + "/opt/vertica/bin/vsql -U dbadmin -w vertica -c 'SELECT 1;'", + ] + interval: 5s + timeout: 5s + retries: 10 + # ── SQL Server (for Fabric / SQL Server adapters) ───────────────── sqlserver: image: mcr.microsoft.com/mssql/server:2022-latest @@ -316,3 +346,4 @@ volumes: dremio-minio-data: spark-warehouse: spark-hive-metastore: + vertica-data: diff --git a/tests/e2e_dbt_project/macros/vertica_seed_override.sql b/tests/e2e_dbt_project/macros/vertica_seed_override.sql new file mode 100644 index 000000000..ab9c3a25b --- /dev/null +++ b/tests/e2e_dbt_project/macros/vertica_seed_override.sql @@ -0,0 +1,23 @@ +{#- Override the dbt-vertica seed helper so that each seed file uses a + unique reject-table name. The upstream macro hardcodes + ``seed_rejects`` for every seed, which causes "Object already exists" + errors when ``dbt seed`` processes more than one file. -#} +{% macro copy_local_load_csv_rows(model, agate_table) %} + {% set cols_sql = get_seed_column_quoted_csv(model, agate_table.column_names) %} + + {#- Build a per-seed reject table name so concurrent seeds don't clash. 
-#} + {% set reject_table = model["alias"] ~ "_rejects" %} + + {% set sql %} + copy {{ this.render() }} + ({{ cols_sql }}) + from local '{{ agate_table.original_abspath }}' + delimiter ',' + enclosed by '"' + skip 1 + abort on error + rejected data as table {{ this.without_identifier() }}.{{ reject_table }}; + {% endset %} + + {{ return(sql) }} +{% endmacro %} diff --git a/tests/profiles/profiles.yml.j2 b/tests/profiles/profiles.yml.j2 index 3543496c6..0ea3a7257 100644 --- a/tests/profiles/profiles.yml.j2 +++ b/tests/profiles/profiles.yml.j2 @@ -73,6 +73,16 @@ elementary_tests: trust_cert: true threads: 4 + vertica: &vertica + type: vertica + host: 127.0.0.1 + port: 5433 + username: dbadmin + password: vertica + database: elementary_tests + schema: {{ schema_name }} + threads: 4 + # ── Cloud targets (secrets substituted at CI time) ───────────────── fabric: &fabric @@ -148,7 +158,7 @@ elementary_tests: elementary: target: postgres outputs: -{%- set targets = ['postgres', 'clickhouse', 'trino', 'dremio', 'duckdb', 'spark', 'fabric', 'sqlserver', 'snowflake', 'bigquery', 'redshift', 'databricks_catalog', 'athena'] %} +{%- set targets = ['postgres', 'clickhouse', 'trino', 'dremio', 'duckdb', 'spark', 'fabric', 'sqlserver', 'vertica', 'snowflake', 'bigquery', 'redshift', 'databricks_catalog', 'athena'] %} {%- for t in targets %} {{ t }}: <<: *{{ t }}