Skip to content
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
8b84461
Remove extra newlines that Vertica could not parse
jc00ke Oct 8, 2025
7146da5
Add Vertica-specific escape macro
jc00ke Nov 17, 2025
27e924d
Add Vertica-specific timeadd macro
jc00ke Nov 17, 2025
667054b
Attempt to set up Vertica in CI
jc00ke Nov 17, 2025
2130a7a
Debug missing port
jc00ke Nov 17, 2025
d7a9c0f
Add more missing env vars for CI
jc00ke Nov 17, 2025
8d1975d
Try opentext namespace for CI image
jc00ke Nov 17, 2025
fe2585e
Use Ratio's Vertica-CE
jc00ke Nov 17, 2025
2154163
Add dbt-vertica-version
jc00ke Nov 18, 2025
19e41b5
Start Vertica after schema has been determined
jc00ke Nov 18, 2025
c585dae
Use Ratio's GitHub package for vertica-ce
jc00ke Nov 18, 2025
d198a9e
Set Vertica env vars & persist across steps
jc00ke Nov 18, 2025
570dcd0
Forgot VERTICA_HOST
jc00ke Nov 18, 2025
bd69507
Address CodeRabbit nit
jc00ke Nov 18, 2025
c886e4c
Try a healthcheck before moving on with Vertica
jc00ke Nov 18, 2025
bcc9d80
Use env vars for Vertica healthcheck
jc00ke Nov 18, 2025
9a2bf01
Add test/CI profiles.yml fixture file
jc00ke Nov 18, 2025
2a996ef
Ignore the .user.yml in the fixtures dir
jc00ke Nov 18, 2025
795fe85
Merge master into vertica-compat and migrate profiles to .j2 template
haritamar Mar 11, 2026
b671822
fix: export SCHEMA_NAME to GITHUB_ENV for Vertica docker-compose
haritamar Mar 11, 2026
a26598c
refactor: inline Vertica credentials instead of using env vars
haritamar Mar 11, 2026
4326960
revert: remove unnecessary SCHEMA_NAME export to GITHUB_ENV
haritamar Mar 11, 2026
731ca2f
refactor: remove dbt-vertica-version input parameter
haritamar Mar 11, 2026
cf6aed7
fix: Vertica adapter compatibility fixes for integration tests
devin-ai-integration[bot] Mar 11, 2026
899f146
fix: upgrade dbt-core for Vertica CI to support 'arguments' test property
devin-ai-integration[bot] Mar 11, 2026
139799c
fix: install dbt-vertica with --no-deps to allow latest dbt-core
devin-ai-integration[bot] Mar 11, 2026
71b930e
fix: override dbt-vertica seed macro to use unique reject table per seed
devin-ai-integration[bot] Mar 11, 2026
326e20c
fix: address Vertica CI workflow, schema cleanup, and stddev precision
devin-ai-integration[bot] Mar 11, 2026
7d77e11
fix: add empty-seed guard and clarify query_max_size comment
devin-ai-integration[bot] Mar 12, 2026
e0900da
style: address CodeRabbit nitpicks
devin-ai-integration[bot] Mar 12, 2026
4edd6f3
fix: use column references in row_number() ORDER BY for Vertica
devin-ai-integration[bot] Mar 12, 2026
f5c11ef
revert: undo risky nitpick changes to isolate CI regression
devin-ai-integration[bot] Mar 12, 2026
02ae168
style: re-apply CodeRabbit nitpick fixes (confirmed not causing CI failures)
devin-ai-integration[bot] Mar 12, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion .github/workflows/test-all-warehouses.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,16 @@ jobs:
${{ inputs.dbt-version && fromJSON(format('["{0}"]', inputs.dbt-version)) ||
fromJSON('["latest_official", "latest_pre"]') }}
warehouse-type:
[postgres, clickhouse, trino, dremio, spark, duckdb, sqlserver]
[
postgres,
clickhouse,
trino,
dremio,
spark,
duckdb,
sqlserver,
vertica,
]
exclude:
# latest_pre is only tested on postgres
- dbt-version: latest_pre
Expand All @@ -64,6 +73,8 @@ jobs:
warehouse-type: duckdb
- dbt-version: latest_pre
warehouse-type: sqlserver
- dbt-version: latest_pre
warehouse-type: vertica
uses: ./.github/workflows/test-warehouse.yml
with:
warehouse-type: ${{ matrix.warehouse-type }}
Expand Down
25 changes: 24 additions & 1 deletion .github/workflows/test-warehouse.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ on:
- duckdb
- sqlserver
- fabric
- vertica
elementary-ref:
type: string
required: false
Expand Down Expand Up @@ -151,8 +152,18 @@ jobs:
if: startsWith(inputs.warehouse-type, 'databricks') && inputs.dbt-version < '1.7.0'
run: pip install databricks-sql-connector==2.9.3

- name: Install dbt-vertica
if: inputs.warehouse-type == 'vertica'
run: |
# dbt-vertica pins dbt-core~=1.8 which lacks native support for the
# "arguments" test property used by the integration-test framework.
# Install dbt-vertica without deps, then install latest dbt-core
# separately (dbt-vertica works fine with newer dbt-core versions).
pip install dbt-vertica --no-deps
pip install vertica-python "dbt-core"

- name: Install dbt
if: ${{ inputs.dbt-version != 'fusion' }}
if: ${{ inputs.dbt-version != 'fusion' && inputs.warehouse-type != 'vertica' }}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
run:
pip install${{ (inputs.dbt-version == 'latest_pre' && ' --pre') || '' }}
"dbt-core${{ (!startsWith(inputs.dbt-version, 'latest') && format('=={0}', inputs.dbt-version)) || '' }}"
Expand Down Expand Up @@ -198,6 +209,18 @@ jobs:
ln -sfn ${{ github.workspace }}/dbt-data-reliability dbt_project/dbt_packages/elementary
pip install -r requirements.txt

- name: Start Vertica
if: inputs.warehouse-type == 'vertica'
working-directory: ${{ env.TESTS_DIR }}
run: docker compose -f docker-compose-vertica.yml up -d

- name: Wait for Vertica to be ready
if: inputs.warehouse-type == 'vertica'
run: |
echo "Waiting for Vertica to be healthy..."
timeout 60 bash -c 'until [ "$(docker inspect --format="{{.State.Health.Status}}" vertica)" == "healthy" ]; do echo "Waiting..."; sleep 5; done'
echo "Vertica is ready!"

- name: Check DWH connection
working-directory: ${{ env.TESTS_DIR }}
run: |
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ dbt_internal_packages/
logs/
scripts/

.github/fixtures/.user.yml
.idea
.DS_Store

Expand Down
40 changes: 40 additions & 0 deletions integration_tests/docker-compose-vertica.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
services:
  vertica:
    # NOTE(review): ghcr.io/ratiopbc/vertica-ce is a third-party build of
    # Vertica Community Edition. OpenText no longer publishes an official
    # prebuilt CE image, so this image is used instead; it can be reproduced
    # from the official vertica/vertica-containers repository (one-node-ce
    # directory). Re-verify its provenance and maintenance status before
    # relying on it long-term, or pin a digest for reproducibility.
    image: ghcr.io/ratiopbc/vertica-ce
    container_name: vertica
    environment:
      # NOTE: the test seeder (data_seeder.py) reads VERTICA_PASSWORD from the
      # runner environment, while VERTICA_PASS below is consumed by the
      # container entrypoint. Both default to "vertica", so CI works, but keep
      # them in sync if the password ever changes.
      VERTICA_USER: dbadmin
      VERTICA_PASS: vertica
      VERTICA_HOST: localhost
      VERTICA_PORT: 5433
      VERTICA_DATABASE: elementary_tests
      # SCHEMA_NAME is exported by the CI workflow before `docker compose up`.
      VERTICA_SCHEMA: ${SCHEMA_NAME}
      APP_DB_USER: dbadmin
      APP_DB_PASSWORD: vertica
      TZ: "America/Los_Angeles"
      VERTICA_DB_NAME: elementary_tests
      # Skip the VMart example-data ETL to speed up container startup.
      VMART_ETL_SCRIPT: ""
    ports:
      - "5433:5433"
      - "5444:5444"
    deploy:
      mode: global
    # Vertica requires a high open-file limit.
    ulimits:
      nofile:
        soft: 65536
        hard: 65536
    volumes:
      - type: volume
        source: vertica-data
        target: /data
    # The CI workflow polls this healthcheck before running tests.
    healthcheck:
      test:
        [
          "CMD-SHELL",
          "/opt/vertica/bin/vsql -U dbadmin -w vertica -c 'SELECT 1;'",
        ]
      interval: 5s
      timeout: 5s
      retries: 10
volumes:
  vertica-data:
14 changes: 13 additions & 1 deletion integration_tests/profiles/profiles.yml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,18 @@ elementary_tests:
trust_cert: true
threads: 4

vertica: &vertica
type: vertica
host: localhost
port: 5433
username: dbadmin
password: vertica
database: elementary_tests
schema: {{ schema_name }}
connection_load_balance: false
retries: 2
threads: 4

# ── Cloud targets (secrets substituted at CI time) ─────────────────

snowflake: &snowflake
Expand Down Expand Up @@ -150,7 +162,7 @@ elementary_tests:
elementary:
target: postgres
outputs:
{%- set targets = ['postgres', 'clickhouse', 'trino', 'dremio', 'spark', 'duckdb', 'sqlserver', 'snowflake', 'bigquery', 'redshift', 'databricks_catalog', 'athena', 'fabric'] %}
{%- set targets = ['postgres', 'clickhouse', 'trino', 'dremio', 'spark', 'duckdb', 'sqlserver', 'vertica', 'snowflake', 'bigquery', 'redshift', 'databricks_catalog', 'athena', 'fabric'] %}
{%- for t in targets %}
{{ t }}:
<<: *{{ t }}
Expand Down
114 changes: 114 additions & 0 deletions integration_tests/tests/data_seeder.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,3 +454,117 @@ def _create_table_sql(self, fq_table: str, col_defs: str) -> str:
f"CREATE TABLE {fq_table} ({col_defs}) "
f"ENGINE = MergeTree() ORDER BY tuple()"
)


class VerticaDirectSeeder(BaseSqlInsertSeeder):
    """Fast seeder for Vertica: executes CREATE TABLE + INSERT directly.

    Bypasses ``dbt seed`` (which uses Vertica's COPY command) because COPY
    rejects empty CSV fields for non-string columns instead of treating them
    as NULL. Direct INSERT statements handle NULL correctly.

    Uses a *direct* ``vertica_python`` connection (rather than dbt's adapter
    connection pool) so that all DDL + DML runs in a single session and can
    be committed atomically. dbt's ``connection_named`` context manager
    releases (and effectively rolls back) the connection after each
    ``execute_sql`` call, which caused INSERT data to be invisible to
    subsequent ``dbt test`` sessions.

    Vertica uses double-quote identifiers (not backticks), so this class
    overrides the ``seed`` method to use ``"col"`` quoting.
    """

    def _type_string(self) -> str:
        # Must match edr_type_string (varchar(16000)) so that schema-change
        # detection sees a consistent type between seeded tables and
        # elementary metadata columns.
        return "VARCHAR(16000)"

    def _type_boolean(self) -> str:
        return "BOOLEAN"

    def _type_integer(self) -> str:
        return "INTEGER"

    def _type_float(self) -> str:
        return "FLOAT"

    def _format_value(self, value: object, col_type: str) -> str:
        """Render a Python value as a Vertica SQL literal.

        Empty strings are treated as NULL to mirror how the CSV fixtures
        represent missing values. The bool check must precede the int/float
        check because ``bool`` is a subclass of ``int``.
        """
        if value is None or (isinstance(value, str) and value == ""):
            return "NULL"
        if isinstance(value, bool):
            return "true" if value else "false"
        if isinstance(value, (int, float)):
            return str(value)
        # Escape single quotes by doubling them (standard SQL escaping).
        text = str(value).replace("'", "''")
        return f"'{text}'"

    def _create_table_sql(self, fq_table: str, col_defs: str) -> str:
        return f"CREATE TABLE {fq_table} ({col_defs})"

    @staticmethod
    def _vertica_connection():
        """Open a direct vertica_python connection from env / defaults.

        Accepts both VERTICA_PASSWORD and VERTICA_PASS — the latter is the
        variable name used by docker-compose-vertica.yml — so the seeder and
        the CI container stay in sync. Both default to "vertica".
        """
        import vertica_python  # available in the test venv

        password = os.environ.get(
            "VERTICA_PASSWORD", os.environ.get("VERTICA_PASS", "vertica")
        )
        conn_info = {
            "host": os.environ.get("VERTICA_HOST", "localhost"),
            "port": int(os.environ.get("VERTICA_PORT", "5433")),
            "user": os.environ.get("VERTICA_USER", "dbadmin"),
            "password": password,
            "database": os.environ.get("VERTICA_DATABASE", "elementary_tests"),
        }
        return vertica_python.connect(**conn_info)

    @contextmanager
    def seed(self, data: List[dict], table_name: str) -> Generator[None, None, None]:
        """Create ``table_name`` in the test schema and load ``data`` into it.

        Overrides the base ``seed`` to use double-quote identifiers for
        Vertica. Raises ValueError for empty ``data`` (column types cannot be
        inferred without at least one row) instead of an opaque IndexError.
        """
        if not data:
            raise ValueError(
                f"Cannot seed empty data into {table_name}: at least one row "
                "is required to infer column types."
            )
        columns = list(data[0].keys())
        col_types: Dict[str, str] = {
            col: self._infer_column_type([row.get(col) for row in data])
            for col in columns
        }
        # Vertica uses double-quote identifiers, not backticks.
        col_defs = ", ".join(f'"{col}" {col_types[col]}' for col in columns)
        fq_table = f'"{self._schema}"."{table_name}"'

        seed_path = self._write_csv(data, table_name)

        try:
            # Use a direct connection so DDL + DML share the same session
            # and the COMMIT is guaranteed to persist the data.
            conn = self._vertica_connection()
            try:
                cur = conn.cursor()
                cur.execute(f"DROP TABLE IF EXISTS {fq_table}")
                cur.execute(self._create_table_sql(fq_table, col_defs))

                # Batch INSERTs to keep each statement below Vertica's query
                # size limits.
                for batch_start in range(0, len(data), _INSERT_BATCH_SIZE):
                    batch = data[batch_start : batch_start + _INSERT_BATCH_SIZE]
                    rows_sql = ", ".join(
                        "("
                        + ", ".join(
                            self._format_value(row.get(c), col_types[c])
                            for c in columns
                        )
                        + ")"
                        for row in batch
                    )
                    cur.execute(f"INSERT INTO {fq_table} VALUES {rows_sql}")

                conn.commit()
            finally:
                conn.close()

            logger.info(
                "%s: loaded %d rows into %s (%s)",
                type(self).__name__,
                len(data),
                fq_table,
                ", ".join(f"{c}: {t}" for c, t in col_types.items()),
            )

            yield
        finally:
            seed_path.unlink(missing_ok=True)
17 changes: 15 additions & 2 deletions integration_tests/tests/dbt_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,12 @@
from uuid import uuid4

from adapter_query_runner import AdapterQueryRunner, UnsupportedJinjaError
from data_seeder import ClickHouseDirectSeeder, DbtDataSeeder, SparkS3CsvSeeder
from data_seeder import (
ClickHouseDirectSeeder,
DbtDataSeeder,
SparkS3CsvSeeder,
VerticaDirectSeeder,
)
from dbt_utils import get_database_and_schema_properties
from elementary.clients.dbt.base_dbt_runner import BaseDbtRunner
from elementary.clients.dbt.factory import RunnerMethod, create_dbt_runner
Expand Down Expand Up @@ -357,7 +362,9 @@ def _read_profile_schema(self) -> str:

def _create_seeder(
self,
) -> Union[DbtDataSeeder, ClickHouseDirectSeeder, SparkS3CsvSeeder]:
) -> Union[
DbtDataSeeder, ClickHouseDirectSeeder, SparkS3CsvSeeder, VerticaDirectSeeder
]:
"""Return the fastest available seeder for the current target."""
if self.target == "clickhouse":
runner = self._get_query_runner()
Expand All @@ -369,6 +376,12 @@ def _create_seeder(
# set_from_args / reset_adapters).
schema = self._read_profile_schema() + SCHEMA_NAME_SUFFIX
return SparkS3CsvSeeder(schema, self.seeds_dir_path)
if self.target == "vertica":
# Vertica's COPY command (used by dbt seed) rejects empty CSV
# fields for non-string columns. Use direct INSERT instead.
runner = self._get_query_runner()
schema = runner.schema_name + SCHEMA_NAME_SUFFIX
return VerticaDirectSeeder(runner, schema, self.seeds_dir_path)
return DbtDataSeeder(
self.dbt_runner, self.project_dir_path, self.seeds_dir_path
)
Expand Down
8 changes: 8 additions & 0 deletions macros/edr/dbt_artifacts/upload_run_results.sql
Original file line number Diff line number Diff line change
Expand Up @@ -124,4 +124,12 @@
{% do flattened_node.update(
{"compiled_code": elementary.get_compiled_code_too_long_err_msg()}
) %}
{#- On adapters with limited string-literal / varchar sizes (e.g. Vertica
65 000 bytes) the error *message* can also embed the full compiled SQL,
making the INSERT statement exceed the adapter's limits. Truncate the
message so the row can still be persisted. -#}
{% set msg = flattened_node.get("message", "") %}
{% if msg is string and msg | length > 4096 %}
{% do flattened_node.update({"message": msg[:4096] ~ "... (truncated)"}) %}
{% endif %}
{% endmacro %}
23 changes: 23 additions & 0 deletions macros/edr/system/system_utils/buckets_cte.sql
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,29 @@
{{ return(complete_buckets_cte) }}
{% endmacro %}

{% macro vertica__complete_buckets_cte(
    time_bucket,
    bucket_end_expr,
    min_bucket_start_expr,
    max_bucket_end_expr
) -%}
{#- Vertica implementation of complete_buckets_cte.

    Builds a CTE string that emits one row per time bucket between
    min_bucket_start_expr (inclusive) and max_bucket_end_expr. Vertica has no
    generate_series, so an "integers" derived table generates 0..9999 via
    row_number() over four cross-joined 10-row literal sets (10^4 rows),
    which caps the number of buckets at 10,000 per query.

    NOTE(review): bucket_end_expr appears unused here (the bucket end is
    recomputed with edr_timeadd); presumably the parameter is kept to match
    the signature of the other adapters' implementations — confirm before
    removing.

    The generated SQL is kept on long single lines deliberately: Vertica has
    been observed to fail parsing statements containing stray newlines, so do
    not reflow the body of the set block below.
-#}
{%- set complete_buckets_cte %}
with integers as (
select (row_number() over ()) - 1 as num
from (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) t1(v)
cross join (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) t2(v)
cross join (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) t3(v)
cross join (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) t4(v)
)
select
{{ elementary.edr_timeadd(time_bucket.period, 'num * ' ~ time_bucket.count, min_bucket_start_expr) }} as edr_bucket_start,
{{ elementary.edr_timeadd(time_bucket.period, '(num + 1) * ' ~ time_bucket.count, min_bucket_start_expr) }} as edr_bucket_end
from integers
where {{ elementary.edr_timeadd(time_bucket.period, '(num + 1) * ' ~ time_bucket.count, min_bucket_start_expr) }} <= {{ max_bucket_end_expr }}
{%- endset %}
{{ return(complete_buckets_cte) }}
{% endmacro %}

{% macro dremio__complete_buckets_cte(
time_bucket, bucket_end_expr, min_bucket_start_expr, max_bucket_end_expr
) %}
Expand Down
2 changes: 1 addition & 1 deletion macros/edr/system/system_utils/empty_table.sql
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@
{%- set empty_table_query -%}
select * from (
select
{% for column in column_name_and_type_list %}
{%- for column in column_name_and_type_list -%}
{{ elementary.empty_column(column[0], column[1]) }} {%- if not loop.last -%},{%- endif %}
{%- endfor %}
) as empty_table
Expand Down
Loading
Loading