Skip to content
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
8b84461
Remove extra newlines that Vertica could not parse
jc00ke Oct 8, 2025
7146da5
Add Vertica-specific escape macro
jc00ke Nov 17, 2025
27e924d
Add Vertica-specific timeadd macro
jc00ke Nov 17, 2025
667054b
Attempt to set up Vertica in CI
jc00ke Nov 17, 2025
2130a7a
Debug missing port
jc00ke Nov 17, 2025
d7a9c0f
Add more missing env vars for CI
jc00ke Nov 17, 2025
8d1975d
Try opentext namespace for CI image
jc00ke Nov 17, 2025
fe2585e
Use Ratio's Vertica-CE
jc00ke Nov 17, 2025
2154163
Add dbt-vertica-version
jc00ke Nov 18, 2025
19e41b5
Start Vertica after schema has been determined
jc00ke Nov 18, 2025
c585dae
Use Ratio's GitHub package for vertica-ce
jc00ke Nov 18, 2025
d198a9e
Set Vertica env vars & persist across steps
jc00ke Nov 18, 2025
570dcd0
Forgot VERTICA_HOST
jc00ke Nov 18, 2025
bd69507
Address CodeRabbit nit
jc00ke Nov 18, 2025
c886e4c
Try a healthcheck before moving on with Vertica
jc00ke Nov 18, 2025
bcc9d80
Use env vars for Vertica healthcheck
jc00ke Nov 18, 2025
9a2bf01
Add test/CI profiles.yml fixture file
jc00ke Nov 18, 2025
2a996ef
Ignore the .user.yml in the fixtures dir
jc00ke Nov 18, 2025
795fe85
Merge master into vertica-compat and migrate profiles to .j2 template
haritamar Mar 11, 2026
b671822
fix: export SCHEMA_NAME to GITHUB_ENV for Vertica docker-compose
haritamar Mar 11, 2026
a26598c
refactor: inline Vertica credentials instead of using env vars
haritamar Mar 11, 2026
4326960
revert: remove unnecessary SCHEMA_NAME export to GITHUB_ENV
haritamar Mar 11, 2026
731ca2f
refactor: remove dbt-vertica-version input parameter
haritamar Mar 11, 2026
cf6aed7
fix: Vertica adapter compatibility fixes for integration tests
devin-ai-integration[bot] Mar 11, 2026
899f146
fix: upgrade dbt-core for Vertica CI to support 'arguments' test property
devin-ai-integration[bot] Mar 11, 2026
139799c
fix: install dbt-vertica with --no-deps to allow latest dbt-core
devin-ai-integration[bot] Mar 11, 2026
71b930e
fix: override dbt-vertica seed macro to use unique reject table per seed
devin-ai-integration[bot] Mar 11, 2026
326e20c
fix: address Vertica CI workflow, schema cleanup, and stddev precision
devin-ai-integration[bot] Mar 11, 2026
7d77e11
fix: add empty-seed guard and clarify query_max_size comment
devin-ai-integration[bot] Mar 12, 2026
e0900da
style: address CodeRabbit nitpicks
devin-ai-integration[bot] Mar 12, 2026
4edd6f3
fix: use column references in row_number() ORDER BY for Vertica
devin-ai-integration[bot] Mar 12, 2026
f5c11ef
revert: undo risky nitpick changes to isolate CI regression
devin-ai-integration[bot] Mar 12, 2026
02ae168
style: re-apply CodeRabbit nitpick fixes (confirmed not causing CI failures)
devin-ai-integration[bot] Mar 12, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion .github/workflows/test-all-warehouses.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,16 @@ jobs:
${{ inputs.dbt-version && fromJSON(format('["{0}"]', inputs.dbt-version)) ||
fromJSON('["latest_official", "latest_pre"]') }}
warehouse-type:
[postgres, clickhouse, trino, dremio, spark, duckdb, sqlserver]
[
postgres,
clickhouse,
trino,
dremio,
spark,
duckdb,
sqlserver,
vertica,
]
exclude:
# latest_pre is only tested on postgres
- dbt-version: latest_pre
Expand All @@ -64,6 +73,8 @@ jobs:
warehouse-type: duckdb
- dbt-version: latest_pre
warehouse-type: sqlserver
- dbt-version: latest_pre
warehouse-type: vertica
uses: ./.github/workflows/test-warehouse.yml
with:
warehouse-type: ${{ matrix.warehouse-type }}
Expand Down
25 changes: 24 additions & 1 deletion .github/workflows/test-warehouse.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ on:
- duckdb
- sqlserver
- fabric
- vertica
elementary-ref:
type: string
required: false
Expand Down Expand Up @@ -151,8 +152,18 @@ jobs:
if: startsWith(inputs.warehouse-type, 'databricks') && inputs.dbt-version < '1.7.0'
run: pip install databricks-sql-connector==2.9.3

- name: Install dbt-vertica
if: inputs.warehouse-type == 'vertica'
run: |
# dbt-vertica pins dbt-core~=1.8 which lacks native support for the
# "arguments" test property used by the integration-test framework.
# Install dbt-vertica without deps, then install latest dbt-core
# separately (dbt-vertica works fine with newer dbt-core versions).
pip install dbt-vertica --no-deps
pip install vertica-python "dbt-core"

- name: Install dbt
if: ${{ inputs.dbt-version != 'fusion' }}
if: ${{ inputs.dbt-version != 'fusion' && inputs.warehouse-type != 'vertica' }}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
run:
pip install${{ (inputs.dbt-version == 'latest_pre' && ' --pre') || '' }}
"dbt-core${{ (!startsWith(inputs.dbt-version, 'latest') && format('=={0}', inputs.dbt-version)) || '' }}"
Expand Down Expand Up @@ -198,6 +209,18 @@ jobs:
ln -sfn ${{ github.workspace }}/dbt-data-reliability dbt_project/dbt_packages/elementary
pip install -r requirements.txt

- name: Start Vertica
if: inputs.warehouse-type == 'vertica'
working-directory: ${{ env.TESTS_DIR }}
run: docker compose -f docker-compose-vertica.yml up -d

- name: Wait for Vertica to be ready
if: inputs.warehouse-type == 'vertica'
run: |
echo "Waiting for Vertica to be healthy..."
timeout 60 bash -c 'until [ "$(docker inspect --format="{{.State.Health.Status}}" vertica)" == "healthy" ]; do echo "Waiting..."; sleep 5; done'
echo "Vertica is ready!"

- name: Check DWH connection
working-directory: ${{ env.TESTS_DIR }}
run: |
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ dbt_internal_packages/
logs/
scripts/

.github/fixtures/.user.yml
.idea
.DS_Store

Expand Down
40 changes: 40 additions & 0 deletions integration_tests/docker-compose-vertica.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
services:
  vertica:
    # NOTE(review): ghcr.io/ratiopbc/vertica-ce is a third-party build of
    # Vertica Community Edition. OpenText no longer publishes an official
    # prebuilt CE image, so this image is used instead; it can be reproduced
    # from the official vertica/vertica-containers repository (one-node-ce
    # directory). Re-verify its provenance and maintenance status before
    # relying on it long-term, or pin a digest for reproducibility.
    image: ghcr.io/ratiopbc/vertica-ce
    container_name: vertica
    environment:
      # NOTE: the test seeder (data_seeder.py) reads VERTICA_PASSWORD from the
      # runner environment, while VERTICA_PASS below is consumed by the
      # container entrypoint. Both default to "vertica", so CI works, but keep
      # them in sync if the password ever changes.
      VERTICA_USER: dbadmin
      VERTICA_PASS: vertica
      VERTICA_HOST: localhost
      VERTICA_PORT: 5433
      VERTICA_DATABASE: elementary_tests
      # SCHEMA_NAME is exported by the CI workflow before `docker compose up`.
      VERTICA_SCHEMA: ${SCHEMA_NAME}
      APP_DB_USER: dbadmin
      APP_DB_PASSWORD: vertica
      TZ: "America/Los_Angeles"
      VERTICA_DB_NAME: elementary_tests
      # Skip the VMart example-data ETL to speed up container startup.
      VMART_ETL_SCRIPT: ""
    ports:
      - "5433:5433"
      - "5444:5444"
    deploy:
      mode: global
    # Vertica requires a high open-file limit.
    ulimits:
      nofile:
        soft: 65536
        hard: 65536
    volumes:
      - type: volume
        source: vertica-data
        target: /data
    # The CI workflow polls this healthcheck before running tests.
    healthcheck:
      test:
        [
          "CMD-SHELL",
          "/opt/vertica/bin/vsql -U dbadmin -w vertica -c 'SELECT 1;'",
        ]
      interval: 5s
      timeout: 5s
      retries: 10
volumes:
  vertica-data:
14 changes: 13 additions & 1 deletion integration_tests/profiles/profiles.yml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,18 @@ elementary_tests:
trust_cert: true
threads: 4

vertica: &vertica
type: vertica
host: localhost
port: 5433
username: dbadmin
password: vertica
database: elementary_tests
schema: {{ schema_name }}
connection_load_balance: false
retries: 2
threads: 4

# ── Cloud targets (secrets substituted at CI time) ─────────────────

snowflake: &snowflake
Expand Down Expand Up @@ -150,7 +162,7 @@ elementary_tests:
elementary:
target: postgres
outputs:
{%- set targets = ['postgres', 'clickhouse', 'trino', 'dremio', 'spark', 'duckdb', 'sqlserver', 'snowflake', 'bigquery', 'redshift', 'databricks_catalog', 'athena', 'fabric'] %}
{%- set targets = ['postgres', 'clickhouse', 'trino', 'dremio', 'spark', 'duckdb', 'sqlserver', 'vertica', 'snowflake', 'bigquery', 'redshift', 'databricks_catalog', 'athena', 'fabric'] %}
{%- for t in targets %}
{{ t }}:
<<: *{{ t }}
Expand Down
114 changes: 114 additions & 0 deletions integration_tests/tests/data_seeder.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,3 +454,117 @@ def _create_table_sql(self, fq_table: str, col_defs: str) -> str:
f"CREATE TABLE {fq_table} ({col_defs}) "
f"ENGINE = MergeTree() ORDER BY tuple()"
)


class VerticaDirectSeeder(BaseSqlInsertSeeder):
    """Fast seeder for Vertica: executes CREATE TABLE + INSERT directly.

    Bypasses ``dbt seed`` (which uses Vertica's COPY command) because COPY
    rejects empty CSV fields for non-string columns instead of treating them
    as NULL. Direct INSERT statements handle NULL correctly.

    Uses a *direct* ``vertica_python`` connection (rather than dbt's adapter
    connection pool) so that all DDL + DML runs in a single session and can
    be committed atomically. dbt's ``connection_named`` context manager
    releases (and effectively rolls back) the connection after each
    ``execute_sql`` call, which caused INSERT data to be invisible to
    subsequent ``dbt test`` sessions.

    Vertica uses double-quote identifiers (not backticks), so this class
    overrides the ``seed`` method to use ``"col"`` quoting.
    """

    def _type_string(self) -> str:
        # Must match edr_type_string (varchar(16000)) so that schema-change
        # detection sees a consistent type between seeded tables and
        # elementary metadata columns.
        return "VARCHAR(16000)"

    def _type_boolean(self) -> str:
        return "BOOLEAN"

    def _type_integer(self) -> str:
        return "INTEGER"

    def _type_float(self) -> str:
        return "FLOAT"

    def _format_value(self, value: object, col_type: str) -> str:
        """Render a Python value as a Vertica SQL literal.

        Empty strings are treated as NULL to mirror how the CSV fixtures
        represent missing values. The bool check must precede the int/float
        check because ``bool`` is a subclass of ``int``.
        """
        if value is None or (isinstance(value, str) and value == ""):
            return "NULL"
        if isinstance(value, bool):
            return "true" if value else "false"
        if isinstance(value, (int, float)):
            return str(value)
        # Escape single quotes by doubling them (standard SQL escaping).
        text = str(value).replace("'", "''")
        return f"'{text}'"

    def _create_table_sql(self, fq_table: str, col_defs: str) -> str:
        return f"CREATE TABLE {fq_table} ({col_defs})"

    @staticmethod
    def _vertica_connection():
        """Open a direct vertica_python connection from env / defaults.

        Accepts both VERTICA_PASSWORD and VERTICA_PASS — the latter is the
        variable name used by docker-compose-vertica.yml — so the seeder and
        the CI container stay in sync. Both default to "vertica".
        """
        import vertica_python  # available in the test venv

        password = os.environ.get(
            "VERTICA_PASSWORD", os.environ.get("VERTICA_PASS", "vertica")
        )
        conn_info = {
            "host": os.environ.get("VERTICA_HOST", "localhost"),
            "port": int(os.environ.get("VERTICA_PORT", "5433")),
            "user": os.environ.get("VERTICA_USER", "dbadmin"),
            "password": password,
            "database": os.environ.get("VERTICA_DATABASE", "elementary_tests"),
        }
        return vertica_python.connect(**conn_info)

    @contextmanager
    def seed(self, data: List[dict], table_name: str) -> Generator[None, None, None]:
        """Create ``table_name`` in the test schema and load ``data`` into it.

        Overrides the base ``seed`` to use double-quote identifiers for
        Vertica. Raises ValueError for empty ``data`` (column types cannot be
        inferred without at least one row) instead of an opaque IndexError.
        """
        if not data:
            raise ValueError(
                f"Cannot seed empty data into {table_name}: at least one row "
                "is required to infer column types."
            )
        columns = list(data[0].keys())
        col_types: Dict[str, str] = {
            col: self._infer_column_type([row.get(col) for row in data])
            for col in columns
        }
        # Vertica uses double-quote identifiers, not backticks.
        col_defs = ", ".join(f'"{col}" {col_types[col]}' for col in columns)
        fq_table = f'"{self._schema}"."{table_name}"'

        seed_path = self._write_csv(data, table_name)

        try:
            # Use a direct connection so DDL + DML share the same session
            # and the COMMIT is guaranteed to persist the data.
            conn = self._vertica_connection()
            try:
                cur = conn.cursor()
                cur.execute(f"DROP TABLE IF EXISTS {fq_table}")
                cur.execute(self._create_table_sql(fq_table, col_defs))

                # Batch INSERTs to keep each statement below Vertica's query
                # size limits.
                for batch_start in range(0, len(data), _INSERT_BATCH_SIZE):
                    batch = data[batch_start : batch_start + _INSERT_BATCH_SIZE]
                    rows_sql = ", ".join(
                        "("
                        + ", ".join(
                            self._format_value(row.get(c), col_types[c])
                            for c in columns
                        )
                        + ")"
                        for row in batch
                    )
                    cur.execute(f"INSERT INTO {fq_table} VALUES {rows_sql}")

                conn.commit()
            finally:
                conn.close()

            logger.info(
                "%s: loaded %d rows into %s (%s)",
                type(self).__name__,
                len(data),
                fq_table,
                ", ".join(f"{c}: {t}" for c, t in col_types.items()),
            )

            yield
        finally:
            seed_path.unlink(missing_ok=True)
17 changes: 15 additions & 2 deletions integration_tests/tests/dbt_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,12 @@
from uuid import uuid4

from adapter_query_runner import AdapterQueryRunner, UnsupportedJinjaError
from data_seeder import ClickHouseDirectSeeder, DbtDataSeeder, SparkS3CsvSeeder
from data_seeder import (
ClickHouseDirectSeeder,
DbtDataSeeder,
SparkS3CsvSeeder,
VerticaDirectSeeder,
)
from dbt_utils import get_database_and_schema_properties
from elementary.clients.dbt.base_dbt_runner import BaseDbtRunner
from elementary.clients.dbt.factory import RunnerMethod, create_dbt_runner
Expand Down Expand Up @@ -357,7 +362,9 @@ def _read_profile_schema(self) -> str:

def _create_seeder(
self,
) -> Union[DbtDataSeeder, ClickHouseDirectSeeder, SparkS3CsvSeeder]:
) -> Union[
DbtDataSeeder, ClickHouseDirectSeeder, SparkS3CsvSeeder, VerticaDirectSeeder
]:
"""Return the fastest available seeder for the current target."""
if self.target == "clickhouse":
runner = self._get_query_runner()
Expand All @@ -369,6 +376,12 @@ def _create_seeder(
# set_from_args / reset_adapters).
schema = self._read_profile_schema() + SCHEMA_NAME_SUFFIX
return SparkS3CsvSeeder(schema, self.seeds_dir_path)
if self.target == "vertica":
# Vertica's COPY command (used by dbt seed) rejects empty CSV
# fields for non-string columns. Use direct INSERT instead.
runner = self._get_query_runner()
schema = runner.schema_name + SCHEMA_NAME_SUFFIX
return VerticaDirectSeeder(runner, schema, self.seeds_dir_path)
return DbtDataSeeder(
self.dbt_runner, self.project_dir_path, self.seeds_dir_path
)
Expand Down
8 changes: 8 additions & 0 deletions macros/edr/dbt_artifacts/upload_run_results.sql
Original file line number Diff line number Diff line change
Expand Up @@ -124,4 +124,12 @@
{% do flattened_node.update(
{"compiled_code": elementary.get_compiled_code_too_long_err_msg()}
) %}
{#- On adapters with limited string-literal / varchar sizes (e.g. Vertica
65 000 bytes) the error *message* can also embed the full compiled SQL,
making the INSERT statement exceed the adapter's limits. Truncate the
message so the row can still be persisted. -#}
{% set msg = flattened_node.get("message", "") %}
{% if msg is string and msg | length > 4096 %}
{% do flattened_node.update({"message": msg[:4096] ~ "... (truncated)"}) %}
{% endif %}
{% endmacro %}
23 changes: 23 additions & 0 deletions macros/edr/system/system_utils/buckets_cte.sql
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,29 @@
{{ return(complete_buckets_cte) }}
{% endmacro %}

{% macro vertica__complete_buckets_cte(
    time_bucket,
    bucket_end_expr,
    min_bucket_start_expr,
    max_bucket_end_expr
) -%}
{#- Vertica implementation of complete_buckets_cte.

    Builds a CTE string that emits one row per time bucket between
    min_bucket_start_expr (inclusive) and max_bucket_end_expr. Vertica has no
    generate_series, so an "integers" derived table generates 0..9999 via
    row_number() over four cross-joined 10-row literal sets (10^4 rows),
    which caps the number of buckets at 10,000 per query.

    NOTE(review): bucket_end_expr appears unused here (the bucket end is
    recomputed with edr_timeadd); presumably the parameter is kept to match
    the signature of the other adapters' implementations — confirm before
    removing.

    The generated SQL is kept on long single lines deliberately: Vertica has
    been observed to fail parsing statements containing stray newlines, so do
    not reflow the body of the set block below.
-#}
{%- set complete_buckets_cte %}
with integers as (
select (row_number() over ()) - 1 as num
from (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) t1(v)
cross join (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) t2(v)
cross join (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) t3(v)
cross join (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) t4(v)
)
select
{{ elementary.edr_timeadd(time_bucket.period, 'num * ' ~ time_bucket.count, min_bucket_start_expr) }} as edr_bucket_start,
{{ elementary.edr_timeadd(time_bucket.period, '(num + 1) * ' ~ time_bucket.count, min_bucket_start_expr) }} as edr_bucket_end
from integers
where {{ elementary.edr_timeadd(time_bucket.period, '(num + 1) * ' ~ time_bucket.count, min_bucket_start_expr) }} <= {{ max_bucket_end_expr }}
{%- endset %}
{{ return(complete_buckets_cte) }}
{% endmacro %}

{% macro dremio__complete_buckets_cte(
time_bucket, bucket_end_expr, min_bucket_start_expr, max_bucket_end_expr
) %}
Expand Down
2 changes: 1 addition & 1 deletion macros/edr/system/system_utils/empty_table.sql
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@
{%- set empty_table_query -%}
select * from (
select
{% for column in column_name_and_type_list %}
{%- for column in column_name_and_type_list -%}
{{ elementary.empty_column(column[0], column[1]) }} {%- if not loop.last -%},{%- endif %}
{%- endfor %}
) as empty_table
Expand Down
Loading
Loading