Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 23 additions & 10 deletions .github/workflows/test-all-warehouses.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,27 @@ on:
description: Whether to generate new data

jobs:
# ── Docker targets ────────────────────────────────────────────────────
# No secrets needed — run on pull_request (works for forks without approval).
# Skipped on pull_request_target to avoid duplicate runs for internal PRs.
test-docker:
if: github.event_name != 'pull_request_target'
strategy:
fail-fast: false
matrix:
dbt-version: ${{ inputs.dbt-version && fromJSON(format('["{0}"]', inputs.dbt-version)) || fromJSON('[null]') }}
warehouse-type: [postgres, clickhouse]
uses: ./.github/workflows/test-warehouse.yml
with:
warehouse-type: ${{ matrix.warehouse-type }}
elementary-ref: ${{ inputs.elementary-ref || ((github.event_name == 'pull_request_target' || github.event_name == 'pull_request') && github.event.pull_request.head.sha) || '' }}
dbt-data-reliability-ref: ${{ inputs.dbt-data-reliability-ref }}
dbt-version: ${{ matrix.dbt-version }}
generate-data: ${{ inputs.generate-data || false }}

# ── Cloud targets ─────────────────────────────────────────────────────
# Require secrets — use fork check / approval gate for pull_request_target.

# Determine if this is a fork PR and skip if wrong trigger is used
check-fork-status:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -74,7 +95,7 @@ jobs:
- name: Approved
run: echo "Fork PR approved for testing"

test:
test-cloud:
needs: [check-fork-status, approve-fork]
if: |
! cancelled() &&
Expand All @@ -86,15 +107,7 @@ jobs:
matrix:
dbt-version: ${{ inputs.dbt-version && fromJSON(format('["{0}"]', inputs.dbt-version)) || fromJSON('[null]') }}
warehouse-type:
[
postgres,
snowflake,
bigquery,
redshift,
databricks_catalog,
athena,
clickhouse,
]
[snowflake, bigquery, redshift, databricks_catalog, athena]
uses: ./.github/workflows/test-warehouse.yml
with:
warehouse-type: ${{ matrix.warehouse-type }}
Expand Down
20 changes: 16 additions & 4 deletions .github/workflows/test-warehouse.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,12 +114,15 @@ jobs:

- name: Write dbt profiles
env:
PROFILES_YML: ${{ secrets.CI_PROFILES_YML }}
CI_WAREHOUSE_SECRETS: ${{ secrets.CI_WAREHOUSE_SECRETS || '' }}
run: |
mkdir -p ~/.dbt
DBT_VERSION=$(pip show dbt-core | grep -i version | awk '{print $2}' | sed 's/\.//g')
UNDERSCORED_REF_NAME=$(echo "${{ inputs.warehouse-type }}_dbt_${DBT_VERSION}_${BRANCH_NAME}" | awk '{print tolower($0)}' | head -c 40 | sed "s/[-\/]/_/g")
echo "$PROFILES_YML" | base64 -d | sed "s/<SCHEMA_NAME>/py_$UNDERSCORED_REF_NAME/g" > ~/.dbt/profiles.yml

python "${{ github.workspace }}/elementary/tests/profiles/generate_profiles.py" \
--template "${{ github.workspace }}/elementary/tests/profiles/profiles.yml.j2" \
--output ~/.dbt/profiles.yml \
--schema-name "py_$UNDERSCORED_REF_NAME"

- name: Run Python package unit tests
run: pytest -vv tests/unit --warehouse-type ${{ inputs.warehouse-type }}
Expand Down Expand Up @@ -181,6 +184,7 @@ jobs:
run: edr --help

- name: Run monitor
if: ${{ !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }}
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aaah that's not good. I'm guessing this is because of the secrets, right?
So for now run all the tests with pull_request_target like before and don't separate.
(Only in this PR, the dbt-data-reliability one is fine)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, the edr monitor/report/send-report steps all need secrets (SLACK_WEBHOOK, GCS_KEYFILE, etc.) so docker targets can't run without pull_request_target. Reverted — all targets now run under pull_request_target with the approval gate, same as before. Removed all the docker target guards from test-warehouse.yml too.

env:
SLACK_WEBHOOK: ${{ secrets.CI_SLACK_WEBHOOK }}
run: >
Expand All @@ -192,36 +196,42 @@ jobs:
--slack-webhook "$SLACK_WEBHOOK"

- name: Validate alerts statuses were updated
if: ${{ !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }}
working-directory: ${{ env.CLI_INTERNAL_DBT_PKG_DIR }}
run: |
dbt deps
dbt run-operation validate_alert_statuses_are_updated -t "${{ inputs.warehouse-type }}"

- name: Run report
if: ${{ !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }}
run: >
edr monitor report
-t "${{ inputs.warehouse-type }}"
--project-dir "${{ env.E2E_DBT_PROJECT_DIR }}"
--project-profile-target "${{ inputs.warehouse-type }}"

- name: Set report artifact name
if: ${{ !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }}
id: set_report_artifact_name
run: |
ARTIFACT_NAME=$(echo "report_${{ inputs.warehouse-type }}_${BRANCH_NAME}_dbt_${{ inputs.dbt-version || '' }}.html" | awk '{print tolower($0)}' | sed 's#[":/\\<>|*?-]#_#g')
echo "artifact_name=$ARTIFACT_NAME" >> "$GITHUB_OUTPUT"

- name: Upload report artifact
if: ${{ !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }}
uses: actions/upload-artifact@v4
with:
name: ${{ steps.set_report_artifact_name.outputs.artifact_name }}
path: elementary/edr_target/elementary_report.html

- name: Write GCS keyfile
if: ${{ !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }}
env:
GCS_KEYFILE: ${{ secrets.GCS_KEYFILE }}
run: echo "$GCS_KEYFILE" | base64 -d > /tmp/gcs_keyfile.json

- name: Run send report
if: ${{ !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }}
env:
SLACK_TOKEN: ${{ secrets.CI_SLACK_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
Expand All @@ -246,17 +256,19 @@ jobs:
--update-bucket-website true

- name: Set artifact name
if: ${{ !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }}
id: set_artifact_name
run: |
ARTIFACT_NAME=$(echo "edr_${{ inputs.warehouse-type }}_${BRANCH_NAME}_dbt_${{ inputs.dbt-version || '' }}.log" | awk '{print tolower($0)}' | sed 's#[":/\\<>|*?-]#_#g')
echo "artifact_name=$ARTIFACT_NAME" >> "$GITHUB_OUTPUT"

- name: Upload edr log
if: ${{ always() }}
if: ${{ always() && !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }}
uses: actions/upload-artifact@v4
with:
name: ${{ steps.set_artifact_name.outputs.artifact_name }}
path: elementary/edr_target/edr.log

- name: Run Python package e2e tests
if: ${{ !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }}
run: pytest -vv tests/e2e --warehouse-type ${{ inputs.warehouse-type }}
120 changes: 120 additions & 0 deletions tests/profiles/generate_profiles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#!/usr/bin/env python3
"""Generate ~/.dbt/profiles.yml from a Jinja2 template and an optional secrets JSON."""

from __future__ import annotations

import base64
import json
import os
from pathlib import Path
from typing import Any

import click
import yaml
from jinja2 import BaseLoader, Environment, StrictUndefined, Undefined


class _NullUndefined(Undefined):
    """Jinja2 ``Undefined`` that degrades gracefully instead of raising.

    Missing template variables are falsy, iterate as an empty sequence,
    and stringify to ``""`` — so docker-only runs (no cloud secrets in the
    environment) can still render the template without errors.
    """

    def __bool__(self) -> bool:
        # An unset variable behaves as "not provided" in conditionals.
        return False

    def __iter__(self):
        # Iterating an unset variable yields nothing.
        return iter(())

    def __str__(self) -> str:
        # An unset variable renders as an empty string in the output.
        return ""


def _yaml_inline(value: Any) -> str:
    """Dump *value* as a compact single-line YAML representation.

    Registered as the ``toyaml`` Jinja filter so structured secrets (e.g.
    the BigQuery keyfile dict) can be embedded inline in the rendered
    profiles.yml.

    An ``Undefined`` value (missing secret on docker-only runs) renders as
    an empty flow mapping so the output stays parseable.
    """
    if isinstance(value, Undefined):
        return "{}"
    # safe_dump refuses arbitrary Python objects (no !!python tags can leak
    # into the profile), and a very large width stops PyYAML from wrapping
    # long scalars across lines, which would corrupt the inline embedding.
    dumped = yaml.safe_dump(value, default_flow_style=True, width=2**20).strip()
    # PyYAML appends a "..." document-end marker after top-level plain
    # scalars (safe_dump("abc") -> "abc\n...\n"); strip() alone leaves it
    # behind, so remove it explicitly.
    if dumped.endswith("\n..."):
        dumped = dumped[: -len("\n...")].rstrip()
    return dumped


@click.command()
@click.option(
    "--template",
    required=True,
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
    help="Path to the Jinja2 profiles template (e.g. profiles.yml.j2).",
)
@click.option(
    "--output",
    required=True,
    type=click.Path(dir_okay=False, path_type=Path),
    help="Destination path for the rendered profiles.yml.",
)
@click.option(
    "--schema-name",
    required=True,
    help="Base schema name (e.g. dbt_pkg_<ref> or py_<ref>).",
)
@click.option(
    "--secrets-json-env",
    default="CI_WAREHOUSE_SECRETS",
    show_default=True,
    help="Name of the env-var holding the base64-encoded JSON secrets blob.",
)
def main(
    template: Path,
    output: Path,
    schema_name: str,
    secrets_json_env: str,
) -> None:
    """Render a Jinja2 profiles template into a dbt profiles.yml file.

    Resolution order:
      1. If the env-var named by ``--secrets-json-env`` is set, decode it and
         use its key/value pairs (plus *schema_name*) as template variables.
      2. Otherwise render the template with only *schema_name* populated (all
         other variables resolve to empty strings — suitable for docker-only
         targets on fork PRs).
    """
    output.parent.mkdir(parents=True, exist_ok=True)

    # None means the env-var was unset or blank (docker-only run); a dict —
    # possibly empty — means a secrets blob was provided.
    secrets = _load_secrets(secrets_json_env)

    # ── Build template context ──────────────────────────────────────────
    context: dict[str, Any] = {"schema_name": schema_name}

    if secrets is not None:
        # Lower-case the keys so template variables don't depend on how the
        # secrets JSON capitalizes them.
        for key, value in secrets.items():
            context[key.lower()] = value
        click.echo(
            f"Loaded {len(secrets)} secret(s) from ${secrets_json_env}.",
            err=True,
        )
    else:
        click.echo(
            "No secrets found — rendering template for docker-only targets.",
            err=True,
        )

    # ── Render ──────────────────────────────────────────────────────────
    # When secrets are loaded, use StrictUndefined so typos in secret keys
    # fail fast. For docker-only runs (no secrets) use _NullUndefined so
    # cloud placeholders silently resolve to empty strings.
    undefined_cls = StrictUndefined if secrets is not None else _NullUndefined
    env = Environment(
        loader=BaseLoader(),
        undefined=undefined_cls,
        keep_trailing_newline=True,
    )
    env.filters["toyaml"] = _yaml_inline
    tmpl = env.from_string(template.read_text())
    output.write_text(tmpl.render(**context))
    click.echo(f"Wrote {output}", err=True)


def _load_secrets(env_var: str) -> dict[str, Any] | None:
    """Decode the base64-encoded JSON secrets blob held in *env_var*.

    Returns:
        ``None`` when the env-var is unset or blank; otherwise the decoded
        JSON object as a dict (may be empty).

    Raises:
        click.ClickException: if the value is not valid base64/JSON, or the
            decoded payload is not a JSON object.
    """
    blob = os.environ.get(env_var, "").strip()
    if not blob:
        return None
    try:
        # binascii.Error (bad base64) and json.JSONDecodeError are both
        # ValueError subclasses, so a single except clause covers both.
        decoded = json.loads(base64.b64decode(blob))
    except ValueError as e:
        raise click.ClickException(f"Failed to decode ${env_var}: {e}") from e
    # Guard against a non-object payload (list/scalar) which would otherwise
    # crash later with an opaque AttributeError on .items().
    if not isinstance(decoded, dict):
        raise click.ClickException(
            f"${env_var} must decode to a JSON object, "
            f"got {type(decoded).__name__}."
        )
    return decoded


if __name__ == "__main__":
    main()
88 changes: 88 additions & 0 deletions tests/profiles/profiles.yml.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
elementary_tests:
  target: postgres
  outputs:

    # ── Docker targets (plaintext, no secrets needed) ──────────────────

    postgres: &postgres
      type: postgres
      host: 127.0.0.1
      port: 5432
      user: admin
      password: admin
      dbname: postgres
      schema: "{{ schema_name }}"
      threads: 32

    clickhouse: &clickhouse
      type: clickhouse
      host: localhost
      port: 8123
      user: default
      password: default
      schema: "{{ schema_name }}"
      threads: 4

    # ── Cloud targets (secrets substituted at CI time) ─────────────────
    # Templated scalars are quoted so values that happen to look like
    # numbers/booleans, or contain YAML specials (': ', ' #', leading '*'
    # or '&'), cannot change type or break parsing. Missing secrets render
    # as "" instead of null.

    snowflake: &snowflake
      type: snowflake
      account: "{{ snowflake_account }}"
      user: "{{ snowflake_user }}"
      password: "{{ snowflake_password }}"
      role: "{{ snowflake_role }}"
      database: "{{ snowflake_database }}"
      warehouse: "{{ snowflake_warehouse }}"
      schema: "{{ schema_name }}"
      threads: 4

    bigquery: &bigquery
      type: bigquery
      method: service-account-json
      project: "{{ bigquery_project }}"
      dataset: "{{ schema_name }}"
      # Rendered by the `toyaml` filter to an inline flow mapping — must
      # stay unquoted.
      keyfile_json: {{ bigquery_keyfile | toyaml }}
      threads: 4

    redshift: &redshift
      type: redshift
      host: "{{ redshift_host }}"
      user: "{{ redshift_user }}"
      password: "{{ redshift_password }}"
      # Left unquoted so dbt receives a numeric port (null when unset).
      port: {{ redshift_port }}
      dbname: "{{ redshift_dbname }}"
      schema: "{{ schema_name }}"
      threads: 4

    databricks_catalog: &databricks_catalog
      type: databricks
      host: "{{ databricks_host }}"
      http_path: "{{ databricks_http_path }}"
      catalog: "{{ databricks_catalog }}"
      schema: "{{ schema_name }}"
      client_id: "{{ databricks_client_id }}"
      client_secret: "{{ databricks_client_secret }}"
      threads: 4

    athena: &athena
      type: athena
      s3_staging_dir: "{{ athena_s3_staging_dir }}"
      s3_data_dir: "{{ athena_s3_data_dir }}"
      region_name: "{{ athena_region }}"
      database: awsdatacatalog
      schema: "{{ schema_name }}"
      aws_access_key_id: "{{ athena_aws_access_key_id }}"
      aws_secret_access_key: "{{ athena_aws_secret_access_key }}"
      threads: 4

# The internal CLI dbt_project uses profile "elementary", so we alias the
# same targets but override the schema to <base>_elementary.
elementary:
  target: postgres
  outputs:
{%- set targets = ['postgres', 'clickhouse', 'snowflake', 'bigquery', 'redshift', 'databricks_catalog', 'athena'] %}
{%- for t in targets %}
    {{ t }}:
      <<: *{{ t }}
      {{ 'dataset' if t == 'bigquery' else 'schema' }}: "{{ schema_name }}_elementary"
{%- endfor %}
Loading