From ac63b89c4379b186ffe78b31480504520b0ffce4 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 26 Feb 2026 01:16:16 +0000 Subject: [PATCH 01/17] refactor: replace opaque CI_PROFILES_YML secret with committed template + envsubst - Add profiles.yml.template with plaintext docker targets and ${VAR} cloud placeholders - Update test-warehouse.yml to use CI_WAREHOUSE_SECRETS with envsubst (+ PROFILES_YML fallback) - Split test-all-warehouses.yml into test-docker (pull_request) and test-cloud (pull_request_target) - Guard BigQuery keyfile extraction against missing key - Use explicit envsubst '$SCHEMA_NAME' for fork PR path Co-Authored-By: Itamar Hartstein --- .github/workflows/test-all-warehouses.yml | 33 ++++++--- .github/workflows/test-warehouse.yml | 41 ++++++++++- tests/profiles/profiles.yml.template | 90 +++++++++++++++++++++++ 3 files changed, 153 insertions(+), 11 deletions(-) create mode 100644 tests/profiles/profiles.yml.template diff --git a/.github/workflows/test-all-warehouses.yml b/.github/workflows/test-all-warehouses.yml index a90ba5b72..0fe19f67d 100644 --- a/.github/workflows/test-all-warehouses.yml +++ b/.github/workflows/test-all-warehouses.yml @@ -35,6 +35,27 @@ on: description: Whether to generate new data jobs: + # ── Docker targets ──────────────────────────────────────────────────── + # No secrets needed — run on pull_request (works for forks without approval). + # Skipped on pull_request_target to avoid duplicate runs for internal PRs. + test-docker: + if: github.event_name != 'pull_request_target' + strategy: + fail-fast: false + matrix: + dbt-version: ${{ inputs.dbt-version && fromJSON(format('["{0}"]', inputs.dbt-version)) || fromJSON('[null]') }} + warehouse-type: [postgres, clickhouse] + uses: ./.github/workflows/test-warehouse.yml + with: + warehouse-type: ${{ matrix.warehouse-type }} + elementary-ref: ${{ inputs.elementary-ref || ((github.event_name == 'pull_request_target' || github.event_name == 'pull_request') && github.event.pull_request.head.sha) || '' }} + dbt-data-reliability-ref: ${{ inputs.dbt-data-reliability-ref }} + dbt-version: ${{ matrix.dbt-version }} + generate-data: ${{ inputs.generate-data || false }} + + # ── Cloud targets ───────────────────────────────────────────────────── + # Require secrets — use fork check / approval gate for pull_request_target. + # Determine if this is a fork PR and skip if wrong trigger is used check-fork-status: runs-on: ubuntu-latest @@ -74,7 +95,7 @@ jobs: - name: Approved run: echo "Fork PR approved for testing" - test: + test-cloud: needs: [check-fork-status, approve-fork] if: | ! cancelled() && @@ -86,15 +107,7 @@ jobs: matrix: dbt-version: ${{ inputs.dbt-version && fromJSON(format('["{0}"]', inputs.dbt-version)) || fromJSON('[null]') }} warehouse-type: - [ - postgres, - snowflake, - bigquery, - redshift, - databricks_catalog, - athena, - clickhouse, - ] + [snowflake, bigquery, redshift, databricks_catalog, athena] uses: ./.github/workflows/test-warehouse.yml with: warehouse-type: ${{ matrix.warehouse-type }} diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index 0a1f7fd71..c9aa46b9e 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -114,12 +114,51 @@ jobs: - name: Write dbt profiles env: + # New lean JSON secret (base64 encoded) + SECRETS_JSON: ${{ secrets.CI_WAREHOUSE_SECRETS || '' }} + # Backwards-compat fallback while migrating secrets PROFILES_YML: ${{ secrets.CI_PROFILES_YML }} run: | mkdir -p ~/.dbt + + if ! command -v envsubst >/dev/null 2>&1; then + sudo apt-get update + sudo apt-get install -y gettext-base + fi + DBT_VERSION=$(pip show dbt-core | grep -i version | awk '{print $2}' | sed 's/\.//g') UNDERSCORED_REF_NAME=$(echo "${{ inputs.warehouse-type }}_dbt_${DBT_VERSION}_${BRANCH_NAME}" | awk '{print tolower($0)}' | head -c 40 | sed "s/[-\/]/_/g") - echo "$PROFILES_YML" | base64 -d | sed "s//py_$UNDERSCORED_REF_NAME/g" > ~/.dbt/profiles.yml + export SCHEMA_NAME="py_$UNDERSCORED_REF_NAME" + + if [ -n "$SECRETS_JSON" ]; then + DECODED=$(echo "$SECRETS_JSON" | base64 -d) + BQ_KEYFILE_CONTENT=$(echo "$DECODED" | jq -r '.BIGQUERY_KEYFILE // empty') + if [ -n "$BQ_KEYFILE_CONTENT" ]; then + BIGQUERY_KEYFILE_PATH="$(mktemp /tmp/bigquery_keyfile.XXXXXX.json)" + echo "$BQ_KEYFILE_CONTENT" > "$BIGQUERY_KEYFILE_PATH" + chmod 600 "$BIGQUERY_KEYFILE_PATH" + export BIGQUERY_KEYFILE_PATH + fi + + while IFS= read -r entry; do + key=$(jq -r '.key' <<<"$entry") + value=$(jq -r '.value' <<<"$entry") + if [[ "$key" =~ ^[A-Z_][A-Z0-9_]*$ ]]; then + printf -v "$key" '%s' "$value" + export "$key" + else + echo "Skipping invalid secret key: $key" >&2 + fi + done < <(echo "$DECODED" | jq -c 'to_entries[] | select(.key != "BIGQUERY_KEYFILE" and (.value | type == "string"))') + + envsubst < "${{ github.workspace }}/elementary/tests/profiles/profiles.yml.template" > ~/.dbt/profiles.yml + elif [ -n "$PROFILES_YML" ]; then + echo "$PROFILES_YML" | base64 -d | sed "s//$SCHEMA_NAME/g" > ~/.dbt/profiles.yml + else + # Fork PRs: no secrets available — only substitute SCHEMA_NAME so + # cloud-target placeholders stay as-is (only docker targets work). + envsubst '$SCHEMA_NAME' < "${{ github.workspace }}/elementary/tests/profiles/profiles.yml.template" > ~/.dbt/profiles.yml + fi - name: Run Python package unit tests run: pytest -vv tests/unit --warehouse-type ${{ inputs.warehouse-type }} diff --git a/tests/profiles/profiles.yml.template b/tests/profiles/profiles.yml.template new file mode 100644 index 000000000..dfc73fccc --- /dev/null +++ b/tests/profiles/profiles.yml.template @@ -0,0 +1,90 @@ +elementary_tests: + target: postgres + outputs: &outputs + + # ── Docker targets (plaintext, no secrets needed) ────────────────── + + postgres: + type: postgres + host: 127.0.0.1 + port: 5432 + user: admin + password: admin + dbname: postgres + schema: ${SCHEMA_NAME} + threads: 32 + + clickhouse: + type: clickhouse + host: localhost + port: 8123 + user: default + password: default + schema: ${SCHEMA_NAME} + threads: 4 + + # ── Cloud targets (secrets substituted at CI time) ───────────────── + + snowflake: + type: snowflake + account: ${SNOWFLAKE_ACCOUNT} + user: ${SNOWFLAKE_USER} + password: ${SNOWFLAKE_PASSWORD} + role: ${SNOWFLAKE_ROLE} + database: ${SNOWFLAKE_DATABASE} + warehouse: ${SNOWFLAKE_WAREHOUSE} + schema: ${SCHEMA_NAME} + threads: 4 + + bigquery: + type: bigquery + method: service-account + project: ${BIGQUERY_PROJECT} + dataset: ${SCHEMA_NAME} + keyfile: ${BIGQUERY_KEYFILE_PATH} + threads: 4 + + redshift: + type: redshift + host: ${REDSHIFT_HOST} + user: ${REDSHIFT_USER} + password: ${REDSHIFT_PASSWORD} + port: ${REDSHIFT_PORT} + dbname: ${REDSHIFT_DBNAME} + schema: ${SCHEMA_NAME} + threads: 4 + + databricks_catalog: + type: databricks + host: ${DATABRICKS_HOST} + http_path: ${DATABRICKS_HTTP_PATH} + catalog: ${DATABRICKS_CATALOG} + schema: ${SCHEMA_NAME} + client_id: ${DATABRICKS_CLIENT_ID} + client_secret: ${DATABRICKS_CLIENT_SECRET} + threads: 4 + + spark: + type: spark + method: http + host: ${SPARK_HOST} + token: ${SPARK_TOKEN} + cluster: ${SPARK_CLUSTER} + schema: ${SCHEMA_NAME} + threads: 4 + + athena: + type: athena + s3_staging_dir: ${ATHENA_S3_STAGING_DIR} + s3_data_dir: ${ATHENA_S3_DATA_DIR} + region_name: ${ATHENA_REGION} + database: awsdatacatalog + schema: ${SCHEMA_NAME} + aws_access_key_id: ${ATHENA_AWS_ACCESS_KEY_ID} + aws_secret_access_key: ${ATHENA_AWS_SECRET_ACCESS_KEY} + threads: 4 + +# The internal CLI dbt_project uses profile "elementary", so we alias the same outputs. +elementary: + target: postgres + outputs: *outputs From c2a16d4b21b3b9a2e35c7711cdbd486805969934 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 26 Feb 2026 01:22:11 +0000 Subject: [PATCH 02/17] fix: guard secret-dependent steps for docker targets (postgres, clickhouse) Co-Authored-By: Itamar Hartstein --- .github/workflows/test-warehouse.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index c9aa46b9e..54b92da39 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -220,6 +220,7 @@ jobs: run: edr --help - name: Run monitor + if: ${{ !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }} env: SLACK_WEBHOOK: ${{ secrets.CI_SLACK_WEBHOOK }} run: > @@ -231,12 +232,14 @@ jobs: --slack-webhook "$SLACK_WEBHOOK" - name: Validate alerts statuses were updated + if: ${{ !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }} working-directory: ${{ env.CLI_INTERNAL_DBT_PKG_DIR }} run: | dbt deps dbt run-operation validate_alert_statuses_are_updated -t "${{ inputs.warehouse-type }}" - name: Run report + if: ${{ !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }} run: > edr monitor report -t "${{ inputs.warehouse-type }}" @@ -244,23 +247,27 @@ jobs: --project-profile-target "${{ inputs.warehouse-type }}" - name: Set report artifact name + if: ${{ !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }} id: set_report_artifact_name run: | ARTIFACT_NAME=$(echo "report_${{ inputs.warehouse-type }}_${BRANCH_NAME}_dbt_${{ inputs.dbt-version || '' }}.html" | awk '{print tolower($0)}' | sed 's#[":/\\<>|*?-]#_#g') echo "artifact_name=$ARTIFACT_NAME" >> "$GITHUB_OUTPUT" - name: Upload report artifact + if: ${{ !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }} uses: actions/upload-artifact@v4 with: name: ${{ steps.set_report_artifact_name.outputs.artifact_name }} path: elementary/edr_target/elementary_report.html - name: Write GCS keyfile + if: ${{ !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }} env: GCS_KEYFILE: ${{ secrets.GCS_KEYFILE }} run: echo "$GCS_KEYFILE" | base64 -d > /tmp/gcs_keyfile.json - name: Run send report + if: ${{ !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }} env: SLACK_TOKEN: ${{ secrets.CI_SLACK_TOKEN }} AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} @@ -285,17 +292,19 @@ jobs: --update-bucket-website true - name: Set artifact name + if: ${{ !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }} id: set_artifact_name run: | ARTIFACT_NAME=$(echo "edr_${{ inputs.warehouse-type }}_${BRANCH_NAME}_dbt_${{ inputs.dbt-version || '' }}.log" | awk '{print tolower($0)}' | sed 's#[":/\\<>|*?-]#_#g') echo "artifact_name=$ARTIFACT_NAME" >> "$GITHUB_OUTPUT" - name: Upload edr log - if: ${{ always() }} + if: ${{ always() && !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }} uses: actions/upload-artifact@v4 with: name: ${{ steps.set_artifact_name.outputs.artifact_name }} path: elementary/edr_target/edr.log - name: Run Python package e2e tests + if: ${{ !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }} run: pytest -vv tests/e2e --warehouse-type ${{ inputs.warehouse-type }} From cf1f2221d0b7c721fbe01c8ee01d27c6d2126432 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 26 Feb 2026 01:44:54 +0000 Subject: [PATCH 03/17] refactor: replace shell-based profiles generation with Python CLI script + Jinja2 template - Add generate_profiles.py (click CLI) for rendering profiles from secrets - Convert profiles.yml.template to profiles.yml.j2 (Jinja2) - Use service-account-json method for BigQuery (no temp keyfile needed) - Elementary profile schema uses _elementary suffix via YAML merge keys - Simplify test-warehouse.yml Write dbt profiles step Co-Authored-By: Itamar Hartstein --- .github/workflows/test-warehouse.yml | 49 ++-------- tests/profiles/generate_profiles.py | 129 +++++++++++++++++++++++++++ tests/profiles/profiles.yml.j2 | 103 +++++++++++++++++++++ tests/profiles/profiles.yml.template | 90 ------------------- 4 files changed, 240 insertions(+), 131 deletions(-) create mode 100644 tests/profiles/generate_profiles.py create mode 100644 tests/profiles/profiles.yml.j2 delete mode 100644 tests/profiles/profiles.yml.template diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index 54b92da39..5c904c252 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -114,51 +114,18 @@ jobs: - name: Write dbt profiles env: - # New lean JSON secret (base64 encoded) - SECRETS_JSON: ${{ secrets.CI_WAREHOUSE_SECRETS || '' }} + CI_WAREHOUSE_SECRETS: ${{ secrets.CI_WAREHOUSE_SECRETS || '' }} # Backwards-compat fallback while migrating secrets - PROFILES_YML: ${{ secrets.CI_PROFILES_YML }} + CI_PROFILES_YML: ${{ secrets.CI_PROFILES_YML }} run: | - mkdir -p ~/.dbt - - if ! command -v envsubst >/dev/null 2>&1; then - sudo apt-get update - sudo apt-get install -y gettext-base - fi - DBT_VERSION=$(pip show dbt-core | grep -i version | awk '{print $2}' | sed 's/\.//g') UNDERSCORED_REF_NAME=$(echo "${{ inputs.warehouse-type }}_dbt_${DBT_VERSION}_${BRANCH_NAME}" | awk '{print tolower($0)}' | head -c 40 | sed "s/[-\/]/_/g") - export SCHEMA_NAME="py_$UNDERSCORED_REF_NAME" - - if [ -n "$SECRETS_JSON" ]; then - DECODED=$(echo "$SECRETS_JSON" | base64 -d) - BQ_KEYFILE_CONTENT=$(echo "$DECODED" | jq -r '.BIGQUERY_KEYFILE // empty') - if [ -n "$BQ_KEYFILE_CONTENT" ]; then - BIGQUERY_KEYFILE_PATH="$(mktemp /tmp/bigquery_keyfile.XXXXXX.json)" - echo "$BQ_KEYFILE_CONTENT" > "$BIGQUERY_KEYFILE_PATH" - chmod 600 "$BIGQUERY_KEYFILE_PATH" - export BIGQUERY_KEYFILE_PATH - fi - - while IFS= read -r entry; do - key=$(jq -r '.key' <<<"$entry") - value=$(jq -r '.value' <<<"$entry") - if [[ "$key" =~ ^[A-Z_][A-Z0-9_]*$ ]]; then - printf -v "$key" '%s' "$value" - export "$key" - else - echo "Skipping invalid secret key: $key" >&2 - fi - done < <(echo "$DECODED" | jq -c 'to_entries[] | select(.key != "BIGQUERY_KEYFILE" and (.value | type == "string"))') - - envsubst < "${{ github.workspace }}/elementary/tests/profiles/profiles.yml.template" > ~/.dbt/profiles.yml - elif [ -n "$PROFILES_YML" ]; then - echo "$PROFILES_YML" | base64 -d | sed "s//$SCHEMA_NAME/g" > ~/.dbt/profiles.yml - else - # Fork PRs: no secrets available — only substitute SCHEMA_NAME so - # cloud-target placeholders stay as-is (only docker targets work). - envsubst '$SCHEMA_NAME' < "${{ github.workspace }}/elementary/tests/profiles/profiles.yml.template" > ~/.dbt/profiles.yml - fi + + python "${{ github.workspace }}/elementary/tests/profiles/generate_profiles.py" \ + --template "${{ github.workspace }}/elementary/tests/profiles/profiles.yml.j2" \ + --output ~/.dbt/profiles.yml \ + --schema-name "py_$UNDERSCORED_REF_NAME" \ + --profiles-yml-env CI_PROFILES_YML - name: Run Python package unit tests run: pytest -vv tests/unit --warehouse-type ${{ inputs.warehouse-type }} diff --git a/tests/profiles/generate_profiles.py b/tests/profiles/generate_profiles.py new file mode 100644 index 000000000..c65bbe739 --- /dev/null +++ b/tests/profiles/generate_profiles.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python3 +"""Generate ~/.dbt/profiles.yml from a Jinja2 template and an optional secrets JSON.""" + +from __future__ import annotations + +import base64 +import json +import os +from pathlib import Path +from typing import Any + +import click +import yaml +from jinja2 import BaseLoader, Environment, Undefined + + +class _NullUndefined(Undefined): + """Render missing variables as empty strings so docker-only runs don't crash.""" + + def __str__(self) -> str: + return "" + + def __iter__(self): + return iter([]) + + def __bool__(self) -> bool: + return False + + +def _yaml_inline(value: Any) -> str: + """Dump *value* as a compact inline YAML scalar / mapping.""" + if isinstance(value, Undefined): + return "{}" + return yaml.dump(value, default_flow_style=True).strip() + + +@click.command() +@click.option( + "--template", + required=True, + type=click.Path(exists=True, dir_okay=False, path_type=Path), + help="Path to the Jinja2 profiles template (e.g. profiles.yml.j2).", +) +@click.option( + "--output", + required=True, + type=click.Path(dir_okay=False, path_type=Path), + help="Destination path for the rendered profiles.yml.", +) +@click.option( + "--schema-name", + required=True, + help="Base schema name (e.g. dbt_pkg_ or py_).", +) +@click.option( + "--secrets-json-env", + default="CI_WAREHOUSE_SECRETS", + show_default=True, + help="Name of the env-var holding the base64-encoded JSON secrets blob.", +) +@click.option( + "--profiles-yml-env", + default="", + help="Name of an env-var holding a legacy base64-encoded profiles.yml (fallback).", +) +def main( + template: Path, + output: Path, + schema_name: str, + secrets_json_env: str, + profiles_yml_env: str, +) -> None: + """Render a Jinja2 profiles template into a dbt profiles.yml file. + + Resolution order: + 1. If the env-var named by ``--secrets-json-env`` is set, decode it and + use its key/value pairs (plus *schema_name*) as template variables. + 2. Else if ``--profiles-yml-env`` names a non-empty env-var, decode that + as a legacy base64 profiles.yml and write it directly (replacing + ```` with *schema_name*). + 3. Otherwise render the template with only *schema_name* populated (all + other variables resolve to empty strings — suitable for docker-only + targets on fork PRs). + """ + output.parent.mkdir(parents=True, exist_ok=True) + + secrets_b64 = os.environ.get(secrets_json_env, "").strip() + legacy_b64 = os.environ.get(profiles_yml_env, "").strip() if profiles_yml_env else "" + + # ── Path 2: legacy base64 profiles.yml ────────────────────────────── + if not secrets_b64 and legacy_b64: + click.echo("Using legacy base64 profiles.yml fallback.", err=True) + content = base64.b64decode(legacy_b64).decode() + content = content.replace("", schema_name) + output.write_text(content) + return + + # ── Build template context ────────────────────────────────────────── + context: dict[str, object] = {"schema_name": schema_name} + + if secrets_b64: + decoded: dict = json.loads(base64.b64decode(secrets_b64)) + for key, value in decoded.items(): + context[key.lower()] = value + click.echo( + f"Loaded {len(decoded)} secret(s) from ${secrets_json_env}.", + err=True, + ) + else: + click.echo( + "No secrets found — rendering template for docker-only targets.", + err=True, + ) + + # ── Render ────────────────────────────────────────────────────────── + env = Environment( + loader=BaseLoader(), + undefined=_NullUndefined, + keep_trailing_newline=True, + ) + env.filters["toyaml"] = _yaml_inline + tmpl = env.from_string(template.read_text()) + rendered = tmpl.render(**context) + output.write_text(rendered) + click.echo(f"Wrote {output}", err=True) + + +if __name__ == "__main__": + main() diff --git a/tests/profiles/profiles.yml.j2 b/tests/profiles/profiles.yml.j2 new file mode 100644 index 000000000..b8d1cffcb --- /dev/null +++ b/tests/profiles/profiles.yml.j2 @@ -0,0 +1,103 @@ +elementary_tests: + target: postgres + outputs: + + # ── Docker targets (plaintext, no secrets needed) ────────────────── + + postgres: &postgres + type: postgres + host: 127.0.0.1 + port: 5432 + user: admin + password: admin + dbname: postgres + schema: {{ schema_name }} + threads: 32 + + clickhouse: &clickhouse + type: clickhouse + host: localhost + port: 8123 + user: default + password: default + schema: {{ schema_name }} + threads: 4 + + # ── Cloud targets (secrets substituted at CI time) ───────────────── + + snowflake: &snowflake + type: snowflake + account: {{ snowflake_account }} + user: {{ snowflake_user }} + password: {{ snowflake_password }} + role: {{ snowflake_role }} + database: {{ snowflake_database }} + warehouse: {{ snowflake_warehouse }} + schema: {{ schema_name }} + threads: 4 + + bigquery: &bigquery + type: bigquery + method: service-account-json + project: {{ bigquery_project }} + dataset: {{ schema_name }} + keyfile_json: {{ bigquery_keyfile | toyaml }} + threads: 4 + + redshift: &redshift + type: redshift + host: {{ redshift_host }} + user: {{ redshift_user }} + password: {{ redshift_password }} + port: {{ redshift_port }} + dbname: {{ redshift_dbname }} + schema: {{ schema_name }} + threads: 4 + + databricks_catalog: &databricks_catalog + type: databricks + host: {{ databricks_host }} + http_path: {{ databricks_http_path }} + catalog: {{ databricks_catalog }} + schema: {{ schema_name }} + client_id: {{ databricks_client_id }} + client_secret: {{ databricks_client_secret }} + threads: 4 + + athena: &athena + type: athena + s3_staging_dir: {{ athena_s3_staging_dir }} + s3_data_dir: {{ athena_s3_data_dir }} + region_name: {{ athena_region }} + database: awsdatacatalog + schema: {{ schema_name }} + aws_access_key_id: {{ athena_aws_access_key_id }} + aws_secret_access_key: {{ athena_aws_secret_access_key }} + threads: 4 + +# The internal CLI dbt_project uses profile "elementary", so we alias the +# same targets but override the schema to _elementary. +elementary: + target: postgres + outputs: + postgres: + <<: *postgres + schema: {{ schema_name }}_elementary + clickhouse: + <<: *clickhouse + schema: {{ schema_name }}_elementary + snowflake: + <<: *snowflake + schema: {{ schema_name }}_elementary + bigquery: + <<: *bigquery + dataset: {{ schema_name }}_elementary + redshift: + <<: *redshift + schema: {{ schema_name }}_elementary + databricks_catalog: + <<: *databricks_catalog + schema: {{ schema_name }}_elementary + athena: + <<: *athena + schema: {{ schema_name }}_elementary diff --git a/tests/profiles/profiles.yml.template b/tests/profiles/profiles.yml.template deleted file mode 100644 index dfc73fccc..000000000 --- a/tests/profiles/profiles.yml.template +++ /dev/null @@ -1,90 +0,0 @@ -elementary_tests: - target: postgres - outputs: &outputs - - # ── Docker targets (plaintext, no secrets needed) ────────────────── - - postgres: - type: postgres - host: 127.0.0.1 - port: 5432 - user: admin - password: admin - dbname: postgres - schema: ${SCHEMA_NAME} - threads: 32 - - clickhouse: - type: clickhouse - host: localhost - port: 8123 - user: default - password: default - schema: ${SCHEMA_NAME} - threads: 4 - - # ── Cloud targets (secrets substituted at CI time) ───────────────── - - snowflake: - type: snowflake - account: ${SNOWFLAKE_ACCOUNT} - user: ${SNOWFLAKE_USER} - password: ${SNOWFLAKE_PASSWORD} - role: ${SNOWFLAKE_ROLE} - database: ${SNOWFLAKE_DATABASE} - warehouse: ${SNOWFLAKE_WAREHOUSE} - schema: ${SCHEMA_NAME} - threads: 4 - - bigquery: - type: bigquery - method: service-account - project: ${BIGQUERY_PROJECT} - dataset: ${SCHEMA_NAME} - keyfile: ${BIGQUERY_KEYFILE_PATH} - threads: 4 - - redshift: - type: redshift - host: ${REDSHIFT_HOST} - user: ${REDSHIFT_USER} - password: ${REDSHIFT_PASSWORD} - port: ${REDSHIFT_PORT} - dbname: ${REDSHIFT_DBNAME} - schema: ${SCHEMA_NAME} - threads: 4 - - databricks_catalog: - type: databricks - host: ${DATABRICKS_HOST} - http_path: ${DATABRICKS_HTTP_PATH} - catalog: ${DATABRICKS_CATALOG} - schema: ${SCHEMA_NAME} - client_id: ${DATABRICKS_CLIENT_ID} - client_secret: ${DATABRICKS_CLIENT_SECRET} - threads: 4 - - spark: - type: spark - method: http - host: ${SPARK_HOST} - token: ${SPARK_TOKEN} - cluster: ${SPARK_CLUSTER} - schema: ${SCHEMA_NAME} - threads: 4 - - athena: - type: athena - s3_staging_dir: ${ATHENA_S3_STAGING_DIR} - s3_data_dir: ${ATHENA_S3_DATA_DIR} - region_name: ${ATHENA_REGION} - database: awsdatacatalog - schema: ${SCHEMA_NAME} - aws_access_key_id: ${ATHENA_AWS_ACCESS_KEY_ID} - aws_secret_access_key: ${ATHENA_AWS_SECRET_ACCESS_KEY} - threads: 4 - -# The internal CLI dbt_project uses profile "elementary", so we alias the same outputs. -elementary: - target: postgres - outputs: *outputs From 451ebb3f86e43605e383798a1c7ee6f4ec6d7d2e Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 26 Feb 2026 01:50:39 +0000 Subject: [PATCH 04/17] refactor: use Jinja for-loop for elementary profile, remove legacy fallback Co-Authored-By: Itamar Hartstein --- .github/workflows/test-warehouse.yml | 5 +---- tests/profiles/generate_profiles.py | 27 +++++++-------------------- tests/profiles/profiles.yml.j2 | 27 ++++++--------------------- 3 files changed, 14 insertions(+), 45 deletions(-) diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index 5c904c252..b76da891d 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -115,8 +115,6 @@ jobs: - name: Write dbt profiles env: CI_WAREHOUSE_SECRETS: ${{ secrets.CI_WAREHOUSE_SECRETS || '' }} - # Backwards-compat fallback while migrating secrets - CI_PROFILES_YML: ${{ secrets.CI_PROFILES_YML }} run: | DBT_VERSION=$(pip show dbt-core | grep -i version | awk '{print $2}' | sed 's/\.//g') UNDERSCORED_REF_NAME=$(echo "${{ inputs.warehouse-type }}_dbt_${DBT_VERSION}_${BRANCH_NAME}" | awk '{print tolower($0)}' | head -c 40 | sed "s/[-\/]/_/g") @@ -124,8 +122,7 @@ jobs: python "${{ github.workspace }}/elementary/tests/profiles/generate_profiles.py" \ --template "${{ github.workspace }}/elementary/tests/profiles/profiles.yml.j2" \ --output ~/.dbt/profiles.yml \ - --schema-name "py_$UNDERSCORED_REF_NAME" \ - --profiles-yml-env CI_PROFILES_YML + --schema-name "py_$UNDERSCORED_REF_NAME" - name: Run Python package unit tests run: pytest -vv tests/unit --warehouse-type ${{ inputs.warehouse-type }} diff --git a/tests/profiles/generate_profiles.py b/tests/profiles/generate_profiles.py index c65bbe739..74c0c8ca8 100644 --- a/tests/profiles/generate_profiles.py +++ b/tests/profiles/generate_profiles.py @@ -58,48 +58,35 @@ def _yaml_inline(value: Any) -> str: show_default=True, help="Name of the env-var holding the base64-encoded JSON secrets blob.", ) -@click.option( - "--profiles-yml-env", - default="", - help="Name of an env-var holding a legacy base64-encoded profiles.yml (fallback).", -) def main( template: Path, output: Path, schema_name: str, secrets_json_env: str, - profiles_yml_env: str, ) -> None: """Render a Jinja2 profiles template into a dbt profiles.yml file. Resolution order: 1. If the env-var named by ``--secrets-json-env`` is set, decode it and use its key/value pairs (plus *schema_name*) as template variables. - 2. Else if ``--profiles-yml-env`` names a non-empty env-var, decode that - as a legacy base64 profiles.yml and write it directly (replacing - ```` with *schema_name*). - 3. Otherwise render the template with only *schema_name* populated (all + 2. Otherwise render the template with only *schema_name* populated (all other variables resolve to empty strings — suitable for docker-only targets on fork PRs). """ output.parent.mkdir(parents=True, exist_ok=True) secrets_b64 = os.environ.get(secrets_json_env, "").strip() - legacy_b64 = os.environ.get(profiles_yml_env, "").strip() if profiles_yml_env else "" - - # ── Path 2: legacy base64 profiles.yml ────────────────────────────── - if not secrets_b64 and legacy_b64: - click.echo("Using legacy base64 profiles.yml fallback.", err=True) - content = base64.b64decode(legacy_b64).decode() - content = content.replace("", schema_name) - output.write_text(content) - return # ── Build template context ────────────────────────────────────────── context: dict[str, object] = {"schema_name": schema_name} if secrets_b64: - decoded: dict = json.loads(base64.b64decode(secrets_b64)) + try: + decoded: dict = json.loads(base64.b64decode(secrets_b64)) + except (ValueError, json.JSONDecodeError) as e: + raise click.ClickException( + f"Failed to decode ${secrets_json_env}: {e}" + ) from e for key, value in decoded.items(): context[key.lower()] = value click.echo( diff --git a/tests/profiles/profiles.yml.j2 b/tests/profiles/profiles.yml.j2 index b8d1cffcb..dd700bd6d 100644 --- a/tests/profiles/profiles.yml.j2 +++ b/tests/profiles/profiles.yml.j2 @@ -80,24 +80,9 @@ elementary_tests: elementary: target: postgres outputs: - postgres: - <<: *postgres - schema: {{ schema_name }}_elementary - clickhouse: - <<: *clickhouse - schema: {{ schema_name }}_elementary - snowflake: - <<: *snowflake - schema: {{ schema_name }}_elementary - bigquery: - <<: *bigquery - dataset: {{ schema_name }}_elementary - redshift: - <<: *redshift - schema: {{ schema_name }}_elementary - databricks_catalog: - <<: *databricks_catalog - schema: {{ schema_name }}_elementary - athena: - <<: *athena - schema: {{ schema_name }}_elementary +{%- set targets = ['postgres', 'clickhouse', 'snowflake', 'bigquery', 'redshift', 'databricks_catalog', 'athena'] %} +{%- for t in targets %} + {{ t }}: + <<: *{{ t }} + {{ 'dataset' if t == 'bigquery' else 'schema' }}: {{ schema_name }}_elementary +{%- endfor %} From 01ddcd519f7db4e03cc92f5dd6ab6e37bd5cb81f Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 26 Feb 2026 01:52:04 +0000 Subject: [PATCH 05/17] feat: use StrictUndefined when secrets are present to catch typos Co-Authored-By: Itamar Hartstein --- tests/profiles/generate_profiles.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/profiles/generate_profiles.py b/tests/profiles/generate_profiles.py index 74c0c8ca8..e5b977b66 100644 --- a/tests/profiles/generate_profiles.py +++ b/tests/profiles/generate_profiles.py @@ -11,7 +11,7 @@ import click import yaml -from jinja2 import BaseLoader, Environment, Undefined +from jinja2 import BaseLoader, Environment, StrictUndefined, Undefined class _NullUndefined(Undefined): @@ -100,9 +100,13 @@ def main( ) # ── Render ────────────────────────────────────────────────────────── + # When secrets are loaded, use StrictUndefined so typos in secret keys + # fail fast. For docker-only runs (no secrets) use _NullUndefined so + # cloud placeholders silently resolve to empty strings. + undefined_cls = StrictUndefined if secrets_b64 else _NullUndefined env = Environment( loader=BaseLoader(), - undefined=_NullUndefined, + undefined=undefined_cls, keep_trailing_newline=True, ) env.filters["toyaml"] = _yaml_inline From b0cc1f4551ebaee0e04d65aacaacb8fd89c164e4 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 26 Feb 2026 01:58:22 +0000 Subject: [PATCH 06/17] fix: add isinstance(decoded, dict) validation for secrets JSON Co-Authored-By: Itamar Hartstein --- tests/profiles/generate_profiles.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/profiles/generate_profiles.py b/tests/profiles/generate_profiles.py index e5b977b66..fb12a8f52 100644 --- a/tests/profiles/generate_profiles.py +++ b/tests/profiles/generate_profiles.py @@ -87,6 +87,11 @@ def main( raise click.ClickException( f"Failed to decode ${secrets_json_env}: {e}" ) from e + if not isinstance(decoded, dict): + raise click.ClickException( + f"Expected JSON object for ${secrets_json_env}, " + f"got {type(decoded).__name__}" + ) for key, value in decoded.items(): context[key.lower()] = value click.echo( From a0511885ced5e0533dde6cf16ca844a65df4ede7 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 26 Feb 2026 02:04:58 +0000 Subject: [PATCH 07/17] fix: add auth_type:oauth for databricks, catch binascii.Error Co-Authored-By: Itamar Hartstein --- tests/profiles/generate_profiles.py | 3 ++- tests/profiles/profiles.yml.j2 | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/profiles/generate_profiles.py b/tests/profiles/generate_profiles.py index fb12a8f52..38c1aaeef 100644 --- a/tests/profiles/generate_profiles.py +++ b/tests/profiles/generate_profiles.py @@ -4,6 +4,7 @@ from __future__ import annotations import base64 +import binascii import json import os from pathlib import Path @@ -83,7 +84,7 @@ def main( if secrets_b64: try: decoded: dict = json.loads(base64.b64decode(secrets_b64)) - except (ValueError, json.JSONDecodeError) as e: + except (binascii.Error, json.JSONDecodeError) as e: raise click.ClickException( f"Failed to decode ${secrets_json_env}: {e}" ) from e diff --git a/tests/profiles/profiles.yml.j2 b/tests/profiles/profiles.yml.j2 index dd700bd6d..ced5ef83d 100644 --- a/tests/profiles/profiles.yml.j2 +++ b/tests/profiles/profiles.yml.j2 @@ -60,6 +60,7 @@ elementary_tests: http_path: {{ databricks_http_path }} catalog: {{ databricks_catalog }} schema: {{ schema_name }} + auth_type: oauth client_id: {{ databricks_client_id }} client_secret: {{ databricks_client_secret }} threads: 4 From a92a5b4c54ce083b97a6c3f04220fae66159a3bd Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 26 Feb 2026 02:10:10 +0000 Subject: [PATCH 08/17] =?UTF-8?q?fix:=20revert=20docker/cloud=20split=20in?= =?UTF-8?q?=20elementary=20=E2=80=94=20run=20all=20targets=20under=20pull?= =?UTF-8?q?=5Frequest=5Ftarget?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Itamar Hartstein --- .github/workflows/test-all-warehouses.yml | 33 +++++++---------------- .github/workflows/test-warehouse.yml | 11 +------- 2 files changed, 11 insertions(+), 33 deletions(-) diff --git a/.github/workflows/test-all-warehouses.yml b/.github/workflows/test-all-warehouses.yml index 0fe19f67d..a90ba5b72 100644 --- a/.github/workflows/test-all-warehouses.yml +++ b/.github/workflows/test-all-warehouses.yml @@ -35,27 +35,6 @@ on: description: Whether to generate new data jobs: - # ── Docker targets ──────────────────────────────────────────────────── - # No secrets needed — run on pull_request (works for forks without approval). - # Skipped on pull_request_target to avoid duplicate runs for internal PRs. - test-docker: - if: github.event_name != 'pull_request_target' - strategy: - fail-fast: false - matrix: - dbt-version: ${{ inputs.dbt-version && fromJSON(format('["{0}"]', inputs.dbt-version)) || fromJSON('[null]') }} - warehouse-type: [postgres, clickhouse] - uses: ./.github/workflows/test-warehouse.yml - with: - warehouse-type: ${{ matrix.warehouse-type }} - elementary-ref: ${{ inputs.elementary-ref || ((github.event_name == 'pull_request_target' || github.event_name == 'pull_request') && github.event.pull_request.head.sha) || '' }} - dbt-data-reliability-ref: ${{ inputs.dbt-data-reliability-ref }} - dbt-version: ${{ matrix.dbt-version }} - generate-data: ${{ inputs.generate-data || false }} - - # ── Cloud targets ───────────────────────────────────────────────────── - # Require secrets — use fork check / approval gate for pull_request_target. - # Determine if this is a fork PR and skip if wrong trigger is used check-fork-status: runs-on: ubuntu-latest @@ -95,7 +74,7 @@ jobs: - name: Approved run: echo "Fork PR approved for testing" - test-cloud: + test: needs: [check-fork-status, approve-fork] if: | ! cancelled() && @@ -107,7 +86,15 @@ jobs: matrix: dbt-version: ${{ inputs.dbt-version && fromJSON(format('["{0}"]', inputs.dbt-version)) || fromJSON('[null]') }} warehouse-type: - [snowflake, bigquery, redshift, databricks_catalog, athena] + [ + postgres, + snowflake, + bigquery, + redshift, + databricks_catalog, + athena, + clickhouse, + ] uses: ./.github/workflows/test-warehouse.yml with: warehouse-type: ${{ matrix.warehouse-type }} diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index b76da891d..fe52d9c7f 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -184,7 +184,6 @@ jobs: run: edr --help - name: Run monitor - if: ${{ !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }} env: SLACK_WEBHOOK: ${{ secrets.CI_SLACK_WEBHOOK }} run: > @@ -196,14 +195,12 @@ jobs: --slack-webhook "$SLACK_WEBHOOK" - name: Validate alerts statuses were updated - if: ${{ !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }} working-directory: ${{ env.CLI_INTERNAL_DBT_PKG_DIR }} run: | dbt deps dbt run-operation validate_alert_statuses_are_updated -t "${{ inputs.warehouse-type }}" - name: Run report - if: ${{ !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }} run: > edr monitor report -t "${{ inputs.warehouse-type }}" @@ -211,27 +208,23 @@ jobs: --project-profile-target "${{ inputs.warehouse-type }}" - name: Set report artifact name - if: ${{ !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }} id: set_report_artifact_name run: | ARTIFACT_NAME=$(echo "report_${{ inputs.warehouse-type }}_${BRANCH_NAME}_dbt_${{ inputs.dbt-version || '' }}.html" | awk '{print tolower($0)}' | sed 's#[":/\\<>|*?-]#_#g') echo "artifact_name=$ARTIFACT_NAME" >> "$GITHUB_OUTPUT" - name: Upload report artifact - if: ${{ !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }} uses: actions/upload-artifact@v4 with: name: ${{ steps.set_report_artifact_name.outputs.artifact_name }} path: elementary/edr_target/elementary_report.html - name: Write GCS keyfile - if: ${{ !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }} env: GCS_KEYFILE: ${{ secrets.GCS_KEYFILE }} run: echo "$GCS_KEYFILE" | base64 -d > /tmp/gcs_keyfile.json - name: Run send report - if: ${{ !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }} env: SLACK_TOKEN: ${{ secrets.CI_SLACK_TOKEN }} AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} @@ -256,19 +249,17 @@ jobs: --update-bucket-website true - name: Set artifact name - if: ${{ !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }} id: set_artifact_name run: | ARTIFACT_NAME=$(echo "edr_${{ inputs.warehouse-type }}_${BRANCH_NAME}_dbt_${{ inputs.dbt-version || '' }}.log" | awk '{print tolower($0)}' | sed 's#[":/\\<>|*?-]#_#g') echo "artifact_name=$ARTIFACT_NAME" >> "$GITHUB_OUTPUT" - name: Upload edr log - if: ${{ always() && !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }} + if: ${{ always() }} uses: actions/upload-artifact@v4 with: name: ${{ steps.set_artifact_name.outputs.artifact_name }} path: elementary/edr_target/edr.log - name: Run Python package e2e tests - if: ${{ !contains(fromJSON('["postgres", "clickhouse"]'), inputs.warehouse-type) }} run: pytest -vv tests/e2e --warehouse-type ${{ inputs.warehouse-type }} From 22c58496a6857f41e626ab45ac3b60573b0b28aa Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 26 Feb 2026 09:36:28 +0000 Subject: [PATCH 09/17] fix: add missing bigquery profile fields (location, priority) Co-Authored-By: Itamar Hartstein --- tests/profiles/profiles.yml.j2 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/profiles/profiles.yml.j2 b/tests/profiles/profiles.yml.j2 index ced5ef83d..752ef20b0 100644 --- a/tests/profiles/profiles.yml.j2 +++ b/tests/profiles/profiles.yml.j2 @@ -42,6 +42,8 @@ elementary_tests: project: {{ bigquery_project }} dataset: {{ schema_name }} keyfile_json: {{ bigquery_keyfile | toyaml }} + location: US + priority: interactive threads: 4 redshift: &redshift From 23f04abb68c84a3c5587e79b5cf20337ec7bd0b9 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 26 Feb 2026 22:16:00 +0000 Subject: [PATCH 10/17] fix: apply toyaml filter to all secret-backed credential fields Co-Authored-By: Itamar Hartstein --- tests/profiles/profiles.yml.j2 | 44 +++++++++++++++++----------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/tests/profiles/profiles.yml.j2 b/tests/profiles/profiles.yml.j2 index 752ef20b0..c1c2accf7 100644 --- a/tests/profiles/profiles.yml.j2 +++ b/tests/profiles/profiles.yml.j2 @@ -27,19 +27,19 @@ elementary_tests: snowflake: &snowflake type: snowflake - account: {{ snowflake_account }} - user: {{ snowflake_user }} - password: {{ snowflake_password }} - role: {{ snowflake_role }} - database: {{ snowflake_database }} - warehouse: {{ snowflake_warehouse }} + account: {{ snowflake_account | toyaml }} + user: {{ snowflake_user | toyaml }} + password: {{ snowflake_password | toyaml }} + role: {{ snowflake_role | toyaml }} + database: {{ snowflake_database | toyaml }} + warehouse: {{ snowflake_warehouse | toyaml }} schema: {{ schema_name }} threads: 4 bigquery: &bigquery type: bigquery method: service-account-json - project: {{ bigquery_project }} + project: {{ bigquery_project | toyaml }} dataset: {{ schema_name }} keyfile_json: {{ bigquery_keyfile | toyaml }} location: US @@ -48,34 +48,34 @@ elementary_tests: redshift: &redshift type: redshift - host: {{ redshift_host }} - user: {{ redshift_user }} - password: {{ redshift_password }} - port: {{ redshift_port }} - dbname: {{ redshift_dbname }} + host: {{ redshift_host | toyaml }} + user: {{ redshift_user | toyaml }} + password: {{ redshift_password | toyaml }} + port: {{ redshift_port | toyaml }} + dbname: {{ redshift_dbname | toyaml }} schema: {{ schema_name }} threads: 4 databricks_catalog: &databricks_catalog type: databricks - host: {{ databricks_host }} - http_path: {{ databricks_http_path }} - catalog: {{ databricks_catalog }} + host: {{ databricks_host | toyaml }} + http_path: {{ databricks_http_path | toyaml }} + catalog: {{ databricks_catalog | toyaml }} schema: {{ schema_name }} auth_type: oauth - client_id: {{ databricks_client_id }} - client_secret: {{ databricks_client_secret }} + client_id: {{ databricks_client_id | toyaml }} + client_secret: {{ databricks_client_secret | toyaml }} threads: 4 athena: &athena type: athena - s3_staging_dir: {{ athena_s3_staging_dir }} - s3_data_dir: {{ athena_s3_data_dir }} - region_name: {{ athena_region }} + s3_staging_dir: {{ athena_s3_staging_dir | toyaml }} + s3_data_dir: {{ athena_s3_data_dir | toyaml }} + region_name: {{ athena_region | toyaml }} database: awsdatacatalog schema: {{ schema_name }} - aws_access_key_id: {{ athena_aws_access_key_id }} - aws_secret_access_key: {{ athena_aws_secret_access_key }} + aws_access_key_id: {{ athena_aws_access_key_id | toyaml }} + aws_secret_access_key: {{ athena_aws_secret_access_key | toyaml }} threads: 4 # The internal CLI dbt_project uses profile "elementary", so we alias the From 91f260c8eadbc3eca82e7332891e6cebd43910b2 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 26 Feb 2026 22:18:54 +0000 Subject: [PATCH 11/17] fix: strip YAML document-end marker from toyaml filter for scalar values Co-Authored-By: Itamar Hartstein --- tests/profiles/generate_profiles.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tests/profiles/generate_profiles.py b/tests/profiles/generate_profiles.py index 38c1aaeef..08e51a486 100644 --- a/tests/profiles/generate_profiles.py +++ b/tests/profiles/generate_profiles.py @@ -29,10 +29,21 @@ def __bool__(self) -> bool: def _yaml_inline(value: Any) -> str: - """Dump *value* as a compact inline YAML scalar / mapping.""" + """Dump *value* as a YAML-safe scalar or compact inline mapping. + + For dicts (e.g. bigquery_keyfile) this produces ``{key: val, ...}``. + For scalars it strips the trailing YAML document-end marker (``...``) + that ``yaml.dump`` appends to bare values. + """ if isinstance(value, Undefined): return "{}" - return yaml.dump(value, default_flow_style=True).strip() + dumped = yaml.dump(value, default_flow_style=True) + result = dumped.rstrip() + if result.endswith("\n..."): + result = result[: -len("\n...")] + elif result.endswith("..."): + result = result[: -len("...")] + return result.rstrip() @click.command() From 074cb295bbab1225b61789d9ba63e84ba331597e Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 26 Feb 2026 22:22:59 +0000 Subject: [PATCH 12/17] =?UTF-8?q?fix:=20make=20toyaml=20filter=20type-awar?= =?UTF-8?q?e=20=E2=80=94=20preserve=20int/bool,=20only=20quote=20ambiguous?= =?UTF-8?q?=20strings?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Itamar Hartstein --- tests/profiles/generate_profiles.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/tests/profiles/generate_profiles.py b/tests/profiles/generate_profiles.py index 08e51a486..6c20a2752 100644 --- a/tests/profiles/generate_profiles.py +++ b/tests/profiles/generate_profiles.py @@ -32,18 +32,27 @@ def _yaml_inline(value: Any) -> str: """Dump *value* as a YAML-safe scalar or compact inline mapping. For dicts (e.g. bigquery_keyfile) this produces ``{key: val, ...}``. - For scalars it strips the trailing YAML document-end marker (``...``) - that ``yaml.dump`` appends to bare values. + For non-string scalars (int, float, bool) the value passes through + unchanged so that YAML keeps its native type. + For strings, values that YAML would misinterpret (e.g. ``"yes"`` as + bool, ``"123"`` as int, ``"null"`` as None) are quoted. """ if isinstance(value, Undefined): return "{}" - dumped = yaml.dump(value, default_flow_style=True) - result = dumped.rstrip() - if result.endswith("\n..."): - result = result[: -len("\n...")] - elif result.endswith("..."): - result = result[: -len("...")] - return result.rstrip() + if isinstance(value, dict): + return yaml.dump(value, default_flow_style=True).strip() + if not isinstance(value, str): + # int, float, bool — pass through so YAML keeps native type + return str(value).lower() if isinstance(value, bool) else str(value) + # For strings, check if YAML would misinterpret the value + loaded = yaml.safe_load(value) + if loaded is None or not isinstance(loaded, str): + # YAML would coerce to non-string — quote it + dumped = yaml.dump(value, default_flow_style=True).rstrip() + if dumped.endswith("..."): + dumped = dumped[: -len("...")].rstrip() + return dumped + return value @click.command() From 49c61521ca4e8067287207fb460718dff0df58db Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 26 Feb 2026 22:25:33 +0000 Subject: [PATCH 13/17] =?UTF-8?q?fix:=20toyaml=20filter=20=E2=80=94=20emit?= =?UTF-8?q?=20numeric=20strings=20as=20unquoted=20ints=20for=20port=20fiel?= =?UTF-8?q?ds?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Itamar Hartstein --- tests/profiles/generate_profiles.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/profiles/generate_profiles.py b/tests/profiles/generate_profiles.py index 6c20a2752..a5d8d08ba 100644 --- a/tests/profiles/generate_profiles.py +++ b/tests/profiles/generate_profiles.py @@ -46,12 +46,15 @@ def _yaml_inline(value: Any) -> str: return str(value).lower() if isinstance(value, bool) else str(value) # For strings, check if YAML would misinterpret the value loaded = yaml.safe_load(value) - if loaded is None or not isinstance(loaded, str): - # YAML would coerce to non-string — quote it + if loaded is None or isinstance(loaded, bool): + # 'null', 'yes', 'no', 'true', 'false' — quote to keep as string dumped = yaml.dump(value, default_flow_style=True).rstrip() if dumped.endswith("..."): dumped = dumped[: -len("...")].rstrip() return dumped + if isinstance(loaded, (int, float)): + # '5439' → emit as 5439 so YAML sees native int/float + return str(loaded) return value From 5f4f41eb0d47a4644fb4e55503c3c2964c691a43 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 26 Feb 2026 22:28:41 +0000 Subject: [PATCH 14/17] fix: return empty string instead of empty dict for undefined values in toyaml Co-Authored-By: Itamar Hartstein --- tests/profiles/generate_profiles.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/profiles/generate_profiles.py b/tests/profiles/generate_profiles.py index a5d8d08ba..30f4b1b1c 100644 --- a/tests/profiles/generate_profiles.py +++ b/tests/profiles/generate_profiles.py @@ -38,7 +38,7 @@ def _yaml_inline(value: Any) -> str: bool, ``"123"`` as int, ``"null"`` as None) are quoted. """ if isinstance(value, Undefined): - return "{}" + return "''" if isinstance(value, dict): return yaml.dump(value, default_flow_style=True).strip() if not isinstance(value, str): From 766a2c3aea705409a42242e38c9363e11ca6cd66 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 26 Feb 2026 22:46:30 +0000 Subject: [PATCH 15/17] fix: add tests/profiles/** to workflow trigger paths Co-Authored-By: Itamar Hartstein --- .github/workflows/test-all-warehouses.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test-all-warehouses.yml b/.github/workflows/test-all-warehouses.yml index a90ba5b72..46ad12da4 100644 --- a/.github/workflows/test-all-warehouses.yml +++ b/.github/workflows/test-all-warehouses.yml @@ -5,6 +5,7 @@ on: branches: ["master"] paths: - elementary/** + - tests/profiles/** - .github/** - pyproject.toml # For fork PRs - requires approval before running (has access to secrets) @@ -12,6 +13,7 @@ on: branches: ["master"] paths: - elementary/** + - tests/profiles/** - .github/** - pyproject.toml workflow_dispatch: From 25f34a096032d16ebee764937e49896db666e54e Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 26 Feb 2026 22:49:04 +0000 Subject: [PATCH 16/17] refactor: simplify _yaml_inline function per review feedback Co-Authored-By: Itamar Hartstein --- tests/profiles/generate_profiles.py | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/tests/profiles/generate_profiles.py b/tests/profiles/generate_profiles.py index 30f4b1b1c..c24901865 100644 --- a/tests/profiles/generate_profiles.py +++ b/tests/profiles/generate_profiles.py @@ -29,32 +29,16 @@ def __bool__(self) -> bool: def _yaml_inline(value: Any) -> str: - """Dump *value* as a YAML-safe scalar or compact inline mapping. + """Render *value* for inline YAML. - For dicts (e.g. bigquery_keyfile) this produces ``{key: val, ...}``. - For non-string scalars (int, float, bool) the value passes through - unchanged so that YAML keeps its native type. - For strings, values that YAML would misinterpret (e.g. ``"yes"`` as - bool, ``"123"`` as int, ``"null"`` as None) are quoted. + * Dicts (e.g. bigquery keyfile) → compact ``{key: val, …}`` + * Undefined (docker-only, no secrets) → empty string ``''`` + * Everything else → pass through as-is """ if isinstance(value, Undefined): return "''" if isinstance(value, dict): return yaml.dump(value, default_flow_style=True).strip() - if not isinstance(value, str): - # int, float, bool — pass through so YAML keeps native type - return str(value).lower() if isinstance(value, bool) else str(value) - # For strings, check if YAML would misinterpret the value - loaded = yaml.safe_load(value) - if loaded is None or isinstance(loaded, bool): - # 'null', 'yes', 'no', 'true', 'false' — quote to keep as string - dumped = yaml.dump(value, default_flow_style=True).rstrip() - if dumped.endswith("..."): - dumped = dumped[: -len("...")].rstrip() - return dumped - if isinstance(loaded, (int, float)): - # '5439' → emit as 5439 so YAML sees native int/float - return str(loaded) return value From dc7625db4937d53dd4fed4215b60c474851b3392 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 26 Feb 2026 22:55:42 +0000 Subject: [PATCH 17/17] fix: broaden workflow trigger to tests/** to catch all test file changes Co-Authored-By: Itamar Hartstein --- .github/workflows/test-all-warehouses.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-all-warehouses.yml b/.github/workflows/test-all-warehouses.yml index 46ad12da4..3246c679f 100644 --- a/.github/workflows/test-all-warehouses.yml +++ b/.github/workflows/test-all-warehouses.yml @@ -5,7 +5,7 @@ on: branches: ["master"] paths: - elementary/** - - tests/profiles/** + - tests/** - .github/** - pyproject.toml # For fork PRs - requires approval before running (has access to secrets) @@ -13,7 +13,7 @@ on: branches: ["master"] paths: - elementary/** - - tests/profiles/** + - tests/** - .github/** - pyproject.toml workflow_dispatch: