diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index 0a1f7fd71..fe52d9c7f 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -114,12 +114,15 @@ jobs: - name: Write dbt profiles env: - PROFILES_YML: ${{ secrets.CI_PROFILES_YML }} + CI_WAREHOUSE_SECRETS: ${{ secrets.CI_WAREHOUSE_SECRETS || '' }} run: | - mkdir -p ~/.dbt DBT_VERSION=$(pip show dbt-core | grep -i version | awk '{print $2}' | sed 's/\.//g') UNDERSCORED_REF_NAME=$(echo "${{ inputs.warehouse-type }}_dbt_${DBT_VERSION}_${BRANCH_NAME}" | awk '{print tolower($0)}' | head -c 40 | sed "s/[-\/]/_/g") - echo "$PROFILES_YML" | base64 -d | sed "s//py_$UNDERSCORED_REF_NAME/g" > ~/.dbt/profiles.yml + + python "${{ github.workspace }}/elementary/tests/profiles/generate_profiles.py" \ + --template "${{ github.workspace }}/elementary/tests/profiles/profiles.yml.j2" \ + --output ~/.dbt/profiles.yml \ + --schema-name "py_$UNDERSCORED_REF_NAME" - name: Run Python package unit tests run: pytest -vv tests/unit --warehouse-type ${{ inputs.warehouse-type }} diff --git a/tests/profiles/generate_profiles.py b/tests/profiles/generate_profiles.py new file mode 100644 index 000000000..38c1aaeef --- /dev/null +++ b/tests/profiles/generate_profiles.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python3 +"""Generate ~/.dbt/profiles.yml from a Jinja2 template and an optional secrets JSON.""" + +from __future__ import annotations + +import base64 +import binascii +import json +import os +from pathlib import Path +from typing import Any + +import click +import yaml +from jinja2 import BaseLoader, Environment, StrictUndefined, Undefined + + +class _NullUndefined(Undefined): + """Render missing variables as empty strings so docker-only runs don't crash.""" + + def __str__(self) -> str: + return "" + + def __iter__(self): + return iter([]) + + def __bool__(self) -> bool: + return False + + +def _yaml_inline(value: Any) -> str: + """Dump *value* as a compact inline YAML scalar / mapping.""" + if isinstance(value, Undefined): + return "{}" + return yaml.dump(value, default_flow_style=True).strip() + + +@click.command() +@click.option( + "--template", + required=True, + type=click.Path(exists=True, dir_okay=False, path_type=Path), + help="Path to the Jinja2 profiles template (e.g. profiles.yml.j2).", +) +@click.option( + "--output", + required=True, + type=click.Path(dir_okay=False, path_type=Path), + help="Destination path for the rendered profiles.yml.", +) +@click.option( + "--schema-name", + required=True, + help="Base schema name (e.g. dbt_pkg_ or py_).", +) +@click.option( + "--secrets-json-env", + default="CI_WAREHOUSE_SECRETS", + show_default=True, + help="Name of the env-var holding the base64-encoded JSON secrets blob.", +) +def main( + template: Path, + output: Path, + schema_name: str, + secrets_json_env: str, +) -> None: + """Render a Jinja2 profiles template into a dbt profiles.yml file. + + Resolution order: + 1. If the env-var named by ``--secrets-json-env`` is set, decode it and + use its key/value pairs (plus *schema_name*) as template variables. + 2. Otherwise render the template with only *schema_name* populated (all + other variables resolve to empty strings — suitable for docker-only + targets on fork PRs). + """ + output.parent.mkdir(parents=True, exist_ok=True) + + secrets_b64 = os.environ.get(secrets_json_env, "").strip() + + # ── Build template context ────────────────────────────────────────── + context: dict[str, object] = {"schema_name": schema_name} + + if secrets_b64: + try: + decoded: dict = json.loads(base64.b64decode(secrets_b64)) + except (binascii.Error, json.JSONDecodeError) as e: + raise click.ClickException( + f"Failed to decode ${secrets_json_env}: {e}" + ) from e + if not isinstance(decoded, dict): + raise click.ClickException( + f"Expected JSON object for ${secrets_json_env}, " + f"got {type(decoded).__name__}" + ) + for key, value in decoded.items(): + context[key.lower()] = value + click.echo( + f"Loaded {len(decoded)} secret(s) from ${secrets_json_env}.", + err=True, + ) + else: + click.echo( + "No secrets found — rendering template for docker-only targets.", + err=True, + ) + + # ── Render ────────────────────────────────────────────────────────── + # When secrets are loaded, use StrictUndefined so typos in secret keys + # fail fast. For docker-only runs (no secrets) use _NullUndefined so + # cloud placeholders silently resolve to empty strings. + undefined_cls = StrictUndefined if secrets_b64 else _NullUndefined + env = Environment( + loader=BaseLoader(), + undefined=undefined_cls, + keep_trailing_newline=True, + ) + env.filters["toyaml"] = _yaml_inline + tmpl = env.from_string(template.read_text()) + rendered = tmpl.render(**context) + output.write_text(rendered) + click.echo(f"Wrote {output}", err=True) + + +if __name__ == "__main__": + main() diff --git a/tests/profiles/profiles.yml.j2 b/tests/profiles/profiles.yml.j2 new file mode 100644 index 000000000..752ef20b0 --- /dev/null +++ b/tests/profiles/profiles.yml.j2 @@ -0,0 +1,91 @@ +elementary_tests: + target: postgres + outputs: + + # ── Docker targets (plaintext, no secrets needed) ────────────────── + + postgres: &postgres + type: postgres + host: 127.0.0.1 + port: 5432 + user: admin + password: admin + dbname: postgres + schema: {{ schema_name }} + threads: 32 + + clickhouse: &clickhouse + type: clickhouse + host: localhost + port: 8123 + user: default + password: default + schema: {{ schema_name }} + threads: 4 + + # ── Cloud targets (secrets substituted at CI time) ───────────────── + + snowflake: &snowflake + type: snowflake + account: {{ snowflake_account }} + user: {{ snowflake_user }} + password: {{ snowflake_password }} + role: {{ snowflake_role }} + database: {{ snowflake_database }} + warehouse: {{ snowflake_warehouse }} + schema: {{ schema_name }} + threads: 4 + + bigquery: &bigquery + type: bigquery + method: service-account-json + project: {{ bigquery_project }} + dataset: {{ schema_name }} + keyfile_json: {{ bigquery_keyfile | toyaml }} + location: US + priority: interactive + threads: 4 + + redshift: &redshift + type: redshift + host: {{ redshift_host }} + user: {{ redshift_user }} + password: {{ redshift_password }} + port: {{ redshift_port }} + dbname: {{ redshift_dbname }} + schema: {{ schema_name }} + threads: 4 + + databricks_catalog: &databricks_catalog + type: databricks + host: {{ databricks_host }} + http_path: {{ databricks_http_path }} + catalog: {{ databricks_catalog }} + schema: {{ schema_name }} + auth_type: oauth + client_id: {{ databricks_client_id }} + client_secret: {{ databricks_client_secret }} + threads: 4 + + athena: &athena + type: athena + s3_staging_dir: {{ athena_s3_staging_dir }} + s3_data_dir: {{ athena_s3_data_dir }} + region_name: {{ athena_region }} + database: awsdatacatalog + schema: {{ schema_name }} + aws_access_key_id: {{ athena_aws_access_key_id }} + aws_secret_access_key: {{ athena_aws_secret_access_key }} + threads: 4 + +# The internal CLI dbt_project uses profile "elementary", so we alias the +# same targets but override the schema to _elementary. +elementary: + target: postgres + outputs: +{%- set targets = ['postgres', 'clickhouse', 'snowflake', 'bigquery', 'redshift', 'databricks_catalog', 'athena'] %} +{%- for t in targets %} + {{ t }}: + <<: *{{ t }} + {{ 'dataset' if t == 'bigquery' else 'schema' }}: {{ schema_name }}_elementary +{%- endfor %}