diff --git a/.gitignore b/.gitignore index 66d2edb..2f655a7 100644 --- a/.gitignore +++ b/.gitignore @@ -137,3 +137,8 @@ logs/ sbom.json sbom.spdx.json trivy-results.sarif + +# ======================================== +# Personal/runtime workspace (never committed) +# ======================================== +.local/ diff --git a/Makefile b/Makefile index 7a402d7..899adba 100644 --- a/Makefile +++ b/Makefile @@ -69,15 +69,18 @@ env-check: ## Verify env wiring (1Password item or .env.local) fi # ---------------------------------------------------------------------- -# Docker compose (no secrets needed for compose itself; secrets via .env if any) +# Docker compose # ---------------------------------------------------------------------- +# `compose up` needs GARAGE_RPC_SECRET, GARAGE_ADMIN_TOKEN, POSTGRES_PASSWORD +# from the env (Garage refuses to start without a 32-byte rpc_secret). +# We wrap with $(OP_RUN) so 1Password (or .env.local) supplies them. .PHONY: compose-up -compose-up: ## Start the platform stack - cd $(REPO_ROOT)/docker && docker compose up -d +compose-up: env-check ## Start the platform stack + cd $(REPO_ROOT)/docker && $(OP_RUN) docker compose up -d .PHONY: compose-up-streaming -compose-up-streaming: ## Start the platform stack + Kafka - cd $(REPO_ROOT)/docker && docker compose -f docker-compose.yml -f docker-compose.streaming.yml up -d +compose-up-streaming: env-check ## Start the platform stack + Kafka + cd $(REPO_ROOT)/docker && $(OP_RUN) docker compose -f docker-compose.yml -f docker-compose.streaming.yml up -d .PHONY: compose-down compose-down: ## Stop the platform stack @@ -91,6 +94,13 @@ compose-ps: ## Show running containers compose-logs: ## Tail logs cd $(REPO_ROOT)/docker && docker compose logs -f +# ---------------------------------------------------------------------- +# Garage cluster init (one-time per fresh stack) +# ---------------------------------------------------------------------- +.PHONY: garage-init +garage-init: env-check ## 
Assign layout, create bucket, import keys, grant access + $(OP_RUN) bash $(REPO_ROOT)/scripts/garage-init.sh + # ---------------------------------------------------------------------- # Python ingestion # ---------------------------------------------------------------------- @@ -115,19 +125,30 @@ ingest-test: ## Run the ingestor unit tests (no secrets needed) cd $(REPO_ROOT)/ingestion/python && .venv/bin/pytest # ---------------------------------------------------------------------- -# dbt +# dbt — uses its own venv at transform/.venv # ---------------------------------------------------------------------- +DBT_VENV := $(REPO_ROOT)/transform/.venv +DBT := $(DBT_VENV)/bin/dbt +DBT_PROFILES_DIR := $(REPO_ROOT)/transform + +.PHONY: dbt-install +dbt-install: ## Create transform/.venv and install dbt-core + dbt-duckdb + $(PY) -m venv $(DBT_VENV) + $(DBT_VENV)/bin/pip install -r $(REPO_ROOT)/transform/requirements.txt + @test -f $(DBT_PROFILES_DIR)/profiles.yml || cp $(REPO_ROOT)/transform/profiles.yml.example $(DBT_PROFILES_DIR)/profiles.yml + @echo "dbt ready: $(DBT)" + .PHONY: dbt-deps -dbt-deps: ## Install dbt packages - cd $(REPO_ROOT)/transform && dbt deps +dbt-deps: ## Install dbt packages from packages.yml + cd $(REPO_ROOT)/transform && DBT_PROFILES_DIR=$(DBT_PROFILES_DIR) $(DBT) deps .PHONY: dbt-build dbt-build: env-check ## Run dbt build end-to-end - cd $(REPO_ROOT)/transform && $(OP_RUN) dbt build + cd $(REPO_ROOT)/transform && $(OP_RUN) bash -c 'DBT_PROFILES_DIR=$(DBT_PROFILES_DIR) $(DBT) build' .PHONY: dbt-debug dbt-debug: env-check ## Validate dbt connection - cd $(REPO_ROOT)/transform && $(OP_RUN) dbt debug + cd $(REPO_ROOT)/transform && $(OP_RUN) bash -c 'DBT_PROFILES_DIR=$(DBT_PROFILES_DIR) $(DBT) debug' # ---------------------------------------------------------------------- # Streamlit @@ -157,7 +178,7 @@ precommit: ## Run all pre-commit hooks against everything # End-to-end # ---------------------------------------------------------------------- 
.PHONY: phase1 -phase1: compose-up ingest-bootstrap ingest dbt-deps dbt-build dashboard ## Run the full Phase 1 pipeline +phase1: compose-up garage-init ingest-install ingest-bootstrap ingest dbt-install dbt-deps dbt-build dashboard ## Run the full Phase 1 pipeline # ---------------------------------------------------------------------- # 1Password helpers diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 294de61..24e029b 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -44,6 +44,13 @@ services: ports: - "3900:3900" # S3 API - "3903:3903" # admin API + environment: + # Secrets injected from 1Password via `op run --env-file=.env`. + # Garage reads these secrets (rpc_secret, admin_token, metrics_token) from the matching GARAGE_* env vars. + # See: https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/ + GARAGE_RPC_SECRET: "${GARAGE_RPC_SECRET:?GARAGE_RPC_SECRET must be set (use op run --env-file=.env -- ...)}" + GARAGE_ADMIN_TOKEN: "${GARAGE_ADMIN_TOKEN:?GARAGE_ADMIN_TOKEN must be set}" + GARAGE_METRICS_TOKEN: "${GARAGE_ADMIN_TOKEN:?reuse admin token for metrics}" volumes: - garage-meta:/var/lib/garage/meta - garage-data:/var/lib/garage/data @@ -59,7 +66,7 @@ services: # Lakekeeper — Iceberg REST catalog (Apache 2.0, Rust) # ---------------------------------------------------------------- lakekeeper-db: - image: postgres:18-alpine@sha256:54451ecb8ab38c24c3ec123f2fd501303a3a1856a5c66e98cecf2460d5e1e9d7 + image: postgres:17-alpine@sha256:c7526c0f6c3f30260a563d7bcf8ad778effac59a44f8ffa86678c35418338609 environment: POSTGRES_USER: lakekeeper POSTGRES_PASSWORD: lakekeeper @@ -97,7 +104,7 @@ services: # Postgres — operational mart (reverse-ETL target, app DB) # ---------------------------------------------------------------- postgres: - image: postgres:18-alpine@sha256:54451ecb8ab38c24c3ec123f2fd501303a3a1856a5c66e98cecf2460d5e1e9d7 + image: postgres:17-alpine@sha256:c7526c0f6c3f30260a563d7bcf8ad778effac59a44f8ffa86678c35418338609 ports: -
"5432:5432" environment: @@ -117,7 +124,7 @@ services: # Temporal — orchestration # ---------------------------------------------------------------- temporal-db: - image: postgres:18-alpine@sha256:54451ecb8ab38c24c3ec123f2fd501303a3a1856a5c66e98cecf2460d5e1e9d7 + image: postgres:17-alpine@sha256:c7526c0f6c3f30260a563d7bcf8ad778effac59a44f8ffa86678c35418338609 environment: POSTGRES_USER: temporal POSTGRES_PASSWORD: temporal diff --git a/docker/garage/garage.toml b/docker/garage/garage.toml index f9e8013..c88b4fb 100644 --- a/docker/garage/garage.toml +++ b/docker/garage/garage.toml @@ -6,11 +6,12 @@ db_engine = "lmdb" replication_factor = 1 consistency_mode = "consistent" -# RPC secret — overridden via env at first boot. -# Generate with: openssl rand -hex 32 +# Secrets (rpc_secret, admin_token, metrics_token) are injected via +# environment variables — see docker-compose.yml `garage.environment` +# block. The values come from 1Password (`op run --env-file=.env`). +# Reference: https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/ rpc_bind_addr = "[::]:3901" rpc_public_addr = "127.0.0.1:3901" -rpc_secret = "REPLACE_ME_AT_BOOTSTRAP_via_env" [s3_api] s3_region = "garage" @@ -24,5 +25,3 @@ index = "index.html" [admin] api_bind_addr = "[::]:3903" -admin_token = "REPLACE_ME_AT_BOOTSTRAP_via_env" -metrics_token = "REPLACE_ME_AT_BOOTSTRAP_via_env" diff --git a/scripts/garage-init.sh b/scripts/garage-init.sh new file mode 100755 index 0000000..f2de2f3 --- /dev/null +++ b/scripts/garage-init.sh @@ -0,0 +1,109 @@ +#!/usr/bin/env bash +# Initialize the single-node Garage cluster: +# 1. Assign role to the local node (zone, capacity) +# 2. Apply staged layout +# 3. Create the `bronze` bucket +# 4. Import the access keys from env (resolved by `op run`) +# 5. Grant the imported key read+write on `bronze` +# +# Idempotent — safe to re-run after a stack restart. Each step short-circuits +# if the desired state already exists. 
+# +# Required env (resolved via `op run --env-file=.env`): +# GARAGE_S3_ACCESS_KEY +# GARAGE_S3_SECRET_KEY +# +# Usage: +# make garage-init # via Makefile (wraps with op run automatically) +# bash scripts/garage-init.sh # if env vars are already exported + +set -euo pipefail + +CONTAINER="${GARAGE_CONTAINER:-de-lab-garage-1}" +ZONE="${GARAGE_ZONE:-dc1}" +CAPACITY="${GARAGE_CAPACITY:-10G}" +TAG="${GARAGE_TAG:-local}" +BUCKET="${GARAGE_BUCKET:-bronze}" +KEY_NAME="${GARAGE_KEY_NAME:-de-lab}" + +color() { printf "\033[%sm%s\033[0m" "$1" "$2"; } +info() { echo "$(color 36 "[garage-init]") $*"; } +warn() { echo "$(color 33 "[garage-init]") $*" >&2; } +err() { echo "$(color 31 "[garage-init]") $*" >&2; } + +g() { docker exec -i "$CONTAINER" /garage "$@"; } + +# ---------------------------------------------------------------------- +# 0. Pre-flight: the container must be running (not merely exist) and the S3 credentials must be set +# ---------------------------------------------------------------------- +[ "$(docker inspect -f '{{.State.Running}}' "$CONTAINER" 2> /dev/null)" = "true" ] || { + err "Container '$CONTAINER' is not running. Run 'make compose-up' first." + exit 1 +} + +[ -n "${GARAGE_S3_ACCESS_KEY:-}" ] || { err "GARAGE_S3_ACCESS_KEY missing — run via 'make garage-init' (op run)"; exit 1; } +[ -n "${GARAGE_S3_SECRET_KEY:-}" ] || { err "GARAGE_S3_SECRET_KEY missing — run via 'make garage-init' (op run)"; exit 1; } + +# Wait for daemon to accept commands (polls `garage status` up to 30 times, 1s apart) +info "Waiting for Garage daemon..." +for _ in $(seq 1 30); do + if g status > /dev/null 2>&1; then + break + fi + sleep 1 +done +g status > /dev/null 2>&1 || { err "Garage daemon did not respond within 30s"; exit 1; } + +# ---------------------------------------------------------------------- +# 1.
Layout: assign role + apply (idempotent) +# ---------------------------------------------------------------------- +status_out=$(g status 2>&1) +node_id=$(echo "$status_out" | awk '/HEALTHY NODES/{flag=1; next} flag && NF && $1 != "ID" {print $1; exit}') +[ -n "$node_id" ] || { err "Could not parse node ID from 'garage status'"; echo "$status_out" >&2; exit 1; } + +if echo "$status_out" | grep -q "NO ROLE ASSIGNED"; then + info "Assigning role to node $node_id (zone=$ZONE, cap=$CAPACITY)..." + g layout assign "$node_id" -z "$ZONE" -c "$CAPACITY" -t "$TAG" + info "Applying staged layout (version 1)..." + g layout apply --version 1 +else + info "Node $node_id already has role assigned — skipping layout." +fi + +# ---------------------------------------------------------------------- +# 2. Bucket (idempotent) +# ---------------------------------------------------------------------- +if g bucket list 2>/dev/null | awk 'NR>2 {print $1}' | grep -qx "$BUCKET"; then + info "Bucket '$BUCKET' already exists — skipping." +else + info "Creating bucket '$BUCKET'..." + g bucket create "$BUCKET" +fi + +# ---------------------------------------------------------------------- +# 3. Key import (idempotent) +# ---------------------------------------------------------------------- +if g key list 2>/dev/null | awk 'NR>2 {print $2}' | grep -qx "$KEY_NAME"; then + info "Key '$KEY_NAME' already exists — skipping import." +else + info "Importing access keys (id from env, name=$KEY_NAME)..." + g key import --yes -n "$KEY_NAME" "$GARAGE_S3_ACCESS_KEY" "$GARAGE_S3_SECRET_KEY" +fi + +# ---------------------------------------------------------------------- +# 4. Grant the key access to the bucket (idempotent: garage allow is OK to re-run) +# ---------------------------------------------------------------------- +info "Granting read+write on '$BUCKET' to key '$KEY_NAME'..." 
+g bucket allow --read --write --owner "$BUCKET" --key "$KEY_NAME" + +# ---------------------------------------------------------------------- +# 5. Verify +# ---------------------------------------------------------------------- +info "Verification:" +g bucket info "$BUCKET" 2>&1 | sed 's/^/ /' + +echo "" +echo "$(color 32 "[garage-init] Done.")" +echo " Bucket : $BUCKET" +echo " Key name: $KEY_NAME" +echo " S3 endpoint (host): http://localhost:3900" diff --git a/transform/dbt_project.yml b/transform/dbt_project.yml index f61e0be..de674da 100644 --- a/transform/dbt_project.yml +++ b/transform/dbt_project.yml @@ -18,7 +18,11 @@ clean-targets: models: de_lab: bronze: - +materialized: view + # Materialize bronze as table (not view) to dodge DuckDB v1.5 binder + # bug: view → silver `qualify` with multiple TIMESTAMP columns triggers + # "INTERNAL Error: TIMESTAMP != VARCHAR". Persisted bronze has stable + # column types and avoids the inline-view binder path. + +materialized: table +schema: bronze silver: +materialized: table diff --git a/transform/macros/generate_schema_name.sql b/transform/macros/generate_schema_name.sql new file mode 100644 index 0000000..2dd6fa5 --- /dev/null +++ b/transform/macros/generate_schema_name.sql @@ -0,0 +1,19 @@ +{# + Override dbt's default schema-naming behaviour. + + Default: when a model has `+schema: bronze`, dbt creates the table at + {target.schema}_{custom_schema_name} → main_bronze + + Override: use the custom schema name verbatim, so models declared as + `+schema: bronze` land in `bronze.*`. This matches what Streamlit and + case-study SQL examples reference.
+ + See https://docs.getdbt.com/docs/build/custom-schemas +#} +{% macro generate_schema_name(custom_schema_name, node) -%} + {%- if custom_schema_name is none -%} + {{ target.schema }} + {%- else -%} + {{ custom_schema_name | trim }} + {%- endif -%} +{%- endmacro %} diff --git a/transform/models/bronze/stg_commit.sql b/transform/models/bronze/stg_commit.sql index 288684f..b028bf9 100644 --- a/transform/models/bronze/stg_commit.sql +++ b/transform/models/bronze/stg_commit.sql @@ -1,4 +1,4 @@ -{{ config(materialized='view') }} +{{ config(materialized='table') }} select repo_full_name, @@ -13,4 +13,8 @@ select parents, cast(fetched_at as timestamp) as fetched_at, raw_payload -from {{ source('bronze', 'commit') }} +from read_parquet( + 's3://bronze/commit/**/*.parquet', + hive_partitioning = true, + union_by_name = true +) diff --git a/transform/models/bronze/stg_issue_or_pr.sql b/transform/models/bronze/stg_issue_or_pr.sql index 01fb50a..ed8e34a 100644 --- a/transform/models/bronze/stg_issue_or_pr.sql +++ b/transform/models/bronze/stg_issue_or_pr.sql @@ -1,4 +1,4 @@ -{{ config(materialized='view') }} +{{ config(materialized='table') }} -- Bronze passthrough for issues + PRs (GitHub treats them via the same endpoint). -- Splitting happens in silver. @@ -18,4 +18,8 @@ select cast(closed_at as timestamp) as closed_at, cast(fetched_at as timestamp) as fetched_at, raw_payload -from {{ source('bronze', 'issue_or_pr') }} +from read_parquet( + 's3://bronze/issue_or_pr/**/*.parquet', + hive_partitioning = true, + union_by_name = true +) diff --git a/transform/models/bronze/stg_repo_metadata.sql b/transform/models/bronze/stg_repo_metadata.sql index 10120c6..65fa817 100644 --- a/transform/models/bronze/stg_repo_metadata.sql +++ b/transform/models/bronze/stg_repo_metadata.sql @@ -1,7 +1,9 @@ -{{ config(materialized='view') }} +{{ config(materialized='table') }} -- Bronze passthrough for repository headline numbers. --- Just type cast and rename for downstream consumption. 
+-- Read directly from Garage S3 via DuckDB httpfs + parquet. +-- (sources.yml documents the contract; dbt-duckdb's source resolution +-- doesn't auto-handle Hive-partitioned globs, so we read explicitly.) select full_name as repo_full_name, name as repo_name, @@ -15,7 +17,7 @@ select network_count, default_branch, language, - license_spdx, + try_cast(license_spdx as varchar) as license_spdx, archived, disabled, fork, @@ -24,4 +26,8 @@ select cast(pushed_at as timestamp) as pushed_at, cast(fetched_at as timestamp) as fetched_at, raw_payload -from {{ source('bronze', 'repo_metadata') }} +from read_parquet( + 's3://bronze/repo_metadata/**/*.parquet', + hive_partitioning = true, + union_by_name = true +) diff --git a/transform/models/silver/dim_repos.sql b/transform/models/silver/dim_repos.sql index b0f468a..09c3e29 100644 --- a/transform/models/silver/dim_repos.sql +++ b/transform/models/silver/dim_repos.sql @@ -2,15 +2,10 @@ -- Latest snapshot per repo. Bronze can have multiple snapshots over time; -- silver collapses to "current truth" by fetched_at. -with ranked as ( - select - *, - row_number() over ( - partition by repo_full_name - order by fetched_at desc - ) as rn - from {{ ref('stg_repo_metadata') }} -) +-- +-- We use `qualify` rather than `with ranked as (select *, row_number() ...)` +-- because `select *` against an external-parquet view confuses DuckDB's +-- window-function binder (INTERNAL Error: TIMESTAMP != VARCHAR). 
select repo_full_name, repo_name, @@ -30,5 +25,5 @@ select updated_at as repo_updated_at, pushed_at as last_pushed_at, fetched_at as snapshot_at -from ranked -where rn = 1 +from {{ ref('stg_repo_metadata') }} +qualify row_number() over (partition by repo_full_name order by fetched_at desc) = 1 diff --git a/transform/models/silver/fct_commits.sql b/transform/models/silver/fct_commits.sql index fae4a78..15e7735 100644 --- a/transform/models/silver/fct_commits.sql +++ b/transform/models/silver/fct_commits.sql @@ -1,14 +1,5 @@ {{ config(materialized='table') }} -with deduped as ( - select - *, - row_number() over ( - partition by repo_full_name, sha - order by fetched_at desc - ) as rn - from {{ ref('stg_commit') }} -) select repo_full_name, sha, @@ -19,7 +10,8 @@ select committer_login, committer_email, committed_date, - cardinality(parents) as parent_count, + -- DuckDB v1.5: cardinality() is for MAPs; use len() for LISTs + len(parents) as parent_count, fetched_at -from deduped -where rn = 1 +from {{ ref('stg_commit') }} +qualify row_number() over (partition by repo_full_name, sha order by fetched_at desc) = 1 diff --git a/transform/models/silver/fct_issues.sql b/transform/models/silver/fct_issues.sql index 7a28ebe..4b51884 100644 --- a/transform/models/silver/fct_issues.sql +++ b/transform/models/silver/fct_issues.sql @@ -1,16 +1,7 @@ {{ config(materialized='table') }} -- Issues only. PRs are split off into fct_pull_requests. -with deduped as ( - select - *, - row_number() over ( - partition by repo_full_name, number - order by fetched_at desc - ) as rn - from {{ ref('stg_issue_or_pr') }} - where is_pull_request = false -) +-- Use `qualify` to avoid DuckDB's `select * + row_number()` binder bug. 
select repo_full_name, number as issue_number, @@ -25,5 +16,6 @@ select updated_at, closed_at, fetched_at -from deduped -where rn = 1 +from {{ ref('stg_issue_or_pr') }} +where is_pull_request = false +qualify row_number() over (partition by repo_full_name, number order by fetched_at desc) = 1 diff --git a/transform/models/silver/fct_pull_requests.sql b/transform/models/silver/fct_pull_requests.sql index 390f02f..42ce0dc 100644 --- a/transform/models/silver/fct_pull_requests.sql +++ b/transform/models/silver/fct_pull_requests.sql @@ -1,15 +1,5 @@ {{ config(materialized='table') }} -with deduped as ( - select - *, - row_number() over ( - partition by repo_full_name, number - order by fetched_at desc - ) as rn - from {{ ref('stg_issue_or_pr') }} - where is_pull_request = true -) select repo_full_name, number as pr_number, @@ -23,5 +13,6 @@ select updated_at, closed_at, fetched_at -from deduped -where rn = 1 +from {{ ref('stg_issue_or_pr') }} +where is_pull_request = true +qualify row_number() over (partition by repo_full_name, number order by fetched_at desc) = 1 diff --git a/transform/models/sources.yml b/transform/models/sources.yml index f51bdfe..e8e1ac7 100644 --- a/transform/models/sources.yml +++ b/transform/models/sources.yml @@ -1,9 +1,13 @@ version: 2 -# External Bronze sources written by ingestion/python/. -# These are materialized as DuckDB views over Garage S3 parquet partitions. +# Documentation-only: these sources describe the contract of what the +# Python ingestor writes to Garage S3. The bronze stg_* models read +# directly via DuckDB's `read_parquet(...)` since dbt-duckdb's source +# resolution doesn't auto-handle Hive-partitioned globs. # -# Phase 2 will replace these with Iceberg tables managed by Lakekeeper. +# Phase 2 will replace these external parquet sources with Iceberg tables +# managed by Lakekeeper, at which point real `source()` references become +# usable end-to-end. 
sources: - name: bronze @@ -11,45 +15,11 @@ sources: meta: origin: ingestion/python/ partitioned_by: [event_type, owner, repo, year, month, day] + external_location_template: "s3://bronze/{event_type}/{owner}/{repo}/year={Y}/month={M}/day={D}/*.parquet" tables: - name: repo_metadata description: "Snapshot of repository headline numbers." - external: - # Replaced at compile-time by env_var or vars. - # See profiles.yml.example: extensions=[httpfs, parquet] settings=[s3_*] - location: "s3://bronze/repo_metadata/**/*.parquet" - columns: - - name: full_name - description: "owner/repo" - tests: [not_null] - - name: stargazers_count - tests: [not_null] - - name: fetched_at - tests: [not_null] - - name: issue_or_pr description: "Issues and pull requests (PR is a kind of issue on GitHub)." - external: - location: "s3://bronze/issue_or_pr/**/*.parquet" - columns: - - name: repo_full_name - tests: [not_null] - - name: number - tests: [not_null] - - name: state - tests: - - not_null - - accepted_values: - values: [open, closed] - - name: is_pull_request - tests: [not_null] - - name: commit description: "Commits on the default branch." 
- external: - location: "s3://bronze/commit/**/*.parquet" - columns: - - name: sha - tests: [not_null] - - name: repo_full_name - tests: [not_null] diff --git a/transform/package-lock.yml b/transform/package-lock.yml new file mode 100644 index 0000000..2299784 --- /dev/null +++ b/transform/package-lock.yml @@ -0,0 +1,11 @@ +packages: + - name: dbt_utils + package: dbt-labs/dbt_utils + version: 1.3.3 + - name: dbt_expectations + package: calogica/dbt_expectations + version: 0.10.4 + - name: dbt_date + package: calogica/dbt_date + version: 0.10.1 +sha1_hash: 7e6826471cf21d684924eabf28ee66f0d233f735 diff --git a/transform/profiles.yml.example b/transform/profiles.yml.example index 6e8a893..2292630 100644 --- a/transform/profiles.yml.example +++ b/transform/profiles.yml.example @@ -10,6 +10,7 @@ de_lab: - iceberg settings: s3_endpoint: localhost:3900 + s3_region: "{{ env_var('GARAGE_S3_REGION', 'garage') }}" s3_access_key_id: "{{ env_var('GARAGE_S3_ACCESS_KEY') }}" s3_secret_access_key: "{{ env_var('GARAGE_S3_SECRET_KEY') }}" s3_use_ssl: false diff --git a/transform/requirements.txt b/transform/requirements.txt new file mode 100644 index 0000000..23499b4 --- /dev/null +++ b/transform/requirements.txt @@ -0,0 +1,9 @@ +# dbt runtime for the transform/ project. +# Pinned to dbt-core 1.10 (Phase 4 will switch to dbt Fusion). +# See ADR-0005 for the migration strategy. + +dbt-core>=1.10,<1.11 +dbt-duckdb>=1.10,<1.11 + +# Data quality (replaces Great Expectations — see ADR / SECURITY.md) +# soda-core-duckdb>=3.5 # Phase 4 — keep commented for now to keep deps lean