Skip to content

Latest commit

 

History

History
522 lines (371 loc) · 10.2 KB

File metadata and controls

522 lines (371 loc) · 10.2 KB

DBT Quick Reference - K2 Reference Data Platform

Quick commands for daily DBT tasks. For detailed explanations, see DBT-GUIDE.md.


Setup

# First-time setup
cd dbt/
dbt deps                    # Install dependencies (if using packages)
dbt debug                   # Test connection

# Check configuration
cat dbt_project.yml         # Project config
cat profiles.yml            # Connection profiles

Running Models

Basic Commands

# Run everything
dbt run

# Run specific model
dbt run --select silver_instruments

# Run model and everything downstream
dbt run --select silver_instruments+

# Run model and everything upstream
dbt run --select +silver_instruments

# Run all Silver models
dbt run --select silver.*

# Run all Gold models
dbt run --select gold.*

Advanced Selection

# Run modified models only (Git diff)
dbt run --select state:modified

# Run specific tag
dbt run --select tag:daily

# Exclude models
dbt run --exclude gold.*

# Full refresh (ignore incremental)
dbt run --full-refresh

# Run with multiple threads
dbt run --threads 8

Testing

Basic Commands

# Run all tests
dbt test

# Test specific model
dbt test --select silver_instruments

# Test data sources
dbt test --select source:bronze

# Run only schema tests (unique, not_null, etc.)
dbt test --select test_type:schema

# Run only data tests (custom SQL tests)
dbt test --select test_type:data

Common Test Scenarios

# Quick smoke test (sources + critical models)
dbt test --select source:bronze silver_instruments

# Full test suite before deployment
dbt test

# Test after incremental run
dbt run --select silver_instruments
dbt test --select silver_instruments

Debugging

View Compiled SQL

# Compile without running
dbt compile --select silver_instruments

# View what DBT generated
cat target/compiled/k2_refdata/models/silver/silver_instruments.sql

# Run compiled SQL directly
duckdb refdata.duckdb < target/compiled/k2_refdata/models/silver/silver_instruments.sql

Debug Mode

# Verbose logging
dbt run --select silver_instruments --debug

# Show SQL before running
dbt run --select silver_instruments --print

Check Logs

# View latest log
tail -f logs/dbt.log

# View run results
cat target/run_results.json | jq '.results[] | {name: .unique_id, status: .status, execution_time: .execution_time}'

# View manifest (full DAG)
cat target/manifest.json | jq '.nodes | keys'

Documentation

# Generate documentation
dbt docs generate

# Serve documentation (opens browser)
dbt docs serve

# Serve on custom port
dbt docs serve --port 8080

# Generate and open in one command
dbt docs generate && dbt docs serve

Working with Incremental Models

Standard Incremental Run

# First run: Full load
dbt run --select silver_instruments

# Subsequent runs: Incremental
dbt run --select silver_instruments

Force Full Refresh

# Rebuild from scratch
dbt run --select silver_instruments --full-refresh

# Or drop and rebuild
dbt run-operation drop_relation --args "{relation: ref('silver_instruments')}"
dbt run --select silver_instruments

Debug Incremental Logic

# Check what's incremental
dbt list --select config.materialized:incremental

# View incremental logic in compiled SQL
dbt compile --select silver_instruments
grep -A 10 "is_incremental()" target/compiled/k2_refdata/models/silver/silver_instruments.sql

Working with Sources

# Test all sources
dbt test --select source:*

# Test specific source
dbt test --select source:bronze.bronze_instruments_binance

# Generate source freshness report
dbt source freshness

# Snapshot source freshness (with alerts)
dbt source snapshot-freshness

Common Workflows

Daily Ingestion → Transformation

# 1. Run ingestion (Python)
make ingest-now

# 2. Transform with DBT
cd dbt/
dbt run --select silver.* gold.*

# 3. Test data quality
dbt test

Adding New Exchange

# 1. Create Bronze source definition
vim models/bronze/sources.yml
# Add: bronze_instruments_bybit

# 2. Update Silver model
vim models/silver/silver_instruments.sql
# Add: bronze_bybit CTE

# 3. Test
dbt run --select silver_instruments
dbt test --select silver_instruments

# 4. Update Gold model
vim models/gold/gold_symbology_master.sql
# Add: bybit_symbol column
dbt run --select gold_symbology_master

Schema Change Handling

# New field in Bronze (e.g., marginTradingAllowed)

# 1. Update Silver model to extract new field
vim models/silver/silver_instruments.sql
# Add: JSON_EXTRACT_SCALAR(..., '$.marginTradingAllowed')

# 2. Run with schema change handling (automatic)
dbt run --select silver_instruments
# Config: on_schema_change='append_new_columns'

# 3. Add tests for new field
vim models/silver/silver_instruments.yml
# Add test: not_null or accepted_values

# 4. Run tests
dbt test --select silver_instruments

Refactoring a Model

# 1. Create new model
cp models/silver/silver_instruments.sql models/silver/silver_instruments_v2.sql

# 2. Make changes to v2
vim models/silver/silver_instruments_v2.sql

# 3. Run both and compare
dbt run --select silver_instruments silver_instruments_v2

# 4. Validate results match
duckdb -c "
SELECT COUNT(*) FROM refdata.silver_instruments;
SELECT COUNT(*) FROM refdata.silver_instruments_v2;
"

# 5. Once validated, switch downstream refs
# Update models/gold/... to use ref('silver_instruments_v2')

# 6. Remove old model
rm models/silver/silver_instruments.sql
mv models/silver/silver_instruments_v2.sql models/silver/silver_instruments.sql

Environment Management

Development (Local)

# Use dev profile (default)
dbt run --target dev

# Check which target
dbt debug | grep "target:"

Production

# Use prod profile
dbt run --target prod

# Common prod workflow
dbt run --target prod --threads 8
dbt test --target prod
dbt docs generate --target prod

Custom Profiles Directory

# Use different profiles.yml
dbt run --profiles-dir ./config/dbt/

# Or set environment variable
export DBT_PROFILES_DIR=./config/dbt/
dbt run

Makefile Shortcuts

# From project root (not dbt/ directory)

make dbt-run              # Run all models
make dbt-test             # Run all tests
make dbt-docs             # Generate and serve docs
make dbt-clean            # Clean artifacts

Troubleshooting

Connection Issues

# Check connection
dbt debug

# Test S3/MinIO
aws --endpoint-url $DBT_S3_ENDPOINT s3 ls

# Check Iceberg catalog
curl http://localhost:8181/v1/namespaces

Model Errors

# View error in logs
tail -n 50 logs/dbt.log

# Run single model with debug
dbt run --select silver_instruments --debug

# Check compiled SQL
cat target/compiled/k2_refdata/models/silver/silver_instruments.sql

Test Failures

# View which tests failed
dbt test --store-failures

# Query failed test results
duckdb -c "SELECT * FROM dbt_test_failures.unique_silver_instruments_instrument_sk"

# Re-run only failed tests
dbt test --select result:fail

Performance Issues

# Profile query execution
dbt run --select silver_instruments --debug | grep "Execution time"

# Check row counts
dbt run --select silver_instruments --print

# Use EXPLAIN
duckdb -c "EXPLAIN $(cat target/compiled/.../silver_instruments.sql)"

Useful Queries

Check Model Status

# List all models
dbt list

# List models with config
dbt list --select config.materialized:incremental

# List models by tag
dbt list --select tag:daily

# Show model dependencies
dbt list --select +silver_instruments+

Inspect Data

# Count rows in Silver
duckdb -c "SELECT COUNT(*) FROM refdata.silver_instruments"

# Recent records
duckdb -c "
SELECT exchange, symbol, valid_from, record_created_at
FROM refdata.silver_instruments
ORDER BY record_created_at DESC
LIMIT 10
"

# Check for duplicates
duckdb -c "
SELECT instrument_sk, COUNT(*)
FROM refdata.silver_instruments
WHERE valid_to IS NULL
GROUP BY instrument_sk
HAVING COUNT(*) > 1
"

Freshness Check

# How old is latest data?
duckdb -c "
SELECT
    exchange,
    MAX(record_created_at) AS latest_record,
    NOW() - MAX(record_created_at) AS age
FROM refdata.silver_instruments
GROUP BY exchange
"

Cheat Sheet: Selection Syntax

Syntax Meaning Example
model_name Specific model silver_instruments
model_name+ Model + downstream silver_instruments+
+model_name Model + upstream +silver_instruments
+model_name+ Model + up + down +silver_instruments+
dir.* All in directory silver.*
tag:tagname Models with tag tag:daily
source:* All sources source:bronze
config.materialized:value Config filter config.materialized:incremental
result:fail Failed tests result:fail
state:modified Modified models state:modified (requires --state)

Quick Reference: dbt Commands

Command Purpose
dbt run Execute models
dbt test Run tests
dbt build Run + test in DAG order
dbt compile Compile without running
dbt docs generate Generate documentation
dbt docs serve Serve documentation
dbt source freshness Check source freshness
dbt debug Test connection
dbt list List models/tests
dbt clean Delete target/ and dbt_packages/
dbt deps Install packages
dbt seed Load CSV seeds
dbt snapshot Run snapshots
dbt run-operation Run macro

Getting Help

Official Docs: https://docs.getdbt.com/reference/dbt-commands

Project-Specific:

Common Questions:

  • "How do I add a new field?" → See Schema Change Handling
  • "How do I debug a failing test?" → See Test Failures
  • "How do I force a full refresh?" → dbt run --full-refresh
  • "Where are my compiled SQLs?" → target/compiled/k2_refdata/models/

Last Updated: 2026-01-23 Version: 1.0.0