Skip to content

Commit 46daf7e

Browse files
committed
Add initial bundle example: my_data_project
Pre-generated from databricks-bundle-template with full environment setup (user/stage/prod), classic compute, GitHub Actions CI/CD, and AWS OAuth M2M authentication. No permissions/RBAC configured.
0 parents  commit 46daf7e

24 files changed

Lines changed: 1994 additions & 0 deletions
Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
# GitHub Actions workflow for my_data_project
#
# Validates and deploys the Databricks Asset Bundle:
#   - bundle-ci:  validates the bundle configuration on pull requests to main
#   - staging-cd: deploys to staging when changes are pushed (merged) to main
#   - prod-cd:    deploys to production when changes are pushed (merged) to release
#
# Prerequisites:
#   1. Configure repository secrets (see docs/CI_CD_SETUP.md)
#   2. Ensure Unity Catalog prerequisites are met

name: Bundle CI/CD for my_data_project

# Documentation-only changes (README, docs/, .gitignore) do not trigger a run.
on:
  push:
    branches: [main, release]
    paths-ignore: ['README.md', 'docs/**', '.gitignore']
  pull_request:
    branches: [main]
    paths-ignore: ['README.md', 'docs/**', '.gitignore']
  # NOTE(review): every job in this workflow is gated on a `push` or
  # `pull_request` event, so a manual dispatch currently skips all jobs.
  workflow_dispatch:

permissions:
  contents: read
  checks: write  # presumably for the test-report check published by bundle-ci
  pull-requests: write

defaults:
  run:
    working-directory: .

# AWS/GCP Databricks uses OAuth M2M authentication.
# Steps re-map these to the unprefixed DATABRICKS_* names for the target they
# act on.
# NOTE(review): workflow-level env is visible to every job, so the PROD_*
# values are also present while bundle-ci installs and tests PR code - confirm
# this is acceptable.
env:
  STAGING_DATABRICKS_HOST: ${{ secrets.STAGING_DATABRICKS_HOST }}
  STAGING_DATABRICKS_CLIENT_ID: ${{ secrets.STAGING_DATABRICKS_CLIENT_ID }}
  STAGING_DATABRICKS_CLIENT_SECRET: ${{ secrets.STAGING_DATABRICKS_CLIENT_SECRET }}
  PROD_DATABRICKS_HOST: ${{ secrets.PROD_DATABRICKS_HOST }}
  PROD_DATABRICKS_CLIENT_ID: ${{ secrets.PROD_DATABRICKS_CLIENT_ID }}
  PROD_DATABRICKS_CLIENT_SECRET: ${{ secrets.PROD_DATABRICKS_CLIENT_SECRET }}
49+
50+
jobs:
# =============================================================================
# Job: bundle-ci - Validate bundle on Pull Requests
# =============================================================================
  bundle-ci:
    name: 'Validate and Test'
    # Runs for pull_request events only.
    if: github.event_name == 'pull_request'
    runs-on: ubuntu-latest
    # One active CI run per ref: a newer run for the same ref cancels the
    # one still in progress.
    concurrency:
      group: my_data_project_ci_${{ github.ref }}
      cancel-in-progress: true

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Check out the PR head commit; falls back to github.sha when the
          # event carries no pull_request payload.
          ref: ${{ github.event.pull_request.head.sha || github.sha }}

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      # Dependency installation is optional: projects without a
      # requirements_dev.txt skip it instead of failing.
      - name: Install dependencies
        run: |
          if [ -f "requirements_dev.txt" ]; then
            echo "Installing development dependencies..."
            python -m pip install --upgrade pip
            pip install -r requirements_dev.txt
          else
            echo "No requirements_dev.txt found, skipping dependency installation"
          fi

      # pytest runs only when tests/ contains test_*.py or *_test.py files;
      # test-results.xml is therefore written only in that case.
      - name: Run unit tests
        run: |
          if [ -d "tests" ] && [ -n "$(find tests -name 'test_*.py' -o -name '*_test.py' 2>/dev/null)" ]; then
            echo "Running unit tests..."
            python -m pytest tests/ -v --junitxml=test-results.xml
          else
            echo "No test files found, skipping unit tests"
          fi

      # Publish for both passing and failing test runs, but only when a JUnit
      # report actually exists. Without the hashFiles() guard the reporter is
      # invoked with no input whenever tests were skipped or an earlier step
      # failed before pytest ran.
      - name: Publish test results
        uses: dorny/test-reporter@v1
        if: (success() || failure()) && hashFiles('test-results.xml') != ''
        with:
          name: Unit Tests - my_data_project
          path: test-results.xml
          reporter: java-junit
          fail-on-error: false  # Prevent "double failing" noise when pytest fails the job
          token: ${{ github.token }}

      - name: Install Databricks CLI
        uses: databricks/setup-cli@v0.274.0

      # Both deployment targets are validated on every PR, each step with the
      # credentials of the workspace it targets.
      - name: Validate bundle for staging
        env:
          DATABRICKS_HOST: ${{ env.STAGING_DATABRICKS_HOST }}
          DATABRICKS_CLIENT_ID: ${{ env.STAGING_DATABRICKS_CLIENT_ID }}
          DATABRICKS_CLIENT_SECRET: ${{ env.STAGING_DATABRICKS_CLIENT_SECRET }}
        run: |
          databricks bundle validate -t stage

      - name: Validate bundle for production
        env:
          DATABRICKS_HOST: ${{ env.PROD_DATABRICKS_HOST }}
          DATABRICKS_CLIENT_ID: ${{ env.PROD_DATABRICKS_CLIENT_ID }}
          DATABRICKS_CLIENT_SECRET: ${{ env.PROD_DATABRICKS_CLIENT_SECRET }}
        run: |
          databricks bundle validate -t prod
121+
# =============================================================================
# Job: staging-cd - Deploy to staging on merge to main
# =============================================================================
  staging-cd:
    name: 'Deploy to Staging'
    # Runs only for pushes to the main branch.
    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
    runs-on: ubuntu-latest
    # All staging deployments share one concurrency group, and an in-progress
    # deployment is never cancelled by a newer one.
    concurrency:
      group: my_data_project_staging_deploy
      cancel-in-progress: false

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Install Databricks CLI
        uses: databricks/setup-cli@v0.274.0

      # Validate first so that an invalid bundle stops the job before the
      # deploy step runs.
      - name: Validate bundle for staging
        run: databricks bundle validate -t stage
        env:
          DATABRICKS_HOST: ${{ env.STAGING_DATABRICKS_HOST }}
          DATABRICKS_CLIENT_ID: ${{ env.STAGING_DATABRICKS_CLIENT_ID }}
          DATABRICKS_CLIENT_SECRET: ${{ env.STAGING_DATABRICKS_CLIENT_SECRET }}

      - name: Deploy bundle to staging
        run: databricks bundle deploy -t stage
        env:
          DATABRICKS_HOST: ${{ env.STAGING_DATABRICKS_HOST }}
          DATABRICKS_CLIENT_ID: ${{ env.STAGING_DATABRICKS_CLIENT_ID }}
          DATABRICKS_CLIENT_SECRET: ${{ env.STAGING_DATABRICKS_CLIENT_SECRET }}
154+
155+
# =============================================================================
# Job: prod-cd - Deploy to production on merge to release
# =============================================================================
  prod-cd:
    name: 'Deploy to Production'
    # Runs only for pushes to the release branch.
    if: github.event_name == 'push' && github.ref == 'refs/heads/release'
    runs-on: ubuntu-latest
    # All production deployments share one concurrency group, and an
    # in-progress deployment is never cancelled by a newer one.
    concurrency:
      group: my_data_project_prod_deploy
      cancel-in-progress: false

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Install Databricks CLI
        uses: databricks/setup-cli@v0.274.0

      # Validate first so that an invalid bundle stops the job before the
      # deploy step runs.
      - name: Validate bundle for production
        run: databricks bundle validate -t prod
        env:
          DATABRICKS_HOST: ${{ env.PROD_DATABRICKS_HOST }}
          DATABRICKS_CLIENT_ID: ${{ env.PROD_DATABRICKS_CLIENT_ID }}
          DATABRICKS_CLIENT_SECRET: ${{ env.PROD_DATABRICKS_CLIENT_SECRET }}

      - name: Deploy bundle to production
        run: databricks bundle deploy -t prod
        env:
          DATABRICKS_HOST: ${{ env.PROD_DATABRICKS_HOST }}
          DATABRICKS_CLIENT_ID: ${{ env.PROD_DATABRICKS_CLIENT_ID }}
          DATABRICKS_CLIENT_SECRET: ${{ env.PROD_DATABRICKS_CLIENT_SECRET }}

.gitignore

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# ===========================================
# Databricks Asset Bundles
# ===========================================
.databricks/
*.tfstate
*.tfstate.backup

# ===========================================
# Python
# ===========================================
__pycache__/
*.py[cod]
*$py.class
*.so
*.egg
*.egg-info/
dist/
build/
eggs/
.eggs/
.ipynb_checkpoints

# Virtual environments
.venv/
venv/
ENV/
env/

# ===========================================
# IDE and Editor
# ===========================================
.idea/
.vscode/
*.swp
*.swo
*~
.cursor/
.claude/

# ===========================================
# Testing and Coverage
# ===========================================
.pytest_cache/
.coverage
htmlcov/
.tox/
.nox/

# ===========================================
# Linters, Formatters, Type Checkers
# ===========================================
# Ruff's default cache directory is dot-prefixed (.ruff_cache).
.ruff_cache/
.mypy_cache/
.pytype/

# ===========================================
# Environment and Secrets
# ===========================================
.env
.envrc
*.pem
*.key

# ===========================================
# OS Generated
# ===========================================
.DS_Store
Thumbs.db

# ===========================================
# Logs
# ===========================================
*.log

# ===========================================
# Temporary Files
# ===========================================
tmp/
temp/
*.tmp

QUICKSTART.md

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# Quick Start Guide for my_data_project
2+
3+
## 1. Prerequisites Check
4+
5+
```bash
6+
# Verify Databricks CLI is installed
7+
databricks --version
8+
9+
# Authenticate to your workspace
10+
databricks auth login
11+
```
12+
13+
## 2. Validate Configuration
14+
15+
```bash
16+
databricks bundle validate -t user
17+
```
18+
19+
If validation fails, check:
20+
- Unity Catalog `dev_analytics` exists and you have `USE CATALOG` permission
21+
22+
## 3. Deploy to User Environment
23+
24+
```bash
25+
databricks bundle deploy -t user
26+
```
27+
28+
## 4. Run Sample Workloads
29+
30+
```bash
31+
# Run the ingestion job
32+
databricks bundle run my_data_project_ingestion -t user
33+
34+
# Trigger the pipeline
35+
databricks bundle run my_data_project_pipeline_trigger -t user
36+
```
37+
38+
## 5. Verify in Workspace
39+
40+
Check your Databricks workspace:
41+
- **Jobs**: Look for `[user <yourname>] my_data_project Ingestion Job`
42+
- **Pipelines**: Look for `[user <yourname>] my_data_project ETL Pipeline`
43+
44+
## 6. Cleanup (Optional)
45+
46+
```bash
47+
databricks bundle destroy -t user
48+
```
49+
50+
## Next Steps
51+
52+
### Configure Service Principals (for CI/CD)
53+
54+
Before deploying to stage or prod:
55+
56+
1. Create service principals in your Databricks workspace
57+
2. Search for `SP_PLACEHOLDER` in `variables.yml`
58+
3. Replace with your service principal application IDs
59+
60+
### Set Up CI/CD Pipeline
61+
62+
For automated deployment via CI/CD, see [docs/CI_CD_SETUP.md](docs/CI_CD_SETUP.md).
63+
64+
### Deploy to Higher Environments
65+
```bash
66+
databricks bundle deploy -t stage
67+
databricks bundle deploy -t prod
68+
```
69+
> **Multi-Workspace Setup**: If using a separate prod workspace, update `workspace.host` in `databricks.yml`. See [README.md](README.md) for details.
70+
71+
## Troubleshooting
72+
73+
### "Catalog not found" Error
74+
75+
Catalogs must be pre-existing (created by a metastore admin or platform team).
76+
Verify that the `dev_analytics` catalog exists and you have access:
77+
```sql
78+
SHOW CATALOGS;
79+
```
80+
81+
### Service Principal Errors
82+
83+
> **Note**: The `user` target does not require service principals.
84+
85+
For the stage and prod targets:
86+
- Search for `SP_PLACEHOLDER` in `variables.yml` and replace with your SP IDs
87+
- Ensure the SP exists in your workspace before deploying

0 commit comments

Comments
 (0)