Skip to content

Commit 61a99a1

Browse files
authored
Merge branch 'master' into slack-rick-text-refactor
2 parents 68ddb44 + 780b2d9 commit 61a99a1

38 files changed

Lines changed: 1243 additions & 117 deletions
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
name: Auto-assign Devin PRs

on:
  pull_request:
    types: [opened]

jobs:
  assign:
    # Only act on pull requests opened by the Devin bot account.
    if: github.actor == 'devin-ai-integration[bot]'
    runs-on: ubuntu-latest
    permissions:
      pull-requests: write
      issues: write
    steps:
      - name: Extract and assign requesting user
        uses: actions/github-script@v7
        with:
          script: |
            // The PR description is expected to contain either
            //   "Requested by: @login"   or   "Requested by: Some Name (@login)".
            // Capture group 1 holds the login for the first form, group 2 for the second.
            const body = context.payload.pull_request.body || '';
            const match = body.match(/Requested by[:\s]*(?:@(\w[\w-]*)|[\w][\w\s]*\(@(\w[\w-]*)\))/);
            const user = match?.[1] || match?.[2];
            if (user) {
              const { data: issue } = await github.rest.issues.addAssignees({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.issue.number,
                assignees: [user]
              });
              // The API call can succeed without actually adding the user (e.g. when
              // the user is not assignable in this repository), so confirm the login
              // is present in the returned assignee list before reporting success.
              const wanted = user.toLowerCase();
              const assigned = (issue.assignees || []).some(
                (assignee) => assignee.login.toLowerCase() === wanted
              );
              if (assigned) {
                console.log(`Assigned PR #${context.issue.number} to @${user}`);
              } else {
                core.warning(`GitHub did not assign @${user} to PR #${context.issue.number}; the user may not be assignable in this repository`);
              }
            } else {
              console.log('Could not determine requesting user from PR description');
            }
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
name: Cleanup stale CI schemas

on:
  schedule:
    # Daily at 03:00 UTC
    - cron: "0 3 * * *"
  workflow_dispatch:
    inputs:
      max-age-hours:
        type: string
        required: false
        default: "24"
        description: Drop schemas older than this many hours

# Least privilege: the job only checks out code and talks to the warehouses;
# it never writes to this repository.
permissions:
  contents: read

env:
  # Re-use the dbt-data-reliability integration-test project so we get the
  # cleanup macro (drop_stale_ci_schemas) without duplicating it.
  TESTS_DIR: ${{ github.workspace }}/dbt-data-reliability/integration_tests

jobs:
  cleanup:
    runs-on: ubuntu-latest
    strategy:
      # One warehouse failing must not prevent cleanup of the others.
      fail-fast: false
      matrix:
        warehouse-type:
          - snowflake
          - bigquery
          - redshift
          - databricks_catalog
          - athena
    steps:
      - name: Checkout dbt package
        uses: actions/checkout@v4
        with:
          repository: elementary-data/dbt-data-reliability
          path: dbt-data-reliability

      - name: Setup Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.10"
          cache: "pip"

      # The adapter package name differs from the matrix value for two targets:
      # databricks_catalog -> dbt-databricks, athena -> dbt-athena-community.
      - name: Install dbt
        run: >
          pip install
          "dbt-core"
          "dbt-${{ (matrix.warehouse-type == 'databricks_catalog' && 'databricks') || (matrix.warehouse-type == 'athena' && 'athena-community') || matrix.warehouse-type }}"

      - name: Write dbt profiles
        env:
          CI_WAREHOUSE_SECRETS: ${{ secrets.CI_WAREHOUSE_SECRETS || '' }}
        run: |
          if [ -z "$CI_WAREHOUSE_SECRETS" ]; then
            echo "::error::Missing required secret: CI_WAREHOUSE_SECRETS"
            exit 1
          fi
          # The cleanup job doesn't create schemas, but generate_profiles.py
          # requires --schema-name. Use a dummy value.
          python "$TESTS_DIR/profiles/generate_profiles.py" \
            --template "$TESTS_DIR/profiles/profiles.yml.j2" \
            --output ~/.dbt/profiles.yml \
            --schema-name "cleanup_placeholder"

      - name: Install dbt deps
        working-directory: ${{ env.TESTS_DIR }}/dbt_project
        run: dbt deps

      - name: Symlink local elementary package
        run: |
          LINK_PATH="${TESTS_DIR:?}/dbt_project/dbt_packages/elementary"
          # Remove whatever `dbt deps` left behind first: if the destination is a
          # real directory, `ln -sfn` would create the link *inside* it instead of
          # replacing it.
          rm -rf "$LINK_PATH"
          ln -sfn "${{ github.workspace }}/dbt-data-reliability" "$LINK_PATH"

      - name: Drop stale CI schemas
        working-directory: ${{ env.TESTS_DIR }}/dbt_project
        env:
          # Scheduled runs have no inputs, so fall back to the 24h default.
          MAX_AGE_HOURS: ${{ inputs.max-age-hours || '24' }}
        run: |
          # Validate before splicing the value into the JSON args below.
          if ! [[ "$MAX_AGE_HOURS" =~ ^[0-9]+$ ]]; then
            echo "::error::max-age-hours must be a non-negative integer"
            exit 1
          fi
          ARGS=$(printf '{"prefixes":["py_"],"max_age_hours":%s}' "$MAX_AGE_HOURS")
          dbt run-operation drop_stale_ci_schemas \
            --args "$ARGS" \
            -t "${{ matrix.warehouse-type }}"

.github/workflows/release.yml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,27 @@ env:
99
IMAGE_NAME: ${{ github.repository }}
1010

1111
jobs:
12+
validate-packages-yml:
  # Release gate: the publish jobs declare `needs: validate-packages-yml`, so a
  # failure here stops the release before anything is published.
  runs-on: ubuntu-latest
  steps:
    - name: Checkout Elementary
      uses: actions/checkout@v4

    - name: Validate dbt-data-reliability is not a git hash reference
      run: |
        PACKAGES_FILE="./elementary/monitor/dbt_project/packages.yml"
        if [ ! -f "$PACKAGES_FILE" ]; then
          echo "::error::packages.yml not found at $PACKAGES_FILE"
          exit 1
        fi
        # Only active entries count: strip comment lines so that a commented-out
        # git reference does not block a release and a commented-out package
        # reference does not satisfy the check. `|| true` keeps `bash -e` from
        # aborting when every line is a comment.
        ACTIVE_LINES=$(grep -vE '^[[:space:]]*#' "$PACKAGES_FILE" || true)
        # -F: match the URL literally (the dots are not regex wildcards).
        if grep -qF 'git: https://github.com/elementary-data/dbt-data-reliability.git' <<< "$ACTIVE_LINES"; then
          echo "::error::packages.yml contains a git hash reference for dbt-data-reliability. Releases must use a proper package version (e.g. 'package: elementary-data/elementary' with a 'version:' field). Please update packages.yml before releasing."
          exit 1
        fi
        if ! grep -qF 'package: elementary-data/elementary' <<< "$ACTIVE_LINES"; then
          echo "::error::packages.yml does not contain a proper package reference for elementary-data/elementary. Please update packages.yml before releasing."
          exit 1
        fi
        echo "packages.yml validation passed - using proper package version reference."
30+
1231
publish-to-pypi:
32+
needs: validate-packages-yml
1333
runs-on: ubuntu-latest
1434
steps:
1535
- name: Checkout Elementary
@@ -39,6 +59,7 @@ jobs:
3959
password: ${{ secrets.PYPI_PASS }}
4060

4161
build-and-push-docker-image:
62+
needs: validate-packages-yml
4263
runs-on: ubuntu-latest
4364
permissions:
4465
contents: read
@@ -77,6 +98,7 @@ jobs:
7798
labels: ${{ steps.meta.outputs.labels }}
7899

79100
merge-to-docs:
101+
needs: validate-packages-yml
80102
runs-on: ubuntu-latest
81103
steps:
82104
- uses: actions/checkout@v4

.github/workflows/test-all-warehouses.yml

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,15 @@ on:
55
branches: ["master"]
66
paths:
77
- elementary/**
8+
- tests/**
89
- .github/**
910
- pyproject.toml
1011
# For fork PRs - requires approval before running (has access to secrets)
1112
pull_request_target:
1213
branches: ["master"]
1314
paths:
1415
- elementary/**
16+
- tests/**
1517
- .github/**
1618
- pyproject.toml
1719
workflow_dispatch:
@@ -103,19 +105,3 @@ jobs:
103105
dbt-version: ${{ matrix.dbt-version }}
104106
generate-data: ${{ inputs.generate-data || false }}
105107
secrets: inherit
106-
107-
notify_failures:
108-
name: Notify Slack
109-
secrets: inherit
110-
needs: [test]
111-
if: |
112-
always() &&
113-
! cancelled() &&
114-
! contains(needs.test.result, 'success') &&
115-
! contains(needs.test.result, 'cancelled') &&
116-
github.event_name != 'pull_request_target'
117-
uses: ./.github/workflows/notify_slack.yml
118-
with:
119-
result: "failure"
120-
run_id: ${{ github.run_id }}
121-
workflow_name: "Test all warehouse platforms"

.github/workflows/test-warehouse.yml

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,8 @@ jobs:
6565
run:
6666
working-directory: elementary
6767
concurrency:
68-
# This is what eventually defines the schema name in the data platform.
68+
# Serialises runs for the same warehouse × dbt-version × branch.
69+
# The schema name is derived from a hash of this group (see "Write dbt profiles").
6970
group: tests_${{ inputs.warehouse-type }}_dbt_${{ inputs.dbt-version }}_${{ github.head_ref || github.ref_name }}
7071
cancel-in-progress: true
7172
steps:
@@ -114,12 +115,28 @@ jobs:
114115
115116
- name: Write dbt profiles
116117
env:
117-
PROFILES_YML: ${{ secrets.CI_PROFILES_YML }}
118+
CI_WAREHOUSE_SECRETS: ${{ secrets.CI_WAREHOUSE_SECRETS || '' }}
118119
run: |
119-
mkdir -p ~/.dbt
120-
DBT_VERSION=$(pip show dbt-core | grep -i version | awk '{print $2}' | sed 's/\.//g')
121-
UNDERSCORED_REF_NAME=$(echo "${{ inputs.warehouse-type }}_dbt_${DBT_VERSION}_${BRANCH_NAME}" | awk '{print tolower($0)}' | head -c 40 | sed "s/[-\/]/_/g")
122-
echo "$PROFILES_YML" | base64 -d | sed "s/<SCHEMA_NAME>/py_$UNDERSCORED_REF_NAME/g" > ~/.dbt/profiles.yml
120+
# Schema name = py_<YYMMDD_HHMMSS>_<branch≤19>_<8-char hash>
121+
# The hash prevents collisions across concurrent jobs; the branch
122+
# keeps it human-readable; the timestamp helps with stale schema
123+
# cleanup and ensures each CI run gets a unique schema.
124+
#
125+
# Budget (PostgreSQL 63-char limit):
126+
# py_(3) + timestamp(13) + _(1) + branch(≤19) + _(1) + hash(8) = 45
127+
# + _elementary(11) + _gw7(4) = 60
128+
CONCURRENCY_GROUP="tests_${{ inputs.warehouse-type }}_dbt_${{ inputs.dbt-version }}_${BRANCH_NAME}"
129+
SHORT_HASH=$(echo -n "$CONCURRENCY_GROUP" | sha256sum | head -c 8)
130+
SAFE_BRANCH=$(echo "${BRANCH_NAME}" | awk '{print tolower($0)}' | sed "s/[^a-z0-9]/_/g; s/__*/_/g" | head -c 19)
131+
DATE_STAMP=$(date -u +%y%m%d_%H%M%S)
132+
SCHEMA_NAME="py_${DATE_STAMP}_${SAFE_BRANCH}_${SHORT_HASH}"
133+
134+
echo "Schema name: $SCHEMA_NAME (branch='${BRANCH_NAME}', timestamp=${DATE_STAMP}, hash of concurrency group)"
135+
136+
python "${{ github.workspace }}/elementary/tests/profiles/generate_profiles.py" \
137+
--template "${{ github.workspace }}/elementary/tests/profiles/profiles.yml.j2" \
138+
--output ~/.dbt/profiles.yml \
139+
--schema-name "$SCHEMA_NAME"
123140
124141
- name: Run Python package unit tests
125142
run: pytest -vv tests/unit --warehouse-type ${{ inputs.warehouse-type }}
@@ -260,3 +277,10 @@ jobs:
260277

261278
- name: Run Python package e2e tests
262279
run: pytest -vv tests/e2e --warehouse-type ${{ inputs.warehouse-type }}
280+
281+
- name: Drop test schemas
282+
if: always()
283+
working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
284+
continue-on-error: true
285+
run: |
286+
dbt run-operation elementary_integration_tests.drop_test_schemas --target "${{ inputs.warehouse-type }}"

elementary/cli/cli.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,10 @@ def get_log_path(ctx):
2424
ctx_args = ctx.args
2525
target_path_flag = "--target-path"
2626
target_path = ctx_args[ctx_args.index(target_path_flag) + 1]
27-
finally:
28-
os.makedirs(os.path.abspath(target_path), exist_ok=True)
29-
return os.path.join(target_path, "edr.log")
27+
except (ValueError, IndexError):
28+
pass
29+
os.makedirs(os.path.abspath(target_path), exist_ok=True)
30+
return os.path.join(target_path, "edr.log")
3031

3132

3233
def get_quiet_logs(ctx):

elementary/clients/dbt/api_dbt_runner.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ class APIDbtRunner(CommandLineDbtRunner):
2727
def _inner_run_command(
2828
self,
2929
dbt_command_args: List[str],
30-
capture_output: bool,
3130
quiet: bool,
3231
log_output: bool,
3332
log_format: str,
@@ -50,15 +49,24 @@ def collect_dbt_command_logs(event):
5049
with with_chdir(self.project_dir):
5150
res: dbtRunnerResult = dbt.invoke(dbt_command_args)
5251
output = "\n".join(dbt_logs) or None
52+
# Surface the exception text so that transient-error detection in
53+
# _inner_run_command_with_retries can match against it. The dbt
54+
# Python API doesn't write to stderr, so we repurpose that field
55+
# for the exception string (analogous to how SubprocessDbtRunner
56+
# captures subprocess stderr).
57+
exception_text = str(res.exception) if res.exception else None
5358
if self.raise_on_failure and not res.success:
5459
raise DbtCommandError(
5560
base_command_args=dbt_command_args,
56-
err_msg=(str(res.exception) if res.exception else output),
61+
err_msg=(exception_text or output),
5762
logs=[DbtLog.from_log_line(log) for log in dbt_logs],
5863
)
5964

6065
return APIDbtCommandResult(
61-
success=res.success, output=output, stderr=None, result_obj=res
66+
success=res.success,
67+
output=output,
68+
stderr=exception_text,
69+
result_obj=res,
6270
)
6371

6472
def _parse_ls_command_result(

0 commit comments

Comments
 (0)