Skip to content

Commit aaf3b87

Browse files
authored
Merge branch 'TobikoData:main' into doris
2 parents ebee7de + 61a65ac commit aaf3b87

File tree

202 files changed

+5692
-4852
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

202 files changed

+5692
-4852
lines changed

.circleci/continue_config.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ jobs:
239239
- checkout
240240
- run:
241241
name: Install OS-level dependencies
242-
command: ./.circleci/install-prerequisites.sh "<< parameters.engine >>"
242+
command: ./.circleci/install-prerequisites.sh "<< parameters.engine >>"
243243
- run:
244244
name: Generate database name
245245
command: |
@@ -308,7 +308,7 @@ workflows:
308308
- redshift
309309
- bigquery
310310
- clickhouse-cloud
311-
- athena
311+
- athena
312312
- fabric
313313
- gcp-postgres
314314
filters:

.circleci/manage-test-db.sh

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ databricks_init() {
5151

5252
# Note: the cluster doesn't need to be running to create / drop catalogs, but it does need to be running to run the integration tests
5353
echo "Ensuring cluster is running"
54-
databricks clusters start $CLUSTER_ID || true
54+
databricks clusters start $CLUSTER_ID
5555
}
5656

5757
databricks_up() {
@@ -80,7 +80,11 @@ redshift_down() {
8080
EXIT_CODE=1
8181
ATTEMPTS=0
8282
while [ $EXIT_CODE -ne 0 ] && [ $ATTEMPTS -lt 5 ]; do
83-
redshift_exec "select pg_terminate_backend(procpid) from pg_stat_activity where datname = '$1'"
83+
# note: sometimes this pg_terminate_backend() call can randomly fail with: ERROR: Insufficient privileges
84+
# if it does, let's proceed with the drop anyway rather than aborting and never attempting the drop
85+
redshift_exec "select pg_terminate_backend(procpid) from pg_stat_activity where datname = '$1'" || true
86+
87+
# perform drop
8488
redshift_exec "drop database $1;" && EXIT_CODE=$? || EXIT_CODE=$?
8589
if [ $EXIT_CODE -ne 0 ]; then
8690
echo "Unable to drop database; retrying..."

.github/workflows/pr.yaml

Lines changed: 79 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ on:
88
concurrency:
99
group: 'pr-${{ github.event.pull_request.number }}'
1010
cancel-in-progress: true
11+
permissions:
12+
contents: read
1113
jobs:
1214
test-vscode:
1315
env:
@@ -56,7 +58,7 @@ jobs:
5658
run: pnpm exec playwright install
5759
- name: Run e2e tests
5860
working-directory: ./vscode/extension
59-
timeout-minutes: 90
61+
timeout-minutes: 30
6062
run: |
6163
source ../../.venv/bin/activate
6264
pnpm run test:e2e
@@ -66,3 +68,79 @@ jobs:
6668
name: playwright-report
6769
path: vscode/extension/playwright-report/
6870
retention-days: 30
71+
test-dbt-versions:
72+
runs-on: ubuntu-latest
73+
strategy:
74+
fail-fast: false
75+
matrix:
76+
dbt-version: ['1.3', '1.4', '1.5', '1.6', '1.7', '1.8', '1.9', '1.10']
77+
steps:
78+
- uses: actions/checkout@v5
79+
- name: Set up Python
80+
uses: actions/setup-python@v5
81+
with:
82+
python-version: '3.10'
83+
- name: Install uv
84+
uses: astral-sh/setup-uv@v6
85+
- name: Install SQLMesh dev dependencies
86+
run: |
87+
uv venv .venv
88+
source .venv/bin/activate
89+
UV=1 make install-dev-dbt-${{ matrix.dbt-version }}
90+
- name: Run dbt tests
91+
# We can't run slow tests across all engines due to tests requiring DuckDB and old versions
92+
# of dbt require a version of DuckDB we no longer support
93+
run: |
94+
source .venv/bin/activate
95+
96+
# Remove semantic_models and metrics sections for DBT versions < 1.6.0
97+
# Using explicit list to avoid version comparison issues
98+
if [[ "${{ matrix.dbt-version }}" == "1.3" ]] || \
99+
[[ "${{ matrix.dbt-version }}" == "1.4" ]] || \
100+
[[ "${{ matrix.dbt-version }}" == "1.5" ]]; then
101+
102+
echo "DBT version is ${{ matrix.dbt-version }} (< 1.6.0), removing semantic_models and metrics sections..."
103+
104+
schema_file="tests/fixtures/dbt/sushi_test/models/schema.yml"
105+
if [[ -f "$schema_file" ]]; then
106+
echo "Modifying $schema_file..."
107+
108+
# Create a temporary file
109+
temp_file=$(mktemp)
110+
111+
# Use awk to remove semantic_models and metrics sections
112+
awk '
113+
/^semantic_models:/ { in_semantic=1; next }
114+
/^metrics:/ { in_metrics=1; next }
115+
/^[^ ]/ && (in_semantic || in_metrics) {
116+
in_semantic=0;
117+
in_metrics=0
118+
}
119+
!in_semantic && !in_metrics { print }
120+
' "$schema_file" > "$temp_file"
121+
122+
# Move the temp file back
123+
mv "$temp_file" "$schema_file"
124+
125+
echo "Successfully removed semantic_models and metrics sections"
126+
else
127+
echo "Schema file not found at $schema_file, skipping..."
128+
fi
129+
else
130+
echo "DBT version is ${{ matrix.dbt-version }} (>= 1.6.0), keeping semantic_models and metrics sections"
131+
fi
132+
133+
make dbt-fast-test
134+
- name: Test SQLMesh info in sushi_dbt
135+
working-directory: ./examples/sushi_dbt
136+
run: |
137+
source ../../.venv/bin/activate
138+
sed -i 's/target: in_memory/target: postgres/g' profiles.yml
139+
if [[ $(echo -e "${{ matrix.dbt-version }}\n1.5.0" | sort -V | head -n1) == "${{ matrix.dbt-version }}" ]] && [[ "${{ matrix.dbt-version }}" != "1.5.0" ]]; then
140+
echo "DBT version is ${{ matrix.dbt-version }} (< 1.5.0), removing version parameters..."
141+
sed -i -e 's/, version=1) }}/) }}/g' -e 's/, v=1) }}/) }}/g' models/top_waiters.sql
142+
else
143+
echo "DBT version is ${{ matrix.dbt-version }} (>= 1.5.0), keeping version parameters"
144+
fi
145+
146+
sqlmesh info --skip-connection

Makefile

Lines changed: 57 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,58 @@
11
.PHONY: docs
22

3+
ifdef UV
4+
PIP := uv pip
5+
else
6+
PIP := pip3
7+
endif
8+
9+
UNAME_S := $(shell uname -s)
10+
ifeq ($(UNAME_S),Darwin)
11+
SED_INPLACE = sed -i ''
12+
else
13+
SED_INPLACE = sed -i
14+
endif
15+
316
install-dev:
4-
pip3 install -e ".[dev,web,slack,dlt,lsp]" ./examples/custom_materializations
17+
$(PIP) install -e ".[dev,web,slack,dlt,lsp]" ./examples/custom_materializations
518

619
install-doc:
7-
pip3 install -r ./docs/requirements.txt
20+
$(PIP) install -r ./docs/requirements.txt
821

922
install-pre-commit:
1023
pre-commit install
1124

25+
install-dev-dbt-%:
26+
@version="$*"; \
27+
period_count=$$(echo "$$version" | tr -cd '.' | wc -c); \
28+
if [ "$$period_count" -eq 0 ]; then \
29+
version="$${version:0:1}.$${version:1}"; \
30+
elif [ "$$period_count" -eq 1 ]; then \
31+
version="$$version.0"; \
32+
fi; \
33+
echo "Installing dbt version: $$version"; \
34+
cp pyproject.toml pyproject.toml.backup; \
35+
$(SED_INPLACE) 's/"pydantic>=2.0.0"/"pydantic"/g' pyproject.toml; \
36+
if [ "$$version" = "1.10.0" ]; then \
37+
echo "Applying special handling for dbt 1.10.0"; \
38+
$(SED_INPLACE) -E 's/"(dbt-core)[^"]*"/"\1~='"$$version"'"/g' pyproject.toml; \
39+
$(SED_INPLACE) -E 's/"(dbt-(bigquery|duckdb|snowflake|athena-community|clickhouse|databricks|redshift|trino))[^"]*"/"\1"/g' pyproject.toml; \
40+
else \
41+
echo "Applying version $$version to all dbt packages"; \
42+
$(SED_INPLACE) -E 's/"(dbt-[^"><=~!]+)[^"]*"/"\1~='"$$version"'"/g' pyproject.toml; \
43+
fi; \
44+
$(MAKE) install-dev; \
45+
if [ "$$version" = "1.6.0" ]; then \
46+
echo "Applying overrides for dbt 1.6.0"; \
47+
$(PIP) install 'pydantic>=2.0.0' 'google-cloud-bigquery==3.30.0' 'databricks-sdk==0.28.0' --reinstall; \
48+
fi; \
49+
if [ "$$version" = "1.7.0" ]; then \
50+
echo "Applying overrides for dbt 1.7.0"; \
51+
$(PIP) install 'databricks-sdk==0.28.0' --reinstall; \
52+
fi; \
53+
mv pyproject.toml.backup pyproject.toml; \
54+
echo "Restored original pyproject.toml"
55+
1256
style:
1357
pre-commit run --all-files
1458

@@ -22,16 +66,16 @@ doc-test:
2266
python -m pytest --doctest-modules sqlmesh/core sqlmesh/utils
2367

2468
package:
25-
pip3 install build && python3 -m build
69+
$(PIP) install build && python3 -m build
2670

2771
publish: package
28-
pip3 install twine && python3 -m twine upload dist/*
72+
$(PIP) install twine && python3 -m twine upload dist/*
2973

3074
package-tests:
31-
pip3 install build && cp pyproject.toml tests/sqlmesh_pyproject.toml && python3 -m build tests/
75+
$(PIP) install build && cp pyproject.toml tests/sqlmesh_pyproject.toml && python3 -m build tests/
3276

3377
publish-tests: package-tests
34-
pip3 install twine && python3 -m twine upload -r tobiko-private tests/dist/*
78+
$(PIP) install twine && python3 -m twine upload -r tobiko-private tests/dist/*
3579

3680
docs-serve:
3781
mkdocs serve
@@ -93,6 +137,9 @@ engine-test:
93137
dbt-test:
94138
pytest -n auto -m "dbt and not cicdonly"
95139

140+
dbt-fast-test:
141+
pytest -n auto -m "dbt and fast" --retries 3
142+
96143
github-test:
97144
pytest -n auto -m "github"
98145

@@ -109,7 +156,7 @@ guard-%:
109156
fi
110157

111158
engine-%-install:
112-
pip3 install -e ".[dev,web,slack,lsp,${*}]" ./examples/custom_materializations
159+
$(PIP) install -e ".[dev,web,slack,lsp,${*}]" ./examples/custom_materializations
113160

114161
engine-docker-%-up:
115162
docker compose -f ./tests/core/engine_adapter/integration/docker/compose.${*}.yaml up -d
@@ -159,11 +206,11 @@ snowflake-test: guard-SNOWFLAKE_ACCOUNT guard-SNOWFLAKE_WAREHOUSE guard-SNOWFLAK
159206
pytest -n auto -m "snowflake" --retries 3 --junitxml=test-results/junit-snowflake.xml
160207

161208
bigquery-test: guard-BIGQUERY_KEYFILE engine-bigquery-install
162-
pip install -e ".[bigframes]"
209+
$(PIP) install -e ".[bigframes]"
163210
pytest -n auto -m "bigquery" --retries 3 --junitxml=test-results/junit-bigquery.xml
164211

165212
databricks-test: guard-DATABRICKS_CATALOG guard-DATABRICKS_SERVER_HOSTNAME guard-DATABRICKS_HTTP_PATH guard-DATABRICKS_ACCESS_TOKEN guard-DATABRICKS_CONNECT_VERSION engine-databricks-install
166-
pip install 'databricks-connect==${DATABRICKS_CONNECT_VERSION}'
213+
$(PIP) install 'databricks-connect==${DATABRICKS_CONNECT_VERSION}'
167214
pytest -n auto -m "databricks" --retries 3 --junitxml=test-results/junit-databricks.xml
168215

169216
redshift-test: guard-REDSHIFT_HOST guard-REDSHIFT_USER guard-REDSHIFT_PASSWORD guard-REDSHIFT_DATABASE engine-redshift-install
@@ -176,7 +223,7 @@ athena-test: guard-AWS_ACCESS_KEY_ID guard-AWS_SECRET_ACCESS_KEY guard-ATHENA_S3
176223
pytest -n auto -m "athena" --retries 3 --junitxml=test-results/junit-athena.xml
177224

178225
fabric-test: guard-FABRIC_HOST guard-FABRIC_CLIENT_ID guard-FABRIC_CLIENT_SECRET guard-FABRIC_DATABASE engine-fabric-install
179-
pytest -n auto -m "fabric" --retries 3 --junitxml=test-results/junit-fabric.xml
226+
pytest -n auto -m "fabric" --retries 3 --junitxml=test-results/junit-fabric.xml
180227

181228
gcp-postgres-test: guard-GCP_POSTGRES_INSTANCE_CONNECTION_STRING guard-GCP_POSTGRES_USER guard-GCP_POSTGRES_PASSWORD guard-GCP_POSTGRES_KEYFILE_JSON engine-gcppostgres-install
182229
pytest -n auto -m "gcp_postgres" --retries 3 --junitxml=test-results/junit-gcp-postgres.xml

docs/integrations/engines/snowflake.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,14 @@ And confirm that our schemas and objects exist in the Snowflake catalog:
250250

251251
Congratulations - your SQLMesh project is up and running on Snowflake!
252252

253+
### Where are the row counts?
254+
255+
SQLMesh reports the number of rows processed by each model in its `plan` and `run` terminal output.
256+
257+
However, due to limitations in the Snowflake Python connector, row counts cannot be determined for `CREATE TABLE AS` statements. Therefore, SQLMesh does not report row counts for certain model kinds, such as `FULL` models.
258+
259+
Learn more about the connector limitation [on GitHub](https://github.com/snowflakedb/snowflake-connector-python/issues/645).
260+
253261
## Local/Built-in Scheduler
254262
**Engine Adapter Type**: `snowflake`
255263

examples/sushi/models/customer_revenue_by_day.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ WITH order_total AS (
2121
LEFT JOIN sushi.items AS i
2222
ON oi.item_id = i.id AND oi.event_date = i.event_date
2323
WHERE
24-
oi.event_date BETWEEN CAST('{{ start_ds }}' as DATE) AND CAST('{{ end_ds }}' as DATE)
24+
oi.event_date BETWEEN @start_date AND @end_date
2525
GROUP BY
2626
oi.order_id,
2727
oi.event_date
@@ -35,7 +35,7 @@ FROM sushi.orders AS o
3535
LEFT JOIN order_total AS ot
3636
ON o.id = ot.order_id AND o.event_date = ot.event_date
3737
WHERE
38-
o.event_date BETWEEN CAST('{{ start_ds }}' as DATE) AND CAST('{{ end_ds }}' as DATE)
38+
o.event_date BETWEEN @start_date AND @end_date
3939
GROUP BY
4040
o.customer_id,
4141
o.event_date

examples/sushi/models/waiter_as_customer_by_day.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,6 @@ SELECT
2727
FROM sushi.waiters AS w
2828
JOIN sushi.customers as c ON w.waiter_id = c.customer_id
2929
JOIN sushi.waiter_names as wn ON w.waiter_id = wn.id
30-
WHERE w.event_date BETWEEN @start_date AND @end_date;
30+
WHERE w.event_date BETWEEN CAST('{{ start_ds }}' as DATE) AND @end_date;
3131

3232
JINJA_END;

examples/sushi_dbt/models/schema.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ models:
2121
tests:
2222
- less_than_amount:
2323
amount: 1000
24+
- greater_than_amount:
25+
amount: 0
2426
- name: ds
2527
description: Date
2628
- name: top_waiters
@@ -34,6 +36,8 @@ models:
3436
field: waiter_id
3537
- name: revenue
3638
description: Revenue from orders served by this waiter
39+
- name: unused_column
40+
data_type: int
3741
- name: waiters
3842
columns:
3943
- name: waiter_id

examples/sushi_dbt/models/top_waiters.sql

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66

77
SELECT
88
waiter_id::INT AS waiter_id,
9-
revenue::DOUBLE AS revenue
9+
revenue::DOUBLE AS revenue,
10+
1 AS unused_column
1011
FROM {{ ref('waiter_revenue_by_day', version=1) }}
1112
WHERE
1213
ds = (

examples/sushi_dbt/profiles.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,14 @@ sushi:
33
in_memory:
44
type: duckdb
55
schema: sushi
6+
postgres:
7+
type: postgres
8+
host: "host"
9+
user: "user"
10+
password: "password"
11+
dbname: "dbname"
12+
port: 5432
13+
schema: sushi
614
duckdb:
715
type: duckdb
816
path: 'local.duckdb'

0 commit comments

Comments
 (0)