Skip to content

Commit e0637b0

Browse files
rrbarbosaharitamarclaude
authored
add option to partition large models (#939)
* add option to partition large models * add explicit partition check * undo dbt project change * refactor: simplify BQ partitioning - enable by default, single disable flag - Partitioning is now always on for BigQuery (partition by created_at/day) instead of requiring an opt-in flag - Replace partition_run_results + run_results_partition_by vars with a single bigquery_disable_partitioning escape hatch - Hardcode partition spec in model configs instead of indirecting through config vars - Restore original code formatting (revert gratuitous style changes) - Tighten test assertions to verify partition column is created_at Note: existing BigQuery users will need --full-refresh to apply partitioning to already-created tables. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * refactor: extract get_default_partition_by dispatched macro Move the BQ partition logic into a dispatched macro to avoid repeating the target.type check across model files. Follows the same pattern as get_default_incremental_strategy and get_default_table_type. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --------- Co-authored-by: Itamar Hartstein <haritamar@gmail.com> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 8040a76 commit e0637b0

File tree

5 files changed

+68
-0
lines changed

5 files changed

+68
-0
lines changed

integration_tests/tests/test_dbt_artifacts/test_artifacts.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,3 +163,50 @@ def test_timings(dbt_project: DbtProject):
163163

164164
assert len(results) == 1
165165
assert results[0]["execute_started_at"]
166+
167+
168+
@pytest.mark.only_on_targets(["bigquery"])
def test_run_results_partitioned(dbt_project: DbtProject):
    """BigQuery partitioning is on by default: the model must build, stay
    readable, and end up partitioned by `created_at`."""
    dbt_project.dbt_runner.vars["disable_run_results"] = False
    # Recreate the table so the partition spec is applied to it.
    dbt_project.dbt_runner.run(select="dbt_run_results", full_refresh=True)
    dbt_project.dbt_runner.run(select=TEST_MODEL)

    rows = dbt_project.run_query(
        """select * from {{ ref("dbt_run_results") }} where name='%s'""" % TEST_MODEL
    )
    assert len(rows) >= 1

    # Ask INFORMATION_SCHEMA which column(s) BigQuery partitions the table on.
    partition_query = (
        "SELECT column_name "
        "FROM `{{ ref('dbt_run_results').database }}.{{ ref('dbt_run_results').schema }}.INFORMATION_SCHEMA.COLUMNS` "
        "WHERE table_name = '{{ ref('dbt_run_results').identifier }}' "
        "AND is_partitioning_column = 'YES'"
    )
    partitioning_columns = [
        row["column_name"] for row in dbt_project.run_query(partition_query)
    ]
    assert partitioning_columns == [
        "created_at"
    ], "dbt_run_results should be partitioned by created_at in BigQuery"
190+
191+
192+
@pytest.mark.only_on_targets(["bigquery"])
def test_dbt_invocations_partitioned(dbt_project: DbtProject):
    """BigQuery partitioning is on by default: dbt_invocations must build,
    capture an invocation row, and be partitioned by `created_at`."""
    dbt_project.dbt_runner.vars["disable_dbt_invocation_autoupload"] = False
    # Recreate the table so the partition spec is applied to it.
    dbt_project.dbt_runner.run(select="dbt_invocations", full_refresh=True)
    dbt_project.dbt_runner.run(selector="one")
    dbt_project.read_table(
        "dbt_invocations", where="yaml_selector = 'one'", raise_if_empty=True
    )

    # Ask INFORMATION_SCHEMA which column(s) BigQuery partitions the table on.
    partition_query = (
        "SELECT column_name "
        "FROM `{{ ref('dbt_invocations').database }}.{{ ref('dbt_invocations').schema }}.INFORMATION_SCHEMA.COLUMNS` "
        "WHERE table_name = '{{ ref('dbt_invocations').identifier }}' "
        "AND is_partitioning_column = 'YES'"
    )
    partitioning_columns = [
        row["column_name"] for row in dbt_project.run_query(partition_query)
    ]
    assert partitioning_columns == [
        "created_at"
    ], "dbt_invocations should be partitioned by created_at in BigQuery"

macros/edr/system/system_utils/get_config_var.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@
143143
"anomaly_exclude_metrics": none,
144144
"disable_samples_on_pii_tags": false,
145145
"pii_tags": ["pii"],
146+
"bigquery_disable_partitioning": false,
146147
} %}
147148
{{- return(default_config) -}}
148149
{%- endmacro -%}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{# Default `partition_by` config for Elementary artifact models.
   Dispatched per adapter; only BigQuery opts in, all others return none. #}
{% macro get_default_partition_by() %}
  {% do return(adapter.dispatch("get_default_partition_by", "elementary")()) %}
{% endmacro %}

{# BigQuery: daily partitioning on created_at, unless the
   `bigquery_disable_partitioning` escape hatch is set. #}
{%- macro bigquery__get_default_partition_by() %}
  {% if elementary.get_config_var("bigquery_disable_partitioning") %}
    {% do return(none) %}
  {% endif %}
  {% do return(
    {
      "field": "created_at",
      "data_type": "timestamp",
      "granularity": "day",
    }
  ) %}
{% endmacro %}

{# All other adapters: no default partitioning. #}
{% macro default__get_default_partition_by() %} {% do return(none) %} {% endmacro %}

models/edr/dbt_artifacts/dbt_invocations.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
transient=False,
55
unique_key="invocation_id",
66
on_schema_change="append_new_columns",
7+
partition_by=elementary.get_default_partition_by(),
78
full_refresh=elementary.get_config_var("elementary_full_refresh"),
89
meta={
910
"timestamp_column": "created_at",

models/edr/dbt_artifacts/dbt_run_results.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
if target.type == "postgres"
1414
else []
1515
),
16+
partition_by=elementary.get_default_partition_by(),
1617
full_refresh=elementary.get_config_var("elementary_full_refresh"),
1718
meta={
1819
"dedup_by_column": "model_execution_id",

0 commit comments

Comments
 (0)