|
| 1 | +{# |
| 2 | + Override dbt-dremio's seed materialization to support large seeds via batched inserts. |
| 3 | + Dremio's REST API rejects SQL that is too large or complex, so we split the VALUES |
| 4 | + clause into batches of BATCH_SIZE rows and issue separate INSERT INTO statements. |
| 5 | +#} |
| 6 | + |
{#
  Render a SELECT over an inline VALUES list for one slice of a seed's rows.

  Args:
    model:       the seed node; model['config'] supplies the optional
                 `column_types` overrides and the `quote_columns` setting.
    agate_table: the loaded seed table; `column_names` and `rows` are read.
    start_idx:   index of the first row to render (inclusive).
    end_idx:     index one past the last row to render (exclusive).

  Returns (as rendered text): a query of the form
    select cast(<col> as <type>) as <col>, ...
    from (values (...), (...)) temp_table (<cols>)
  Every non-null cell is emitted as a single-quoted string literal (embedded
  single quotes doubled) and cast to the overridden or inferred column type
  by the outer SELECT; null cells are emitted as cast(null as varchar).

  Notes:
    - The bounds are clamped to the table's row count, so an oversized
      end_idx cannot raise an index error.
    - When the clamped range is empty, a single all-NULL row is emitted
      together with `where 1 = 0`, so the statement stays syntactically valid
      and yields zero rows with the expected column types.
      NOTE(review): confirm the target Dremio version accepts a CTAS that
      produces no rows.
    - Whitespace control is deliberately aggressive: this SQL is sent in
      size-limited statements, so padding around every cell is avoided.
#}
{% macro dremio__select_csv_rows_batch(model, agate_table, start_idx, end_idx) %}
{%- set column_override = model['config'].get('column_types', {}) -%}
{%- set quote_seed_column = model['config'].get('quote_columns', None) -%}
{%- set cols_sql = get_seed_column_quoted_csv(model, agate_table.column_names) -%}
{%- set total_rows = agate_table.rows | length -%}
{#- Clamp the requested slice to [0, total_rows]. -#}
{%- set first_idx = [[start_idx, 0] | max, total_rows] | min -%}
{%- set last_idx = [[end_idx, first_idx] | max, total_rows] | min -%}
{%- set is_empty = (last_idx <= first_idx) -%}
select
  {%- for col_name in agate_table.column_names %}
  {%- set inferred_type = adapter.convert_type(agate_table, loop.index0) %}
  {%- set type = column_override.get(col_name, inferred_type) %}
  {%- set quoted_name = adapter.quote_seed_column(col_name | string, quote_seed_column) %}
  cast({{ quoted_name }} as {{ type }}) as {{ quoted_name }}{{ "," if not loop.last else "" }}
  {%- endfor %}
from (values
  {%- if is_empty %}
  {#- Placeholder row only; it is filtered out again by `where 1 = 0` below. #}
  ({% for col_name in agate_table.column_names %}cast(null as varchar){{ ", " if not loop.last else "" }}{% endfor %})
  {%- else %}
  {%- for idx in range(first_idx, last_idx) %}
  ({% for value in agate_table.rows[idx] -%}
    {%- if value is not none -%}
      {{ "'" ~ (value | string | replace("'", "''")) ~ "'" }}
    {%- else -%}
      cast(null as varchar)
    {%- endif -%}
    {{ ", " if not loop.last else "" }}
  {%- endfor %}){{ "," if not loop.last else "" }}
  {%- endfor %}
  {%- endif %}
) temp_table ({{ cols_sql }})
{%- if is_empty %}
where 1 = 0
{%- endif %}
{% endmacro %}
| 34 | + |
| 35 | + |
{#
  Seed materialization for the Dremio adapter that loads the CSV in batches.

  Flow:
    1. Run pre-hooks.
    2. Load the seed CSV into an agate table and store it as the
       'agate_table' result.
    3. Drop the existing relation, if any.
    4. Create the target table from the first batch of rows (CTAS).
    5. Append each remaining batch with its own INSERT INTO statement.
    6. Record a no-op 'main' statement carrying the row count, then refresh
       metadata, apply the twin strategy, persist docs, apply grants and run
       post-hooks.

  Config:
    format          - passed to refresh_metadata; defaults to 'iceberg'.
    seed_batch_size - optional number of rows per statement; defaults to 500.
                      Values below 1 (or non-numeric values) fall back to the
                      default so the batching loop always advances.
    grants          - forwarded to apply_grants.

  Returns: {'relations': [target_relation]}

  NOTE(review): each batch is a separate statement; if a later INSERT fails
  the table is presumably left partially loaded - confirm whether that is
  acceptable for callers.
#}
{% materialization seed, adapter = 'dremio' %}

  {%- set identifier = model['alias'] -%}
  {%- set format = config.get('format', validator=validation.any[basestring]) or 'iceberg' -%}
  {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}
  {%- set target_relation = this.incorporate(type='table') -%}
  {% set grant_config = config.get('grants') %}

  {# Batch size: keep each SQL statement small enough for Dremio's REST API #}
  {%- set default_batch_size = 500 -%}
  {%- set requested_batch_size = config.get('seed_batch_size', default=default_batch_size) | int -%}
  {# A step below 1 would make range() below fail or never advance. #}
  {%- set batch_size = requested_batch_size if requested_batch_size >= 1 else default_batch_size -%}

  {{ run_hooks(pre_hooks) }}

  {# Load the CSV before dropping anything, so a seed file that fails to load
     does not cost us the existing table. #}
  {%- set agate_table = load_agate_table() -%}
  {%- do store_result('agate_table', response='OK', agate_table=agate_table) -%}
  {%- set num_rows = (agate_table.rows | length) -%}

  {% if old_relation is not none -%}
    {{ adapter.drop_relation(old_relation) }}
  {%- endif %}

  {# The CREATE statement always runs, so at least one batch is reported even
     for an empty seed. #}
  {%- set num_batches = [1, ((num_rows / batch_size) | round(0, 'ceil') | int)] | max -%}
  {%- set first_end = [batch_size, num_rows] | min -%}

  {# Create table with first batch #}
  {%- set first_batch_sql = dremio__select_csv_rows_batch(model, agate_table, 0, first_end) -%}
  {% call statement('effective_main') -%}
    {{ create_table_as(False, target_relation, first_batch_sql) }}
  {%- endcall %}

  {# Insert remaining batches #}
  {% for batch_start in range(batch_size, num_rows, batch_size) %}
    {%- set batch_end = [batch_start + batch_size, num_rows] | min -%}
    {%- set batch_sql = dremio__select_csv_rows_batch(model, agate_table, batch_start, batch_end) -%}
    {% call statement('insert_batch_' ~ batch_start) -%}
      INSERT INTO {{ target_relation }}
      {{ batch_sql }}
    {%- endcall %}
  {% endfor %}

  {# Nothing is executed here; this only records the overall result under 'main'. #}
  {% call noop_statement('main', 'CREATE ' ~ num_rows, 'CREATE', num_rows) %}
    -- batched seed insert ({{ num_rows }} rows in {{ num_batches }} batches)
  {% endcall %}

  {{ refresh_metadata(target_relation, format) }}

  {{ apply_twin_strategy(target_relation) }}

  {% do persist_docs(target_relation, model) %}

  {# NOTE(review): `should_revoke` is never assigned in this materialization,
     so whatever that name resolves to in the surrounding context is passed
     through unchanged - confirm this is the intended value. #}
  {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %}

  {{ run_hooks(post_hooks) }}

  {{ return({'relations': [target_relation]}) }}

{% endmaterialization %}
0 commit comments