|
40 | 40 | import bigframes.session.metrics |
41 | 41 | import bigframes.session.planner |
42 | 42 | import bigframes.session.temporary_storage |
43 | | -from bigframes.core import compile, guid, local_data, rewrite |
| 43 | +from bigframes.core import bq_data, compile, guid, local_data, rewrite |
44 | 44 | from bigframes.core.compile.sqlglot import sql as sg_sql |
45 | 45 | from bigframes.core.compile.sqlglot import sqlglot_ir |
46 | 46 | from bigframes.session import ( |
@@ -176,59 +176,61 @@ def _execute_bigquery( |
176 | 176 | ) -> executor.ExecuteResult: |
177 | 177 | dest_spec = execution_spec.destination_spec |
178 | 178 | # Recursive handlers for different cases, maybe extract to explicit interface. |
| 179 | + if ( |
| 180 | + isinstance(dest_spec, ex_spec.EphemeralTableSpec) |
| 181 | + and not execution_spec.promise_under_10gb |
| 182 | + ): |
| 183 | + # Results over 10GB need to explicitly allocate a table. |
| 184 | + execution_spec = dataclasses.replace( |
| 185 | + execution_spec, destination_spec=ex_spec.SessionTableSpec() |
| 186 | + ) |
| 187 | + return self._execute_bigquery(array_value, execution_spec) |
179 | 188 | if isinstance(dest_spec, ex_spec.GcsOutputSpec): |
180 | 189 | execution_spec = dataclasses.replace( |
181 | | - execution_spec, |
182 | | - destination_spec=ex_spec.TempTableSpec( |
183 | | - cluster_cols=dest_spec.cluster_cols, lifetime="ephemeral" |
184 | | - ), |
| 190 | + execution_spec, destination_spec=ex_spec.EphemeralTableSpec() |
185 | 191 | ) |
186 | 192 | results = self._execute_bigquery(array_value, execution_spec) |
187 | 193 | self._export_result_gcs(results, dest_spec) |
188 | 194 | return results |
189 | | - if isinstance(dest_spec, ex_spec.TableOutputSpec): |
| 195 | + if isinstance(dest_spec, ex_spec.TableOutputSpec) and dest_spec.permit_dml: |
190 | 196 | # Special DML path - maybe this should be configurable, dml vs query destination has tradeoffs |
191 | 197 | existing_table = self._maybe_find_existing_table(dest_spec) |
192 | | - if execution_spec.ordered: |
193 | | - raise ValueError("Ordering not supported with table outputs") |
194 | 198 | if (existing_table is not None) and _is_schema_match( |
195 | 199 | existing_table.schema, array_value.schema |
196 | 200 | ): |
197 | 201 | execution_spec = dataclasses.replace( |
198 | | - execution_spec, |
199 | | - destination_spec=ex_spec.TempTableSpec( |
200 | | - cluster_cols=execution_spec.destination_spec.cluster_cols, |
201 | | - lifetime="ephemeral", |
202 | | - ), |
| 202 | + execution_spec, destination_spec=ex_spec.EphemeralTableSpec() |
203 | 203 | ) |
204 | 204 | results = self._execute_bigquery(array_value, execution_spec) |
205 | 205 | self._export_gbq_with_dml(results, dest_spec) |
206 | 206 | return results |
207 | | - if isinstance(dest_spec, ex_spec.TempTableSpec): |
| 207 | + if isinstance(dest_spec, ex_spec.SessionTableSpec): |
208 | 208 | # "ephemeral" temp tables created in the course of execution, don't need to be allocated
209 | 209 | # materialized ordering only really makes sense for internal temp tables used by caching |
210 | 210 | cluster_cols = dest_spec.cluster_cols |
| 211 | + # Rewrite plan to materialize ordering as extra columns |
211 | 212 | plan = array_value.node |
212 | 213 | if dest_spec.ordering == "offsets_col": |
213 | 214 | order_col_id = guid.generate_guid() |
214 | 215 | plan = nodes.PromoteOffsetsNode(plan, order_col_id) |
215 | 216 | cluster_cols = [order_col_id] |
216 | 217 | elif dest_spec.ordering == "order_key": |
217 | | - plan = nodes.defer_order(plan, output_hidden_row_keys=True) |
218 | | - if dest_spec.lifetime == "session": |
219 | | - destination_table = self.storage_manager.create_temp_table( |
220 | | - plan.schema, cluster_cols |
221 | | - ) |
222 | | - arr_value = bigframes.core.ArrayValue(plan) |
223 | | - execution_spec = dataclasses.replace( |
224 | | - execution_spec, |
225 | | - destination_spec=ex_spec.TableOutputSpec( |
226 | | - table=destination_table, |
227 | | - cluster_cols=dest_spec.cluster_cols, |
228 | | - if_exists="replace", |
229 | | - ), |
230 | | - ) |
231 | | - return self._execute_bigquery(arr_value, execution_spec) |
| 218 | + plan, _ = rewrite.pull_out_order(plan) |
| 219 | + destination_table = self.storage_manager.create_temp_table( |
| 220 | + plan.schema.to_bigquery(), cluster_cols |
| 221 | + ) |
| 222 | + arr_value = bigframes.core.ArrayValue(plan) |
| 223 | + execution_spec = dataclasses.replace( |
| 224 | + execution_spec, |
| 225 | + destination_spec=ex_spec.TableOutputSpec( |
| 226 | + table=destination_table, |
| 227 | + cluster_cols=dest_spec.cluster_cols, |
| 228 | + if_exists="replace", |
| 229 | + # Avoid loops, also dml is mostly used to avoid quotas on user-owned tables |
| 230 | + permit_dml=False, |
| 231 | + ), |
| 232 | + ) |
| 233 | + return self._execute_bigquery(arr_value, execution_spec) |
232 | 234 |
|
233 | 235 | # At this point, dst should be unspecified, a specific bq table, or an ephemeral temp table |
234 | 236 | # Also, ordering mode will either be none or row-sorted |
@@ -405,32 +407,28 @@ def _cache_with_cluster_cols( |
405 | 407 | ] |
406 | 408 | cluster_cols = cluster_cols[:_MAX_CLUSTER_COLUMNS] |
407 | 409 | execution_spec = ex_spec.ExecutionSpec( |
408 | | - destination_spec=ex_spec.TempTableSpec( |
409 | | - cluster_cols=tuple(cluster_cols), |
410 | | - lifetime="session", |
411 | | - ordering="order_key", |
412 | | - ) |
| 410 | + destination_spec=ex_spec.SessionTableSpec(cluster_cols=tuple(cluster_cols)) |
413 | 411 | ) |
414 | | - result_bq_data = self.execute( |
| 412 | + result = self.execute( |
415 | 413 | array_value, |
416 | 414 | execution_spec=execution_spec, |
417 | 415 | ) |
418 | | - assert isinstance(result_bq_data, bigframes.core.BigqueryDataSource) |
419 | | - self.cache.cache_results_table(array_value.node, result_bq_data) |
| 416 | + assert isinstance(result, executor.BQTableExecuteResult) |
| 417 | + self.cache.cache_results_table(array_value.node, result._data) |
420 | 418 |
|
421 | 419 | def _cache_with_offsets(self, array_value: bigframes.core.ArrayValue): |
422 | 420 | """Executes the query and uses the resulting table to rewrite future executions.""" |
423 | 421 | execution_spec = ex_spec.ExecutionSpec( |
424 | | - destination_spec=ex_spec.TempTableSpec( |
425 | | - cluster_cols=(), lifetime="session", ordering="offsets_col" |
| 422 | + destination_spec=ex_spec.SessionTableSpec( |
| 423 | + cluster_cols=(), ordering="offsets_col" |
426 | 424 | ) |
427 | 425 | ) |
428 | | - result_bq_data = self.execute( |
| 426 | + result = self.execute( |
429 | 427 | array_value, |
430 | 428 | execution_spec=execution_spec, |
431 | 429 | ) |
432 | | - assert isinstance(result_bq_data, bigframes.core.BigqueryDataSource) |
433 | | - self.cache.cache_results_table(array_value.node, result_bq_data) |
| 430 | + assert isinstance(result, executor.BQTableExecuteResult) |
| 431 | + self.cache.cache_results_table(array_value.node, result._data) |
434 | 432 |
|
435 | 433 | def _cache_with_session_awareness( |
436 | 434 | self, |
|
0 commit comments