@@ -177,6 +177,7 @@ def __init__(
177177 # Now that we're starting the session, don't allow the options to be
178178 # changed.
179179 context ._session_started = True
180+ self ._df_snapshot : Dict [bigquery .TableReference , datetime .datetime ] = {}
180181
181182 @property
182183 def bqclient (self ):
@@ -232,6 +233,7 @@ def read_gbq(
232233 index_col : Iterable [str ] | str = (),
233234 col_order : Iterable [str ] = (),
234235 max_results : Optional [int ] = None ,
236+ use_cache : bool = True ,
235237 # Add a verify index argument that fails if the index is not unique.
236238 ) -> dataframe .DataFrame :
237239 # TODO(b/281571214): Generate prompt to show the progress of read_gbq.
@@ -242,6 +244,7 @@ def read_gbq(
242244 col_order = col_order ,
243245 max_results = max_results ,
244246 api_name = "read_gbq" ,
247+ use_cache = use_cache ,
245248 )
246249 else :
247250 # TODO(swast): Query the snapshot table but mark it as a
@@ -253,13 +256,15 @@ def read_gbq(
253256 col_order = col_order ,
254257 max_results = max_results ,
255258 api_name = "read_gbq" ,
259+ use_cache = use_cache ,
256260 )
257261
258262 def _query_to_destination (
259263 self ,
260264 query : str ,
261265 index_cols : List [str ],
262266 api_name : str ,
267+ use_cache : bool = True ,
263268 ) -> Tuple [Optional [bigquery .TableReference ], Optional [bigquery .QueryJob ]]:
264269 # If a dry_run indicates this is not a query type job, then don't
265270 # bother trying to do a CREATE TEMP TABLE ... AS SELECT ... statement.
@@ -284,6 +289,7 @@ def _query_to_destination(
284289 job_config = bigquery .QueryJobConfig ()
285290 job_config .labels ["bigframes-api" ] = api_name
286291 job_config .destination = temp_table
292+ job_config .use_query_cache = use_cache
287293
288294 try :
289295 # Write to temp table to workaround BigQuery 10 GB query results
@@ -305,6 +311,7 @@ def read_gbq_query(
305311 index_col : Iterable [str ] | str = (),
306312 col_order : Iterable [str ] = (),
307313 max_results : Optional [int ] = None ,
314+ use_cache : bool = True ,
308315 ) -> dataframe .DataFrame :
309316 """Turn a SQL query into a DataFrame.
310317
@@ -362,6 +369,7 @@ def read_gbq_query(
362369 col_order = col_order ,
363370 max_results = max_results ,
364371 api_name = "read_gbq_query" ,
372+ use_cache = use_cache ,
365373 )
366374
367375 def _read_gbq_query (
@@ -372,14 +380,18 @@ def _read_gbq_query(
372380 col_order : Iterable [str ] = (),
373381 max_results : Optional [int ] = None ,
374382 api_name : str = "read_gbq_query" ,
383+ use_cache : bool = True ,
375384 ) -> dataframe .DataFrame :
376385 if isinstance (index_col , str ):
377386 index_cols = [index_col ]
378387 else :
379388 index_cols = list (index_col )
380389
381390 destination , query_job = self ._query_to_destination (
382- query , index_cols , api_name = api_name
391+ query ,
392+ index_cols ,
393+ api_name = api_name ,
394+ use_cache = use_cache ,
383395 )
384396
385397 # If there was no destination table, that means the query must have
@@ -403,6 +415,7 @@ def _read_gbq_query(
403415 index_col = index_cols ,
404416 col_order = col_order ,
405417 max_results = max_results ,
418+ use_cache = use_cache ,
406419 )
407420
408421 def read_gbq_table (
@@ -412,6 +425,7 @@ def read_gbq_table(
412425 index_col : Iterable [str ] | str = (),
413426 col_order : Iterable [str ] = (),
414427 max_results : Optional [int ] = None ,
428+ use_cache : bool = True ,
415429 ) -> dataframe .DataFrame :
416430 """Turn a BigQuery table into a DataFrame.
417431
@@ -434,33 +448,22 @@ def read_gbq_table(
434448 col_order = col_order ,
435449 max_results = max_results ,
436450 api_name = "read_gbq_table" ,
451+ use_cache = use_cache ,
437452 )
438453
439454 def _get_snapshot_sql_and_primary_key (
440455 self ,
441456 table_ref : bigquery .table .TableReference ,
442457 * ,
443458 api_name : str ,
459+ use_cache : bool = True ,
444460 ) -> Tuple [ibis_types .Table , Optional [Sequence [str ]]]:
445461 """Create a read-only Ibis table expression representing a table.
446462
447463 If we can get a total ordering from the table, such as via primary key
448464 column(s), then return those too so that ordering generation can be
449465 avoided.
450466 """
451- if table_ref .dataset_id .upper () == "_SESSION" :
452- # _SESSION tables aren't supported by the tables.get REST API.
453- return (
454- self .ibis_client .sql (
455- f"SELECT * FROM `_SESSION`.`{ table_ref .table_id } `"
456- ),
457- None ,
458- )
459- table_expression = self .ibis_client .table (
460- table_ref .table_id ,
461- database = f"{ table_ref .project } .{ table_ref .dataset_id } " ,
462- )
463-
464467 # If there are primary keys defined, the query engine assumes these
465468 # columns are unique, even if the constraint is not enforced. We make
466469 # the same assumption and use these columns as the total ordering keys.
@@ -481,14 +484,18 @@ def _get_snapshot_sql_and_primary_key(
481484
482485 job_config = bigquery .QueryJobConfig ()
483486 job_config .labels ["bigframes-api" ] = api_name
484- current_timestamp = list (
485- self .bqclient .query (
486- "SELECT CURRENT_TIMESTAMP() AS `current_timestamp`" ,
487- job_config = job_config ,
488- ).result ()
489- )[0 ][0 ]
487+ if use_cache and table_ref in self ._df_snapshot .keys ():
488+ snapshot_timestamp = self ._df_snapshot [table_ref ]
489+ else :
490+ snapshot_timestamp = list (
491+ self .bqclient .query (
492+ "SELECT CURRENT_TIMESTAMP() AS `current_timestamp`" ,
493+ job_config = job_config ,
494+ ).result ()
495+ )[0 ][0 ]
496+ self ._df_snapshot [table_ref ] = snapshot_timestamp
490497 table_expression = self .ibis_client .sql (
491- bigframes_io .create_snapshot_sql (table_ref , current_timestamp )
498+ bigframes_io .create_snapshot_sql (table_ref , snapshot_timestamp )
492499 )
493500 return table_expression , primary_keys
494501
@@ -500,20 +507,21 @@ def _read_gbq_table(
500507 col_order : Iterable [str ] = (),
501508 max_results : Optional [int ] = None ,
502509 api_name : str ,
510+ use_cache : bool = True ,
503511 ) -> dataframe .DataFrame :
504512 if max_results and max_results <= 0 :
505513 raise ValueError ("`max_results` should be a positive number." )
506514
507- # TODO(swast): Can we re-use the temp table from other reads in the
508- # session, if the original table wasn't modified?
509515 table_ref = bigquery .table .TableReference .from_string (
510516 query , default_project = self .bqclient .project
511517 )
512518
513519 (
514520 table_expression ,
515521 total_ordering_cols ,
516- ) = self ._get_snapshot_sql_and_primary_key (table_ref , api_name = api_name )
522+ ) = self ._get_snapshot_sql_and_primary_key (
523+ table_ref , api_name = api_name , use_cache = use_cache
524+ )
517525
518526 for key in col_order :
519527 if key not in table_expression .columns :
0 commit comments