@@ -350,16 +350,38 @@ def read_managed_data(
350350 session = self ._session ,
351351 )
352352
def load_data_or_write_data(
    self,
    data: local_data.ManagedArrowTable,
    offsets_col: str,
) -> bq_data.BigqueryDataSource:
    """Materialize local data in BigQuery, preferring the load API.

    Uses a BigQuery load job when every column dtype is supported by it;
    otherwise falls back to the write API, which can handle dtypes the
    load path cannot (e.g. nested JSON).

    Args:
        data: Local managed Arrow table to upload.
        offsets_col: Name of the offsets column to append during upload.

    Returns:
        A ``bq_data.BigqueryDataSource`` backed by the uploaded table.
    """
    # A single unsupported column forces the write-API path for the
    # whole table, since a load job is all-or-nothing.
    for item in data.schema.items:
        if not _is_dtype_can_load(item.column, item.dtype):
            return self.write_data(data, offsets_col=offsets_col)
    return self.load_data(data, offsets_col=offsets_col)
353368 def load_data (
354369 self ,
355370 data : local_data .ManagedArrowTable ,
356371 offsets_col : str ,
357372 ) -> bq_data .BigqueryDataSource :
358373 """Load managed data into bigquery"""
359-
360- # JSON support incomplete
361- for item in data .schema .items :
362- _validate_dtype_can_load (item .column , item .dtype )
374+ cannot_load_columns = {
375+ item .column : item .dtype
376+ for item in data .schema .items
377+ if not _is_dtype_can_load (item .column , item .dtype )
378+ }
379+
380+ if cannot_load_columns :
381+ raise NotImplementedError (
382+ f"Nested JSON types are currently unsupported for BigQuery Load API. "
383+ f"Unsupported columns: { cannot_load_columns } . { constants .FEEDBACK_LINK } "
384+ )
363385
364386 schema_w_offsets = data .schema .append (
365387 schemata .SchemaItem (offsets_col , bigframes .dtypes .INT_DTYPE )
@@ -1475,31 +1497,27 @@ def _transform_read_gbq_configuration(configuration: Optional[dict]) -> dict:
14751497 return configuration
14761498
14771499
def _is_dtype_can_load(name: str, column_type: bigframes.dtypes.Dtype) -> bool:
    """Return whether ``column_type`` is supported by BigQuery load jobs.

    Due to a BigQuery IO limitation with loading JSON from Parquet files
    (b/374784249), JSON is uploaded via a workaround: stored as strings and
    parsed into JSON objects server-side. That workaround covers top-level
    JSON only, so columns containing *nested* JSON cannot go through a load
    job.
    TODO(b/395912450): Remove workaround solution once b/374784249 got resolved.

    Args:
        name: Column name (currently unused; kept for positional callers).
        column_type: The BigFrames dtype of the column.

    Returns:
        True if a load job can handle the dtype, False otherwise.
    """
    # Top-level JSON is loadable via the string-conversion workaround.
    if column_type == bigframes.dtypes.JSON_DTYPE:
        return True

    # Nested JSON inside an Arrow extension dtype is the one unsupported case.
    is_nested_json = isinstance(
        column_type, pandas.ArrowDtype
    ) and bigframes.dtypes.contains_db_dtypes_json_arrow_type(
        column_type.pyarrow_dtype
    )
    return not is_nested_json
15031521
15041522
15051523# itertools.batched not available in python <3.12, so we use this instead
0 commit comments