Skip to content

Commit efe4e94

Browse files
authored
extend to yaml (#38371)
1 parent 16609ed commit efe4e94

1 file changed

Lines changed: 8 additions & 1 deletion

File tree

sdks/python/apache_beam/yaml/yaml_io.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -563,6 +563,7 @@ def write_to_iceberg(
563563
drop: Optional[Iterable[str]] = None,
564564
only: Optional[str] = None,
565565
distribution_mode: Optional[str] = None,
566+
autosharding: Optional[bool] = None,
566567
):
567568
# TODO(robertwb): It'd be nice to derive this list of parameters, along with
568569
# their types and docs, programmatically from the iceberg (or managed)
@@ -616,6 +617,11 @@ def write_to_iceberg(
616617
distributions:
617618
- none: don't shuffle rows (default)
618619
- hash: shuffle rows by partition key before writing data
620+
autosharding: Enables dynamic sharding to automatically adjust the number
621+
of parallel writers based on data volume. It handles data skew by
622+
further sub-dividing partitions into multiple shards to prevent
623+
bottlenecks during high-throughput writes. Only available with 'hash'
624+
distribution mode.
619625
"""
620626
return beam.managed.Write(
621627
"iceberg",
@@ -630,7 +636,8 @@ def write_to_iceberg(
630636
keep=keep,
631637
drop=drop,
632638
only=only,
633-
distribution_mode=distribution_mode))
639+
distribution_mode=distribution_mode,
640+
autosharding=autosharding))
634641

635642

636643
def io_providers():

0 commit comments

Comments
 (0)