DataDog
diff --git a/crates/integrations/datafusion/src/table/bucketing.rs b/crates/integrations/datafusion/src/table/bucketing.rs
Lines changed: 146 additions & 16 deletions
@@ -23,6 +23,8 @@ use datafusion::arrow::array::{
 };
 use datafusion::arrow::datatypes::{DataType, Schema as ArrowSchema};
 use datafusion::common::hash_utils::create_hashes;
+use datafusion::physical_expr::PhysicalExpr;
+use datafusion::physical_expr::expressions::Column;
 use datafusion::physical_plan::repartition::REPARTITION_RANDOM_STATE;
 use iceberg::scan::FileScanTask;
 use iceberg::spec::{Literal, PrimitiveLiteral, Transform};
@@ -96,27 +98,128 @@ fn is_supported_dtype(dt: &DataType) -> bool {
     )
 }
 
-/// Distribute `tasks` across `n_partitions` buckets. When `identity_cols`
-/// describes a non-empty, hashable identity key, each task is hashed on
-/// that key using DataFusion's repartition hash so the resulting partitioning
-/// matches what `RepartitionExec` would produce on the same data. Tasks
-/// missing partition data fall back to hashing `data_file_path`, which still
-/// distributes evenly but breaks the `Hash` contract — the second tuple
-/// element flags whether every task supplied a full identity key.
+/// One partition-spec field whose transform is `Transform::Bucket(_)`, paired
+/// with the source column it buckets.
+///
+/// The source column must be present in the output projection so it can be
+/// referenced through a `Column` expression in `Partitioning::Hash`. No Arrow
+/// type is stored: the partition-tuple slot of a bucket transform always holds
+/// an `Int32` (the spec-defined `result_type`), whatever the source column's
+/// own type is.
+pub(super) struct BucketCol {
+    /// Name of the source column.
+    pub(super) name: String,
+    /// Position of this column in the *output* schema (after projection).
+    pub(super) output_idx: usize,
+    /// Position of this column inside the partition spec's `fields()` slice,
+    /// matching the slot order of `FileScanTask::partition`.
+    pub(super) spec_field_idx: usize,
+}
+
+/// Detect a *purely* bucketed default partition spec and describe its columns.
+///
+/// Returns one [`BucketCol`] per spec field, in spec order, when all of the
+/// following hold:
+///
+/// * the table metadata lists at most one partition spec (no spec evolution),
+/// * the default spec has at least one field,
+/// * every field's transform is `Transform::Bucket(_)`,
+/// * every field's source column exists in the current table schema and in
+///   `output_schema`.
+///
+/// Any other situation yields `None`. Mixed identity+bucket specs are rejected
+/// on purpose: [`compute_identity_cols`] handles those by keeping only the
+/// identity fields.
+pub(super) fn compute_bucket_cols(
+    table: &Table,
+    output_schema: &ArrowSchema,
+) -> Option<Vec<BucketCol>> {
+    let metadata = table.metadata();
+    let spec_count = metadata.partition_specs_iter().len();
+    if spec_count > 1 {
+        return None;
+    }
+
+    let default_spec = metadata.default_partition_spec();
+    let partition_fields = default_spec.fields();
+    if partition_fields.is_empty() {
+        return None;
+    }
+    let schema = metadata.current_schema();
+
+    // Collecting into `Option<Vec<_>>` stops at the first field that fails a check.
+    partition_fields
+        .iter()
+        .enumerate()
+        .map(|(spec_field_idx, field)| {
+            if !matches!(field.transform, Transform::Bucket(_)) {
+                return None;
+            }
+            let source = schema.field_by_id(field.source_id)?;
+            let output_idx = output_schema.index_of(source.name.as_str()).ok()?;
+            Some(BucketCol {
+                name: source.name.clone(),
+                output_idx,
+                spec_field_idx,
+            })
+        })
+        .collect()
+}
+
+/// Partition-key description shared by [`bucket_tasks`] and
+/// `IcebergTableProvider::scan`: the same value drives both how tasks are
+/// distributed and which columns the `Partitioning::Hash` declaration names.
+pub(super) enum PartitionKeys {
+    /// Key made of identity-partitioned columns.
+    Identity(Vec<IdentityCol>),
+    /// Key made of bucket-partitioned columns (see [`BucketCol`]).
+    Bucket(Vec<BucketCol>),
+}
+
+impl PartitionKeys {
+    /// Build one `Column` physical expression per key column, each pointing at
+    /// the column's index in the *output* schema, ready to be handed to
+    /// `Partitioning::Hash`.
+    pub(super) fn column_exprs(&self) -> Vec<Arc<dyn PhysicalExpr>> {
+        // Both variants expose a name and an output index; only the element
+        // type differs, so the expression construction is shared.
+        fn column_expr(name: &str, output_idx: usize) -> Arc<dyn PhysicalExpr> {
+            Arc::new(Column::new(name, output_idx))
+        }
+
+        match self {
+            Self::Identity(identity_cols) => identity_cols
+                .iter()
+                .map(|col| column_expr(&col.name, col.output_idx))
+                .collect(),
+            Self::Bucket(bucket_cols) => bucket_cols
+                .iter()
+                .map(|col| column_expr(&col.name, col.output_idx))
+                .collect(),
+        }
+    }
+}
+
+/// Pick the partition keys for a scan: identity columns when there are any,
+/// otherwise the columns of a *pure* bucket spec.
+///
+/// Identity detection runs first so existing behaviour is preserved, including
+/// the case where a mixed identity+bucket spec contributes only its identity
+/// fields. Bucket detection is consulted only when identity detection yields
+/// nothing (either `None` or an empty list).
+///
+/// Rationale for declaring `Hash` on a pure-bucket spec even though Iceberg's
+/// bucket function is not DataFusion's hash: DataFusion matches
+/// `Partitioning::Hash` against `Distribution::HashPartitioned` by expression
+/// equality only. The property relied on is "rows with equal key tuples live
+/// in the same partition". Iceberg `bucket[N]` provides it per file (equal
+/// source values share a bucket index, hence the same files), and task
+/// distribution keeps it per partition by routing every distinct bucket index
+/// to exactly one DataFusion partition.
+///
+/// NOTE(review): that property covers operators that only need equal keys to
+/// be co-located within this scan. An operator that pairs partition `i` of
+/// this scan with partition `i` of another hash-partitioned input (e.g. a
+/// partitioned join whose other side went through `RepartitionExec`) would
+/// presumably also need both sides to agree on *which* partition a key lands
+/// in, which a bucket-index hash does not provide. Confirm before relying on
+/// this for joins.
+pub(super) fn compute_partition_keys(
+    table: &Table,
+    output_schema: &ArrowSchema,
+) -> Option<PartitionKeys> {
+    let identity_cols =
+        compute_identity_cols(table, output_schema).filter(|cols| !cols.is_empty());
+    match identity_cols {
+        Some(cols) => Some(PartitionKeys::Identity(cols)),
+        None => compute_bucket_cols(table, output_schema).map(PartitionKeys::Bucket),
+    }
+}
+
+/// Distribute `tasks` across `n_partitions` buckets. When `keys` describes a
+/// non-empty, hashable partition key (identity or bucket-index), each task is
+/// hashed on that key using DataFusion's repartition random state so the
+/// resulting partitioning satisfies the `Hash` contract at the row level.
+/// Tasks missing partition data fall back to hashing `data_file_path`, which
+/// still distributes evenly but breaks the `Hash` contract: the second tuple
+/// element flags whether every task supplied a full key.
 pub(super) fn bucket_tasks(
     tasks: Vec<FileScanTask>,
     n_partitions: usize,
-    identity_cols: Option<&[IdentityCol]>,
+    keys: Option<&PartitionKeys>,
 ) -> (Vec<Vec<FileScanTask>>, bool) {
     if n_partitions == 0 {
         return (Vec::new(), tasks.is_empty());
     }
     let mut buckets: Vec<Vec<FileScanTask>> = (0..n_partitions).map(|_| Vec::new()).collect();
     let mut all_full_key = true;
-    let cols = identity_cols.unwrap_or(&[]);
 
     for task in tasks {
-        let bucket_idx = match identity_hash(&task, cols) {
+        let bucket_idx = match partition_hash(&task, keys) {
             Some(h) => (h % n_partitions as u64) as usize,
             None => {
                 all_full_key = false;
@@ -128,6 +231,13 @@ pub(super) fn bucket_tasks(
     (buckets, all_full_key)
 }
 
+/// Compute the partition hash of `task` for whichever kind of key is in use.
+///
+/// Returns `None` when `keys` is `None` or when the variant-specific hasher
+/// cannot produce a hash for this task.
+fn partition_hash(task: &FileScanTask, keys: Option<&PartitionKeys>) -> Option<u64> {
+    keys.and_then(|partition_keys| match partition_keys {
+        PartitionKeys::Identity(identity_cols) => identity_hash(task, identity_cols),
+        PartitionKeys::Bucket(bucket_cols) => bucket_hash(task, bucket_cols),
+    })
+}
+
 /// Hash the identity-partition values of `task` using
 /// [`REPARTITION_RANDOM_STATE`] so the bucket assignment matches DataFusion's
 /// hash-repartition convention. Returns `None` if the task lacks partition
@@ -142,13 +252,33 @@ fn identity_hash(task: &FileScanTask, cols: &[IdentityCol]) -> Option<u64> {
         let lit = partition.fields().get(col.spec_field_idx)?.as_ref()?;
         arrays.push(literal_to_array(lit, &col.output_dtype)?);
     }
+    hash_arrays(&arrays)
+}
+
+/// Hash the bucket indices recorded in `task`'s partition tuple.
+///
+/// A `Transform::Bucket(_)` slot always holds an `Int32` according to the
+/// Iceberg spec, so each index becomes a one-element `Int32Array` no matter
+/// what Arrow type the source column has.
+///
+/// Returns `None` when `cols` is empty, the task carries no partition data, a
+/// slot is out of range or unset, a slot is not an `Int` literal, or hashing
+/// itself fails.
+fn bucket_hash(task: &FileScanTask, cols: &[BucketCol]) -> Option<u64> {
+    if cols.is_empty() {
+        return None;
+    }
+    let partition = task.partition.as_ref()?;
+
+    // One single-row array per bucket column; the first unusable slot aborts.
+    let arrays = cols
+        .iter()
+        .map(|col| {
+            let slot = partition.fields().get(col.spec_field_idx)?.as_ref()?;
+            match slot {
+                Literal::Primitive(PrimitiveLiteral::Int(bucket_idx)) => {
+                    Some(Arc::new(Int32Array::from(vec![*bucket_idx])) as ArrayRef)
+                }
+                _ => None,
+            }
+        })
+        .collect::<Option<Vec<ArrayRef>>>()?;
+
+    hash_arrays(&arrays)
+}
+
+/// Hash `arrays` with DataFusion's `create_hashes` under
+/// [`REPARTITION_RANDOM_STATE`] and return the hash computed for row 0.
+///
+/// The hash buffer has length one, so every array is presumably expected to
+/// hold exactly one row (TODO confirm for arrays built by `literal_to_array`).
+/// Returns `None` if `create_hashes` reports an error.
+fn hash_arrays(arrays: &[ArrayRef]) -> Option<u64> {
     let mut hashes = vec![0u64; 1];
-    create_hashes(
-        &arrays,
-        REPARTITION_RANDOM_STATE.random_state(),
-        &mut hashes,
-    )
-    .ok()?;
+    create_hashes(arrays, REPARTITION_RANDOM_STATE.random_state(), &mut hashes).ok()?;
     Some(hashes[0])
 }