pydantic
diff --git a/datafusion/datasource-parquet/src/opener.rs b/datafusion/datasource-parquet/src/opener.rs
Lines changed: 8 additions & 4 deletions
@@ -896,17 +896,21 @@ impl FiltersPreparedParquetOpen {
         // Apply optional row-group and row-range sampling now that we
         // know the actual row-group count. Both calls are no-ops when
         // their respective fraction is `None`. Selection is
-        // deterministic per `(file_name, row_group_index, fraction,
-        // cluster_size)` so re-runs match.
+        // deterministic per `(partition_index, row_group_index,
+        // fraction, cluster_size)` so re-runs match. The execution
+        // `partition_index` is the stable id we plumb in for the
+        // file's partition: it makes sampling reproducible across
+        // environments without depending on object-store paths, and
+        // decorrelates files assigned to different partitions.
         prepared.sampling.apply_row_group_sampling(
             &mut initial_plan,
             rg_metadata.len(),
-            &prepared.file_name,
+            prepared.partition_index,
         );
         prepared.sampling.apply_row_fraction_sampling(
             &mut initial_plan,
             rg_metadata,
-            &prepared.file_name,
+            prepared.partition_index,
         );
 
         let mut row_groups = RowGroupAccessPlanFilter::new(initial_plan);