Skip to content

Commit 327a653

Browse files
committed
Remove archeology comments.
1 parent 1913208 commit 327a653

3 files changed

Lines changed: 12 additions & 14 deletions

File tree

native/core/src/execution/operators/schema_align.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@
1616
// under the License.
1717

1818
//! `SchemaAlignExec` reshapes its child's output so the per-column Arrow type and field-level
19-
//! nullability match what Spark catalyst declared. Used between an inlined native subtree and
20-
//! `ShuffleWriterExec` when the FFI deep-copy + `ScanExec` cast in `build_record_batch` are both
21-
//! gone, so DataFusion / `datafusion-spark` return-type drift would otherwise be written into
22-
//! shuffle blocks. See <https://github.com/apache/datafusion-comet/issues/4515> for the running
23-
//! list of mismatched functions.
19+
//! nullability match what Spark catalyst declared, casting where necessary. Sits between a native
20+
//! subtree and `ShuffleWriterExec` so DataFusion / `datafusion-spark` return-type drift is caught
21+
//! before it reaches shuffle blocks. See
22+
//! <https://github.com/apache/datafusion-comet/issues/4515> for the running list of mismatched
23+
//! functions.
2424
2525
use arrow::array::{ArrayRef, RecordBatch, RecordBatchOptions};
2626
use arrow::compute::{cast_with_options, CastOptions};
@@ -53,6 +53,9 @@ fn warn_dedup() -> &'static Mutex<HashSet<String>> {
5353
SET.get_or_init(|| Mutex::new(HashSet::new()))
5454
}
5555

56+
/// Casts each column of `child`'s output to the data_type Spark catalyst declared, widening
57+
/// nullability to `actual.nullable || expected.nullable`. See
58+
/// <https://github.com/apache/datafusion-comet/issues/4515>.
5659
#[derive(Debug)]
5760
pub struct SchemaAlignExec {
5861
child: Arc<dyn ExecutionPlan>,
@@ -74,8 +77,7 @@ impl SchemaAlignExec {
7477
/// Build a SchemaAlignExec that aligns `child`'s output to `expected`. Returns
7578
/// `Ok(child)` unchanged when no per-column reshape is needed; otherwise wraps `child`
7679
/// in a SchemaAlignExec whose target schema preserves `expected`'s data_type and metadata
77-
/// but widens nullability to `actual.nullable || expected.nullable` (matching the
78-
/// reconciliation rule used at the FFI boundary on `main`).
80+
/// but widens nullability to `actual.nullable || expected.nullable`.
7981
pub fn try_new_or_passthrough(
8082
child: Arc<dyn ExecutionPlan>,
8183
expected: &SchemaRef,

native/core/src/execution/planner.rs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1164,9 +1164,6 @@ impl PhysicalPlanner {
11641164
)?,
11651165
);
11661166

1167-
// HashAggregate emits its natural shape (group keys + agg results); any
1168-
// post-aggregate projection is serialized as an explicit `OpStruct::Projection`
1169-
// op above by the JVM serializer (see `CometBaseAggregate.doConvert`)
11701167
Ok((
11711168
scans,
11721169
shuffle_scans,

spark/src/main/scala/org/apache/spark/sql/comet/execution/shuffle/CometShuffleExchangeExec.scala

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ case class CometShuffleExchangeExec(
124124
ctx.shuffleScanIndices)
125125
case None =>
126126
// Non-native child (e.g. CometSparkToColumnarExec): no subtree to inline. The dep gets
127-
// built via the legacy convenience overload below; we just need a real RDD of batches.
127+
// built via the convenience overload below; we just need a real RDD of batches.
128128
child.executeColumnar()
129129
}
130130
} else if (shuffleType == CometColumnarShuffle) {
@@ -676,9 +676,8 @@ object CometShuffleExchangeExec
676676
* Implemented as a thin wrapper around [[prepareNativeShuffleDependency]]: synthesizes a
677677
* `Scan("ShuffleWriterInput")` as the child native op (so the writer's plan is still
678678
* `ShuffleWriter -> Scan`, consuming JVM batches via Arrow C Stream), wraps `rdd` as the single
679-
* leaf input of a thin scheduling RDD, and supplies a minimal [[NativeExecContext]]. Same wire
680-
* shape as before; one writer code path for both this case and the [[CometShuffleExchangeExec]]
681-
* case.
679+
* leaf input of a thin scheduling RDD, and supplies a minimal [[NativeExecContext]]. Lets the
680+
* writer use one code path for both this case and the [[CometShuffleExchangeExec]] case.
682681
*/
683682
def prepareShuffleDependency(
684683
rdd: RDD[ColumnarBatch],

0 commit comments

Comments
 (0)