PR feedback

AnishMahto · AnishMahto · commit b71ec8e61303 · 2026-05-21T21:52:28.000Z
diff --git a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/autocdc/Scd1BatchProcessor.scala b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/autocdc/Scd1BatchProcessor.scala
@@ -127,12 +127,9 @@ case class Scd1BatchProcessor(
     val caseSensitiveColumnComparison =
       microbatchWithCdcMetadataDf.sparkSession.sessionState.conf.caseSensitiveAnalysis
 
-    // Calculate the schema of the microbatch less the system-projected CDC metadata column, i.e.
-    // the The user schema is the microbatch's schema after dropping the system columns - i.e the
-    // CDC metadata column.
-
-    // We project out the system columns before applying user selection and project back in
-    // afterwards, so that users cannot control whether these [necessary] columns show up in the
+    // The user schema is the microbatch schema after dropping the system CDC metadata column.
+    // We project out the system column before applying user selection and project it back in
+    // afterwards, so that users cannot control whether this [necessary] column shows up in the
     // target table.
     val userColumnsInMicrobatchSchema = ColumnSelection.applyToSchema(
       schemaName = "microbatch",
diff --git a/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/autocdc/Scd1BatchProcessorSuite.scala b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/autocdc/Scd1BatchProcessorSuite.scala
@@ -897,4 +897,39 @@ class Scd1BatchProcessorSuite extends QueryTest with SharedSparkSession {
       expectedAnswer = Row(1, "u-100", Row(null, 10L))
     )
   }
+
+  test("projectTargetColumnsOntoMicrobatch resolves columnSelection case-insensitively " +
+    "when SQLConf.CASE_SENSITIVE=false") {
+    withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") {
+      val batch = microbatchOf(microbatchWithCdcMetadataSchema)(
+        Row(1, "alice", 30, Row(null, 10L))
+      )
+
+      val processor = Scd1BatchProcessor(
+        changeArgs = ChangeArgs(
+          keys = Seq(UnqualifiedColumnName("id")),
+          sequencing = F.col("seq"),
+          storedAsScdType = ScdType.Type1,
+          // User columns intentionally use a different case than the schema (id, age).
+          columnSelection = Some(
+            ColumnSelection.IncludeColumns(
+              Seq(UnqualifiedColumnName("ID"), UnqualifiedColumnName("AGE"))
+            )
+          )
+        ),
+        resolvedSequencingType = LongType
+      )
+
+      val result = processor.projectTargetColumnsOntoMicrobatch(batch)
+
+      // Output column names follow the microbatch schema's casing, not the casing in the user's
+      // columnSelection; the unselected "alice" value is dropped and CDC metadata comes last.
+      assert(result.schema.fieldNames.toSeq ==
+        Seq("id", "age", Scd1BatchProcessor.cdcMetadataColName))
+      checkAnswer(
+        df = result,
+        expectedAnswer = Row(1, 30, Row(null, 10L))
+      )
+    }
+  }
 }