Skip to content

Commit 8560cca

Browse files
fix(operator): guard against NoSuchElementException in ParallelCSVScanSourceOpDesc.getPhysicalOp (#4375)
### What changes were proposed in this PR?

`ParallelCSVScanSourceOpDesc.getPhysicalOp` called `customDelimiter.get` without first checking whether the `Option` is defined. When `customDelimiter` is `None` (the field's default), this throws a `NoSuchElementException` before the fallback comma delimiter can be applied.

**Before:**

```scala
if (customDelimiter.get.isEmpty) { // throws NoSuchElementException when None
  customDelimiter = Option(",")
}
```

**After:**

```scala
if (customDelimiter.forall(_.isEmpty)) { // true for both None and Some("")
  customDelimiter = Option(",")
}
```

This brings the parallel variant in line with `CSVScanSourceOpDesc`, which has always guarded correctly with the two-part check `customDelimiter.isEmpty || customDelimiter.get.isEmpty`. That check is rewritten in this commit to the equivalent `customDelimiter.forall(_.isEmpty)`, so both operators now use the same idiom.

### Any related issues, documentation, discussions?

Closes #4374

### How was this PR tested?

Four new test cases were added to `CSVScanSourceOpDescSpec`:

1. `"use comma as the default delimiter when customDelimiter is not set for parallel CSV"` — verifies that `getPhysicalOp` does not throw when `customDelimiter` is `None` and that the default `,` is applied.
2. `"use comma as the default delimiter when customDelimiter is empty string for parallel CSV"` — same verification for `Some("")`.
3. `"use comma as the default delimiter when customDelimiter is not set for CSV"` — the `None` case for the non-parallel `CSVScanSourceOpDesc`.
4. `"use comma as the default delimiter when customDelimiter is empty string for CSV"` — the `Some("")` case for the non-parallel `CSVScanSourceOpDesc`.

The existing parallel-CSV schema-inference tests continue to pass unchanged.

### Was this PR authored or co-authored using generative AI tooling?

No.

---------

Signed-off-by: Asish Kumar <officialasishkumar@gmail.com>
1 parent ec02509 commit 8560cca

3 files changed

Lines changed: 34 additions & 2 deletions

File tree

common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/csv/CSVScanSourceOpDesc.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ class CSVScanSourceOpDesc extends ScanSourceOpDesc {
5555
executionId: ExecutionIdentity
5656
): PhysicalOp = {
5757
// fill in default values
58-
if (customDelimiter.isEmpty || customDelimiter.get.isEmpty) {
58+
if (customDelimiter.forall(_.isEmpty)) {
5959
customDelimiter = Option(",")
6060
}
6161

common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/csv/ParallelCSVScanSourceOpDesc.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ class ParallelCSVScanSourceOpDesc extends ScanSourceOpDesc {
5656
executionId: ExecutionIdentity
5757
): PhysicalOp = {
5858
// fill in default values
59-
if (customDelimiter.get.isEmpty) {
59+
if (customDelimiter.forall(_.isEmpty)) {
6060
customDelimiter = Option(",")
6161
}
6262

common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/csv/CSVScanSourceOpDescSpec.scala

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,4 +128,36 @@ class CSVScanSourceOpDescSpec extends AnyFlatSpec with BeforeAndAfter {
128128
)
129129
}
130130

131+
it should "use comma as the default delimiter when customDelimiter is not set for parallel CSV" in {
132+
parallelCsvScanSourceOpDesc.customDelimiter = None
133+
134+
parallelCsvScanSourceOpDesc.getPhysicalOp(DEFAULT_WORKFLOW_ID, DEFAULT_EXECUTION_ID)
135+
136+
assert(parallelCsvScanSourceOpDesc.customDelimiter.contains(","))
137+
}
138+
139+
it should "use comma as the default delimiter when customDelimiter is empty string for parallel CSV" in {
140+
parallelCsvScanSourceOpDesc.customDelimiter = Some("")
141+
142+
parallelCsvScanSourceOpDesc.getPhysicalOp(DEFAULT_WORKFLOW_ID, DEFAULT_EXECUTION_ID)
143+
144+
assert(parallelCsvScanSourceOpDesc.customDelimiter.contains(","))
145+
}
146+
147+
it should "use comma as the default delimiter when customDelimiter is not set for CSV" in {
148+
csvScanSourceOpDesc.customDelimiter = None
149+
150+
csvScanSourceOpDesc.getPhysicalOp(DEFAULT_WORKFLOW_ID, DEFAULT_EXECUTION_ID)
151+
152+
assert(csvScanSourceOpDesc.customDelimiter.contains(","))
153+
}
154+
155+
it should "use comma as the default delimiter when customDelimiter is empty string for CSV" in {
156+
csvScanSourceOpDesc.customDelimiter = Some("")
157+
158+
csvScanSourceOpDesc.getPhysicalOp(DEFAULT_WORKFLOW_ID, DEFAULT_EXECUTION_ID)
159+
160+
assert(csvScanSourceOpDesc.customDelimiter.contains(","))
161+
}
162+
131163
}

0 commit comments

Comments
 (0)