@@ -478,7 +478,10 @@ fn throw_spark_error_as_json(
478478
479479/// Try to convert a DataFusion "Unable to get field named" error into a SparkError.
480480/// DataFusion produces this error when reading Parquet files with duplicate field names
481- /// in case-insensitive mode (e.g., file has columns "b" and "B", query requests "b").
481+ /// in case-insensitive mode. For example, if a Parquet file has columns "B" and "b",
482+ /// DataFusion may deduplicate them and report: Unable to get field named "b". Valid
483+ /// fields: ["A", "B"]. When the requested field has a case-insensitive match among the
484+ /// valid fields, we convert this to Spark's _LEGACY_ERROR_TEMP_2093 error.
482485fn try_convert_duplicate_field_error ( error_msg : & str ) -> Option < SparkError > {
483486 // Match: Schema error: Unable to get field named "X". Valid fields: [...]
484487 lazy_static ! {
@@ -488,26 +491,28 @@ fn try_convert_duplicate_field_error(error_msg: &str) -> Option<SparkError> {
488491 if let Some ( caps) = FIELD_RE . captures ( error_msg) {
489492 let requested_field = caps. get ( 1 ) ?. as_str ( ) ;
490493 let requested_lower = requested_field. to_lowercase ( ) ;
491- // Parse all field names from the Valid fields list: ["A", "B", "b" ]
494+ // Parse field names from the Valid fields list: ["A", "B"] or [A, B, b ]
492495 let valid_fields_raw = caps. get ( 2 ) ?. as_str ( ) ;
493496 let all_fields: Vec < String > = valid_fields_raw
494497 . split ( ',' )
495498 . map ( |s| s. trim ( ) . trim_matches ( '"' ) . to_string ( ) )
496499 . collect ( ) ;
497- // Filter to only fields that match case-insensitively (the actual duplicates).
498- // Spark's ParquetReadSupport.matchCaseInsensitiveField only reports fields
499- // from its case-insensitive map, not all schema fields.
500- let matched: Vec < String > = all_fields
500+ // Find fields that match case-insensitively
501+ let mut matched: Vec < String > = all_fields
501502 . into_iter ( )
502503 . filter ( |f| f. to_lowercase ( ) == requested_lower)
503504 . collect ( ) ;
504- // Only treat as a duplicate-field error if there are 2+ case-insensitive matches
505- if matched. len ( ) < 2 {
505+ // Need at least one case-insensitive match to treat this as a duplicate field error.
506+ // DataFusion may deduplicate columns case-insensitively, so the valid fields list
507+ // might contain only one variant (e.g. "B" when file has both "B" and "b").
508+ // If the requested field differs in case from the match, we infer that both variants existed in the original file.
509+ if matched. is_empty ( ) {
506510 return None ;
507511 }
508- // Spark passes the original table schema field name (uppercase "B") as
509- // requiredFieldName. We don't have that here, so use the requested field
510- // name as-is, which is what DataFusion resolved.
512+ // Add the requested field name if it's not already in the list (different case)
513+ if !matched. iter ( ) . any ( |f| f == requested_field) {
514+ matched. push ( requested_field. to_string ( ) ) ;
515+ }
511516 let required_field_name = requested_field. to_string ( ) ;
512517 let matched_fields = format ! ( "[{}]" , matched. join( ", " ) ) ;
513518 Some ( SparkError :: DuplicateFieldCaseInsensitive {
0 commit comments