@@ -385,6 +385,40 @@ impl SparkPhysicalExprAdapter {
385385 let physical_type = cast. input_field ( ) . data_type ( ) ;
386386 let target_type = cast. target_field ( ) . data_type ( ) ;
387387
388+ // Reject reading a string/binary Parquet column as anything other
389+ // than string, binary, or a binary-encoded decimal. This mirrors
390+ // Spark's TypeUtil.checkParquetType for the BINARY case (lines
391+ // 208-221): a BINARY (or UTF8-annotated BINARY) physical column is
392+ // only readable as StringType, BinaryType, or a binary-encoded
393+ // decimal; every other target type (numeric, boolean, date,
394+ // timestamp, ...) raises SchemaColumnConvertNotSupportedException.
395+ //
396+ // Without this guard, Spark's Cast below (in is_adapting_schema
397+ // mode) falls through to DataFusion's cast, which silently parses
398+ // the bytes (returning nulls for non-numeric strings, parsing
399+ // date/timestamp/boolean strings, or in some paths reinterpreting
400+ // raw bytes). See issue #4088.
401+ if matches ! (
402+ physical_type,
403+ DataType :: Utf8 | DataType :: LargeUtf8 | DataType :: Binary | DataType :: LargeBinary
404+ ) && !matches ! (
405+ target_type,
406+ DataType :: Utf8
407+ | DataType :: LargeUtf8
408+ | DataType :: Binary
409+ | DataType :: LargeBinary
410+ | DataType :: Decimal128 ( _, _)
411+ | DataType :: Decimal256 ( _, _)
412+ ) {
413+ return Err ( DataFusionError :: Plan ( format ! (
414+ "Parquet column cannot be converted. Column: [{}], \
415+ Expected: {}, Found: {}",
416+ cast. input_field( ) . name( ) ,
417+ target_type,
418+ physical_type,
419+ ) ) ) ;
420+ }
421+
388422 // For complex nested types (Struct, List, Map), Timestamp timezone
389423 // mismatches, and Timestamp→Int64 (nanosAsLong), use CometCastColumnExpr
390424 // with spark_parquet_convert which handles field-name-based selection,
0 commit comments