@@ -419,6 +419,42 @@ impl SparkPhysicalExprAdapter {
419419 ) ) ) ;
420420 }
421421
422+ // Decimal-to-decimal scale-narrowing check.
423+ // Reject reads where the read schema has a smaller scale than the
424+ // file's, because Spark's Cast below would silently truncate
425+ // fractional digits, producing wrong values. This matches the
426+ // unconditionally-lossy case in issue #4089 (e.g. Decimal(10,2) read
427+ // as Decimal(5,0)).
428+ //
429+ // Other decimal mismatches are intentionally NOT rejected here,
430+ // even though Spark's vectorized reader would reject them via
431+ // `ParquetVectorUpdaterFactory#isDecimalTypeMatched` (which requires
432+ // exact precision and scale):
433+ //
434+ // - Precision-only changes with the same scale (e.g. Decimal(5,2)
435+ // read as Decimal(3,2)): Spark 4.0's parquet-mr fallback path
436+ // (PARQUET_VECTORIZED_READER_ENABLED=false) and the vectorized
437+ // type-widening path produce null on per-value overflow, which
438+ // DataFusion's cast already does in the adapting-schema path.
439+ //
440+ // - Scale widening (e.g. Decimal(10,2) read as Decimal(10,4)): the
441+ // cast never truncates fractional digits; it can overflow only if
442+ // integer digits (precision - scale) shrink, which is left to the
443+ // cast's per-value overflow handling, like the precision-only case.
444+ if let ( DataType :: Decimal128 ( _src_p, src_s) , DataType :: Decimal128 ( _dst_p, dst_s) ) =
445+ ( physical_type, target_type)
446+ {
447+ if dst_s < src_s {
448+ return Err ( DataFusionError :: Plan ( format ! (
449+ "Parquet column cannot be converted. Column: [{}], \
450+ Expected: {}, Found: {}",
451+ cast. input_field( ) . name( ) ,
452+ target_type,
453+ physical_type,
454+ ) ) ) ;
455+ }
456+ }
457+
422458 // For complex nested types (Struct, List, Map), Timestamp timezone
423459 // mismatches, and Timestamp→Int64 (nanosAsLong), use CometCastColumnExpr
424460 // with spark_parquet_convert which handles field-name-based selection,
0 commit comments