From 87915a6e70302a14885085cd7bc54008523a4af3 Mon Sep 17 00:00:00 2001 From: klion26 Date: Thu, 9 Apr 2026 19:32:29 +0800 Subject: [PATCH 01/13] [Variant] Align cast logic for from/to_decimal for variant --- arrow-cast/src/cast/decimal.rs | 136 ++++++++---------- arrow-cast/src/cast/mod.rs | 23 ++- .../src/type_conversion.rs | 16 ++- parquet-variant/src/variant.rs | 134 +++++++++++++++-- 4 files changed, 220 insertions(+), 89 deletions(-) diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs index 3553f2b6a76f..e58cfde9a1d5 100644 --- a/arrow-cast/src/cast/decimal.rs +++ b/arrow-cast/src/cast/decimal.rs @@ -531,7 +531,7 @@ where /// Parses given string to specified decimal native (i128/i256) based on given /// scale. Returns an `Err` if it cannot parse given string. -pub(crate) fn parse_string_to_decimal_native( +pub fn parse_string_to_decimal_native( value_str: &str, scale: usize, ) -> Result @@ -777,7 +777,7 @@ where if cast_options.safe { array .unary_opt::<_, D>(|v| { - D::Native::from_f64((mul * v.as_()).round()) + single_float_to_decimal::(v.as_(), mul) .filter(|v| D::is_valid_decimal_precision(*v, precision)) }) .with_precision_and_scale(precision, scale) @@ -785,7 +785,7 @@ where } else { array .try_unary::<_, D, _>(|v| { - D::Native::from_f64((mul * v.as_()).round()) + single_float_to_decimal::(v.as_(), mul) .ok_or_else(|| { ArrowError::CastError(format!( "Cannot cast to {}({}, {}). Overflowing on {:?}", @@ -802,6 +802,17 @@ where } } +/// Cast a single floating point value to a decimal native with the given multiple. +/// Returns `None` if the value cannot be represented with the requested precision. +#[inline] +pub fn single_float_to_decimal(input: f64, mul: f64) -> Option +where + D: DecimalType + ArrowPrimitiveType, + ::Native: DecimalCast, +{ + D::Native::from_f64((mul * input).round()) +} + pub(crate) fn cast_decimal_to_integer( array: &dyn Array, base: D::Native, @@ -826,84 +837,63 @@ where let mut value_builder = PrimitiveBuilder::::with_capacity(array.len()); - if scale < 0 { - match cast_options.safe { - true => { - for i in 0..array.len() { - if array.is_null(i) { - value_builder.append_null(); - } else { - let v = array - .value(i) - .mul_checked(div) - .ok() - .and_then(::from::); - value_builder.append_option(v); - } - } - } - false => { - for i in 0..array.len() { - if array.is_null(i) { - value_builder.append_null(); - } else { - let v = array.value(i).mul_checked(div)?; - - let value = - ::from::(v).ok_or_else(|| { - ArrowError::CastError(format!( - "value of {:?} is out of range {}", - v, - T::DATA_TYPE - )) - })?; - - value_builder.append_value(value); - } - } - } - } - } else { - match cast_options.safe { - true => { - for i in 0..array.len() { - if array.is_null(i) { - value_builder.append_null(); - } else { - let v = array - .value(i) - .div_checked(div) - .ok() - .and_then(::from::); - value_builder.append_option(v); - } + for i in 0..array.len() { + if array.is_null(i) { + value_builder.append_null(); + } else { + match cast_options.safe { + true => { + let v = cast_single_decimal_to_integer::( + array.value(i), + div, + scale as _, + T::DATA_TYPE, + ) + .ok(); + value_builder.append_option(v); } - } - false => { - for i in 0..array.len() { - if array.is_null(i) { - value_builder.append_null(); - } else { - let v = array.value(i).div_checked(div)?; - - let value = - ::from::(v).ok_or_else(|| { - ArrowError::CastError(format!( - "value of {:?} is out of range {}", - v, - T::DATA_TYPE - )) - })?; - - value_builder.append_value(value); - } + false => { + let value = cast_single_decimal_to_integer::( + array.value(i), + div, + scale as _, + T::DATA_TYPE, + )?; + + value_builder.append_value(value); } } } } + Ok(Arc::new(value_builder.finish())) } +/// Casting a given decimal to an integer based on given div and scale. +/// The value is scaled by multiplying or dividing with the div based on the scale sign. +/// Returns `Err` if the value is overflow or cannot be represented with the requested precision. +pub fn cast_single_decimal_to_integer( + value: D::Native, + div: D::Native, + scale: i16, + type_name: DataType, +) -> Result +where + T: NumCast + ToPrimitive, + D: DecimalType + ArrowPrimitiveType, + ::Native: ToPrimitive, +{ + let v = if scale < 0 { + value.mul_checked(div)? + } else { + value.div_checked(div)? + }; + + T::from::(v).ok_or_else(|| { + ArrowError::CastError(format!("value of {:?} is out of range {:?}", v, type_name)) + }) +} + /// Cast a decimal array to a floating point array. /// /// Conversion is lossy and follows standard floating point semantics. Values diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index 5f08dcbfc138..ea549fd8a592 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -72,9 +72,26 @@ use arrow_schema::*; use arrow_select::take::take; use num_traits::{NumCast, ToPrimitive, cast::AsPrimitive}; -pub use decimal::{DecimalCast, rescale_decimal}; +pub use decimal::{ + DecimalCast, cast_single_decimal_to_integer, parse_string_to_decimal_native, rescale_decimal, + single_float_to_decimal, +}; pub use string::cast_single_string_to_boolean_default; +/// Lossy conversion from decimal to float. +/// +/// Conversion is lossy and follows standard floating point semantics. Values +/// that exceed the representable range become `INFINITY` or `-INFINITY` without +/// returning an error. +#[inline] +pub fn single_decimal_to_float_lossy(f: &F, x: D::Native, scale: i32) -> f64 +where + D: DecimalType, + F: Fn(D::Native) -> f64, +{ + f(x) / 10_f64.powi(scale) +} + /// CastOptions provides a way to override the default cast behaviors #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct CastOptions<'a> { @@ -2314,10 +2331,10 @@ where Int32 => cast_decimal_to_integer::(array, base, *scale, cast_options), Int64 => cast_decimal_to_integer::(array, base, *scale, cast_options), Float32 => cast_decimal_to_float::(array, |x| { - (as_float(x) / 10_f64.powi(*scale as i32)) as f32 + single_decimal_to_float_lossy::(&as_float, x, *scale as _) as f32 }), Float64 => cast_decimal_to_float::(array, |x| { - as_float(x) / 10_f64.powi(*scale as i32) + single_decimal_to_float_lossy::(&as_float, x, *scale as _) }), Utf8View => value_to_string_view(array, cast_options), Utf8 => value_to_string::(array, cast_options), diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index 7b9eb67d1a95..9360b68fad0e 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -17,7 +17,10 @@ //! Module for transforming a typed arrow `Array` to `VariantArray`. -use arrow::compute::{CastOptions, DecimalCast, rescale_decimal}; +use arrow::compute::{ + CastOptions, DecimalCast, parse_string_to_decimal_native, rescale_decimal, + single_float_to_decimal, +}; use arrow::datatypes::{ self, ArrowPrimitiveType, ArrowTimestampType, Decimal32Type, Decimal64Type, Decimal128Type, DecimalType, @@ -204,9 +207,12 @@ impl_timestamp_from_variant!( /// /// - `precision` and `scale` specify the target Arrow decimal parameters /// - Integer variants (`Int8/16/32/64`) are treated as decimals with scale 0 +/// - Floating point variants (`Float/Double`) are converted to decimals with the given scale +/// - String variants (`String/ShortString`) are parsed as decimals with the given scale /// - Decimal variants (`Decimal4/8/16`) use their embedded precision and scale /// -/// The value is rescaled to (`precision`, `scale`) using `rescale_decimal` and +/// The value is rescaled to (`precision`, `scale`) using `rescale_decimal` for integers, +/// `single_float_to_decimal` for floats, and `parse_string_to_decimal_native` for strings. /// returns `None` if it cannot fit the requested precision. pub(crate) fn variant_to_unscaled_decimal( variant: &Variant<'_, '_>, @@ -217,6 +223,8 @@ where O: DecimalType, O::Native: DecimalCast, { + let mul = 10_f64.powi(scale as i32); + match variant { Variant::Int8(i) => rescale_decimal::( *i as i32, @@ -246,6 +254,10 @@ where precision, scale, ), + Variant::Float(f) => single_float_to_decimal::(*f as _, mul), + Variant::Double(f) => single_float_to_decimal::(*f, mul), + Variant::String(v) => parse_string_to_decimal_native::(v, scale as _).ok(), + Variant::ShortString(v) => parse_string_to_decimal_native::(v, scale as _).ok(), Variant::Decimal4(d) => rescale_decimal::( d.integer(), VariantDecimal4::MAX_PRECISION, diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index accff009045a..1c1316681fac 100644 --- a/parquet-variant/src/variant.rs +++ b/parquet-variant/src/variant.rs @@ -29,10 +29,17 @@ use crate::decoder::{ }; use crate::path::{VariantPath, VariantPathElement}; use crate::utils::{first_byte_from_slice, slice_from_slice}; +use arrow::array::ArrowNativeTypeOp; use arrow::compute::{ - cast_num_to_bool, cast_single_string_to_boolean_default, num_cast, single_bool_to_numeric, + cast_num_to_bool, cast_single_decimal_to_integer, cast_single_string_to_boolean_default, + num_cast, parse_string_to_decimal_native, single_bool_to_numeric, + single_decimal_to_float_lossy, single_float_to_decimal, }; -use arrow_schema::ArrowError; +use arrow::datatypes::{Decimal32Type, Decimal64Type, Decimal128Type, DecimalType}; +use arrow_schema::DataType::{ + Float16, Float32, Float64, Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, +}; +use arrow_schema::{ArrowError, DataType}; use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Timelike, Utc}; use num_traits::NumCast; use std::ops::Deref; @@ -166,10 +173,11 @@ impl Deref for ShortString<'_> { /// Arrow UTF8-to-boolean cast rules. /// - Numeric accessors such as [`Self::as_int8`], [`Self::as_int64`], [`Self::as_u8`], /// [`Self::as_u64`], [`Self::as_f16`], [`Self::as_f32`], and [`Self::as_f64`] accept -/// boolean and numeric variants (integers, floating-point, and decimals with scale `0`). +/// boolean and numeric variants (integers, floating-point, and decimals). /// They return `None` when conversion is not possible. /// - Decimal accessors such as [`Self::as_decimal4`], [`Self::as_decimal8`], and -/// [`Self::as_decimal16`] accept compatible decimal variants and integer variants. +/// [`Self::as_decimal16`] accept compatible decimal variants, integer variants, +/// float variants and string variants. /// They return `None` when conversion is not possible. /// /// # Examples: @@ -294,6 +302,39 @@ pub enum Variant<'m, 'v> { // We don't want this to grow because it could hurt performance of a frequently-created type. const _: () = crate::utils::expect_size_of::(80); +enum NumericKind { + Integer, + Float, +} + +trait DecimalCastTarget: NumCast + Default { + const KIND: NumericKind; + fn arrow_type() -> DataType; +} + +macro_rules! impl_decimal_cast_target { + ($raw_type: ident, $target_kind:expr, $arrow_type: expr) => { + impl DecimalCastTarget for $raw_type { + const KIND: NumericKind = $target_kind; + fn arrow_type() -> DataType { + $arrow_type + } + } + }; +} + +impl_decimal_cast_target!(i8, NumericKind::Integer, Int8); +impl_decimal_cast_target!(i16, NumericKind::Integer, Int16); +impl_decimal_cast_target!(i32, NumericKind::Integer, Int32); +impl_decimal_cast_target!(i64, NumericKind::Integer, Int64); +impl_decimal_cast_target!(u8, NumericKind::Integer, UInt8); +impl_decimal_cast_target!(u16, NumericKind::Integer, UInt16); +impl_decimal_cast_target!(u32, NumericKind::Integer, UInt32); +impl_decimal_cast_target!(u64, NumericKind::Integer, UInt64); +impl_decimal_cast_target!(f16, NumericKind::Float, Float16); +impl_decimal_cast_target!(f32, NumericKind::Float, Float32); +impl_decimal_cast_target!(f64, NumericKind::Float, Float64); + impl<'m, 'v> Variant<'m, 'v> { /// Attempts to interpret a metadata and value buffer pair as a new `Variant`. /// @@ -797,14 +838,36 @@ impl<'m, 'v> Variant<'m, 'v> { } } - /// Converts a boolean or numeric variant(integers, floating-point, and decimals with scale 0) + fn cast_decimal_to_num(raw: D::Native, scale: u8, as_float: F) -> Option + where + D: DecimalType, + D::Native: NumCast + ArrowNativeTypeOp, + T: DecimalCastTarget, + F: Fn(D::Native) -> f64, + { + let base: D::Native = NumCast::from(10)?; + + base.pow_checked(scale as _) + .ok() + .and_then(|div| match T::KIND { + NumericKind::Integer => { + cast_single_decimal_to_integer::(raw, div, scale as _, T::arrow_type()) + .ok() + } + NumericKind::Float => T::from(single_decimal_to_float_lossy::( + &as_float, raw, scale as _, + )), + }) + } + + /// Converts a boolean or numeric variant(integers, floating-point, and decimals) /// to the specified numeric type `T`. /// /// Uses Arrow's casting logic to perform the conversion. Returns `Some(T)` if /// the conversion succeeds, `None` if the variant can't be casted to type `T`. fn as_num(&self) -> Option where - T: NumCast + Default, + T: DecimalCastTarget, { match *self { Variant::BooleanFalse => single_bool_to_numeric(false), @@ -815,9 +878,21 @@ impl<'m, 'v> Variant<'m, 'v> { Variant::Int64(i) => num_cast(i), Variant::Float(f) => num_cast(f), Variant::Double(d) => num_cast(d), - Variant::Decimal4(d) if d.scale() == 0 => num_cast(d.integer()), - Variant::Decimal8(d) if d.scale() == 0 => num_cast(d.integer()), - Variant::Decimal16(d) if d.scale() == 0 => num_cast(d.integer()), + Variant::Decimal4(d) => Self::cast_decimal_to_num::( + d.integer(), + d.scale(), + |x: i32| x as f64, + ), + Variant::Decimal8(d) => Self::cast_decimal_to_num::( + d.integer(), + d.scale(), + |x: i64| x as f64, + ), + Variant::Decimal16(d) => Self::cast_decimal_to_num::( + d.integer(), + d.scale(), + |x: i128| x as f64, + ), _ => None, } } @@ -1138,6 +1213,18 @@ impl<'m, 'v> Variant<'m, 'v> { Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | Variant::Int64(_) => { self.as_num::().and_then(|x| x.try_into().ok()) } + Variant::Float(f) => single_float_to_decimal::(f as _, 1f64) + .and_then(|x: i32| x.try_into().ok()), + Variant::Double(f) => single_float_to_decimal::(f, 1f64) + .and_then(|x: i32| x.try_into().ok()), + Variant::String(v) => parse_string_to_decimal_native::(v, 0usize) + .ok() + .and_then(|x: i32| x.try_into().ok()), + Variant::ShortString(v) => { + parse_string_to_decimal_native::(v.as_str(), 0usize) + .ok() + .and_then(|x: i32| x.try_into().ok()) + } Variant::Decimal4(decimal4) => Some(decimal4), Variant::Decimal8(decimal8) => decimal8.try_into().ok(), Variant::Decimal16(decimal16) => decimal16.try_into().ok(), @@ -1177,6 +1264,18 @@ impl<'m, 'v> Variant<'m, 'v> { Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | Variant::Int64(_) => { self.as_num::().and_then(|x| x.try_into().ok()) } + Variant::Float(f) => single_float_to_decimal::(f as _, 1f64) + .and_then(|x: i64| x.try_into().ok()), + Variant::Double(f) => single_float_to_decimal::(f, 1f64) + .and_then(|x: i64| x.try_into().ok()), + Variant::String(v) => parse_string_to_decimal_native::(v, 0usize) + .ok() + .and_then(|x: i64| x.try_into().ok()), + Variant::ShortString(v) => { + parse_string_to_decimal_native::(v.as_str(), 0usize) + .ok() + .and_then(|x: i64| x.try_into().ok()) + } Variant::Decimal4(decimal4) => Some(decimal4.into()), Variant::Decimal8(decimal8) => Some(decimal8), Variant::Decimal16(decimal16) => decimal16.try_into().ok(), @@ -1205,8 +1304,21 @@ impl<'m, 'v> Variant<'m, 'v> { /// ``` pub fn as_decimal16(&self) -> Option { match *self { - Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | Variant::Int64(_) => { - self.as_num::().and_then(|x| x.try_into().ok()) + Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | Variant::Int64(_) => self + .as_num::() + .map(|x| (x as i128).try_into().ok()) + .unwrap(), + Variant::Float(f) => single_float_to_decimal::(f as _, 1f64) + .and_then(|x: i128| x.try_into().ok()), + Variant::Double(f) => single_float_to_decimal::(f, 1f64) + .and_then(|x: i128| x.try_into().ok()), + Variant::String(v) => parse_string_to_decimal_native::(v, 0usize) + .ok() + .and_then(|x: i128| x.try_into().ok()), + Variant::ShortString(v) => { + parse_string_to_decimal_native::(v.as_str(), 0usize) + .ok() + .and_then(|x: i128| x.try_into().ok()) } Variant::Decimal4(decimal4) => Some(decimal4.into()), Variant::Decimal8(decimal8) => Some(decimal8.into()), From 097e97184d063c892bcc7b07d81f4c333e54a1f5 Mon Sep 17 00:00:00 2001 From: klion26 Date: Fri, 10 Apr 2026 17:54:24 +0800 Subject: [PATCH 02/13] add some example for decimal from string --- parquet-variant/src/variant.rs | 149 +++++++++++++++++++-------------- 1 file changed, 84 insertions(+), 65 deletions(-) diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index 1c1316681fac..a417c10700b2 100644 --- a/parquet-variant/src/variant.rs +++ b/parquet-variant/src/variant.rs @@ -31,9 +31,9 @@ use crate::path::{VariantPath, VariantPathElement}; use crate::utils::{first_byte_from_slice, slice_from_slice}; use arrow::array::ArrowNativeTypeOp; use arrow::compute::{ - cast_num_to_bool, cast_single_decimal_to_integer, cast_single_string_to_boolean_default, - num_cast, parse_string_to_decimal_native, single_bool_to_numeric, - single_decimal_to_float_lossy, single_float_to_decimal, + DecimalCast, cast_num_to_bool, cast_single_decimal_to_integer, + cast_single_string_to_boolean_default, num_cast, parse_string_to_decimal_native, + single_bool_to_numeric, single_decimal_to_float_lossy, single_float_to_decimal, }; use arrow::datatypes::{Decimal32Type, Decimal64Type, Decimal128Type, DecimalType}; use arrow_schema::DataType::{ @@ -1037,17 +1037,17 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(d); /// assert_eq!(v2.as_u8(), Some(26u8)); /// + /// // or a variant that decimal with scale not equal to zero + /// let d = VariantDecimal4::try_new(123, 2).unwrap(); + /// let v3 = Variant::from(d); + /// assert_eq!(v3.as_u8(), Some(1)); + /// /// // or from boolean variant - /// let v3 = Variant::BooleanFalse; - /// assert_eq!(v3.as_u8(), Some(0)); + /// let v4 = Variant::BooleanFalse; + /// assert_eq!(v4.as_u8(), Some(0)); /// /// // but not a variant that can't fit into the range - /// let v4 = Variant::from(-1); - /// assert_eq!(v4.as_u8(), None); - /// - /// // not a variant that decimal with scale not equal to zero - /// let d = VariantDecimal4::try_new(1, 2).unwrap(); - /// let v5 = Variant::from(d); + /// let v5 = Variant::from(-1); /// assert_eq!(v5.as_u8(), None); /// /// // or not a variant that cannot be cast into an integer @@ -1078,17 +1078,17 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(d); /// assert_eq!(v2.as_u16(), Some(u16::MAX)); /// + /// // or a variant that decimal with scale not equal to zero + /// let d = VariantDecimal4::try_new(123, 2).unwrap(); + /// let v3 = Variant::from(d); + /// assert_eq!(v3.as_u16(), Some(1)); + /// /// // or from boolean variant - /// let v3= Variant::BooleanFalse; - /// assert_eq!(v3.as_u16(), Some(0)); + /// let v4= Variant::BooleanFalse; + /// assert_eq!(v4.as_u16(), Some(0)); /// /// // but not a variant that can't fit into the range - /// let v4 = Variant::from(-1); - /// assert_eq!(v4.as_u16(), None); - /// - /// // not a variant that decimal with scale not equal to zero - /// let d = VariantDecimal4::try_new(1, 2).unwrap(); - /// let v5 = Variant::from(d); + /// let v5 = Variant::from(-1); /// assert_eq!(v5.as_u16(), None); /// /// // or not a variant that cannot be cast into an integer @@ -1119,17 +1119,17 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(d); /// assert_eq!(v2.as_u32(), Some(u32::MAX)); /// + /// // or a variant that decimal with scale not equal to zero + /// let d = VariantDecimal8::try_new(123, 2).unwrap(); + /// let v3 = Variant::from(d); + /// assert_eq!(v3.as_u32(), Some(1)); + /// /// // or from boolean variant - /// let v3 = Variant::BooleanFalse; - /// assert_eq!(v3.as_u32(), Some(0)); + /// let v4 = Variant::BooleanFalse; + /// assert_eq!(v4.as_u32(), Some(0)); /// /// // but not a variant that can't fit into the range - /// let v4 = Variant::from(-1); - /// assert_eq!(v4.as_u32(), None); - /// - /// // not a variant that decimal with scale not equal to zero - /// let d = VariantDecimal8::try_new(1, 2).unwrap(); - /// let v5 = Variant::from(d); + /// let v5 = Variant::from(-1); /// assert_eq!(v5.as_u32(), None); /// /// // or not a variant that cannot be cast into an integer @@ -1160,17 +1160,17 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(d); /// assert_eq!(v2.as_u64(), Some(u64::MAX)); /// + /// // or a variant that decimal with scale not equal to zero + /// let d = VariantDecimal16::try_new(123, 2).unwrap(); + /// let v3 = Variant::from(d); + /// assert_eq!(v3.as_u64(), Some(1)); + /// /// // or from boolean variant - /// let v3 = Variant::BooleanFalse; - /// assert_eq!(v3.as_u64(), Some(0)); + /// let v4 = Variant::BooleanFalse; + /// assert_eq!(v4.as_u64(), Some(0)); /// /// // but not a variant that can't fit into the range - /// let v4 = Variant::from(-1); - /// assert_eq!(v4.as_u64(), None); - /// - /// // not a variant that decimal with scale not equal to zero - /// let d = VariantDecimal16::try_new(1, 2).unwrap(); - /// let v5 = Variant::from(d); + /// let v5 = Variant::from(-1); /// assert_eq!(v5.as_u64(), None); /// /// // or not a variant that cannot be cast into an integer @@ -1181,6 +1181,25 @@ impl<'m, 'v> Variant<'m, 'v> { self.as_num() } + fn convert_string_to_decimal(input: &str) -> Option + where + D: DecimalType, + VD: VariantDecimalType, + D::Native: NumCast + DecimalCast, + { + // find the last '.' + let scale_usize = input + .rsplit_once('.') + .map(|(_, frac)| frac.len()) + .unwrap_or(0); + + let scale = u8::try_from(scale_usize).ok()?; + + parse_string_to_decimal_native::(input, scale_usize) + .ok() + .and_then(|raw| VD::try_new(raw, scale).ok()) + } + /// Converts this variant to tuple with a 4-byte unscaled value if possible. /// /// Returns `Some((i32, u8))` for decimal variants where the unscaled value @@ -1200,13 +1219,17 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(VariantDecimal8::try_new(1234_i64, 2).unwrap()); /// assert_eq!(v2.as_decimal4(), VariantDecimal4::try_new(1234_i32, 2).ok()); /// + /// // or from string variants if they can be parsed as decimals + /// let v3 = Variant::from("123.45"); + /// assert_eq!(v3.as_decimal4(), VariantDecimal4::try_new(12345, 2).ok()); + /// /// // but not if the value would overflow i32 - /// let v3 = Variant::from(VariantDecimal8::try_new(12345678901i64, 2).unwrap()); - /// assert_eq!(v3.as_decimal4(), None); + /// let v4 = Variant::from(VariantDecimal8::try_new(12345678901i64, 2).unwrap()); + /// assert_eq!(v4.as_decimal4(), None); /// /// // or if the variant is not a decimal - /// let v4 = Variant::from("hello!"); - /// assert_eq!(v4.as_decimal4(), None); + /// let v5 = Variant::from("hello!"); + /// assert_eq!(v5.as_decimal4(), None); /// ``` pub fn as_decimal4(&self) -> Option { match *self { @@ -1217,13 +1240,9 @@ impl<'m, 'v> Variant<'m, 'v> { .and_then(|x: i32| x.try_into().ok()), Variant::Double(f) => single_float_to_decimal::(f, 1f64) .and_then(|x: i32| x.try_into().ok()), - Variant::String(v) => parse_string_to_decimal_native::(v, 0usize) - .ok() - .and_then(|x: i32| x.try_into().ok()), + Variant::String(v) => Self::convert_string_to_decimal::<_, Decimal32Type>(v), Variant::ShortString(v) => { - parse_string_to_decimal_native::(v.as_str(), 0usize) - .ok() - .and_then(|x: i32| x.try_into().ok()) + Self::convert_string_to_decimal::<_, Decimal32Type>(v.as_str()) } Variant::Decimal4(decimal4) => Some(decimal4), Variant::Decimal8(decimal8) => decimal8.try_into().ok(), @@ -1235,7 +1254,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to tuple with an 8-byte unscaled value if possible. /// /// Returns `Some((i64, u8))` for decimal variants where the unscaled value - /// fits in `i64` range, + /// fits in `i64` range, the scale will be 0 if the input is string variants. /// `None` for non-decimal variants or decimal values that would overflow. /// /// # Examples @@ -1251,13 +1270,17 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(VariantDecimal16::try_new(1234_i128, 2).unwrap()); /// assert_eq!(v2.as_decimal8(), VariantDecimal8::try_new(1234_i64, 2).ok()); /// + /// // or from string variants if they can be parsed as decimals + /// let v3 = Variant::from("123.45"); + /// assert_eq!(v3.as_decimal8(), VariantDecimal8::try_new(12345, 2).ok()); + /// /// // but not if the value would overflow i64 - /// let v3 = Variant::from(VariantDecimal16::try_new(2e19 as i128, 2).unwrap()); - /// assert_eq!(v3.as_decimal8(), None); + /// let v4 = Variant::from(VariantDecimal16::try_new(2e19 as i128, 2).unwrap()); + /// assert_eq!(v4.as_decimal8(), None); /// /// // or if the variant is not a decimal - /// let v4 = Variant::from("hello!"); - /// assert_eq!(v4.as_decimal8(), None); + /// let v5 = Variant::from("hello!"); + /// assert_eq!(v5.as_decimal8(), None); /// ``` pub fn as_decimal8(&self) -> Option { match *self { @@ -1268,13 +1291,9 @@ impl<'m, 'v> Variant<'m, 'v> { .and_then(|x: i64| x.try_into().ok()), Variant::Double(f) => single_float_to_decimal::(f, 1f64) .and_then(|x: i64| x.try_into().ok()), - Variant::String(v) => parse_string_to_decimal_native::(v, 0usize) - .ok() - .and_then(|x: i64| x.try_into().ok()), + Variant::String(v) => Self::convert_string_to_decimal::<_, Decimal64Type>(v), Variant::ShortString(v) => { - parse_string_to_decimal_native::(v.as_str(), 0usize) - .ok() - .and_then(|x: i64| x.try_into().ok()) + Self::convert_string_to_decimal::<_, Decimal64Type>(v.as_str()) } Variant::Decimal4(decimal4) => Some(decimal4.into()), Variant::Decimal8(decimal8) => Some(decimal8), @@ -1286,7 +1305,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to tuple with a 16-byte unscaled value if possible. /// /// Returns `Some((i128, u8))` for decimal variants where the unscaled value - /// fits in `i128` range, + /// fits in `i128` range, the scale will be 0 if the input is string variants. /// `None` for non-decimal variants or decimal values that would overflow. /// /// # Examples @@ -1298,9 +1317,13 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap()); /// assert_eq!(v1.as_decimal16(), VariantDecimal16::try_new(1234_i128, 2).ok()); /// + /// // or from a string variant if it can be parsed as decimal + /// let v2 = Variant::from("123.45"); + /// assert_eq!(v2.as_decimal16(), VariantDecimal16::try_new(12345, 2).ok()); + /// /// // but not if the variant is not a decimal - /// let v2 = Variant::from("hello!"); - /// assert_eq!(v2.as_decimal16(), None); + /// let v3 = Variant::from("hello!"); + /// assert_eq!(v3.as_decimal16(), None); /// ``` pub fn as_decimal16(&self) -> Option { match *self { @@ -1312,13 +1335,9 @@ impl<'m, 'v> Variant<'m, 'v> { .and_then(|x: i128| x.try_into().ok()), Variant::Double(f) => single_float_to_decimal::(f, 1f64) .and_then(|x: i128| x.try_into().ok()), - Variant::String(v) => parse_string_to_decimal_native::(v, 0usize) - .ok() - .and_then(|x: i128| x.try_into().ok()), + Variant::String(v) => Self::convert_string_to_decimal::<_, Decimal128Type>(v), Variant::ShortString(v) => { - parse_string_to_decimal_native::(v.as_str(), 0usize) - .ok() - .and_then(|x: i128| x.try_into().ok()) + Self::convert_string_to_decimal::<_, Decimal128Type>(v.as_str()) } Variant::Decimal4(decimal4) => Some(decimal4.into()), Variant::Decimal8(decimal8) => Some(decimal8.into()), From 0d75884cf96ab673a8e036200c39a081be63b992 Mon Sep 17 00:00:00 2001 From: klion26 Date: Mon, 13 Apr 2026 20:41:23 +0800 Subject: [PATCH 03/13] address comment --- arrow-cast/src/cast/decimal.rs | 44 ++++++------ arrow-cast/src/cast/mod.rs | 7 +- .../src/type_conversion.rs | 14 +++- parquet-variant/src/variant.rs | 71 ++++++++++--------- 4 files changed, 75 insertions(+), 61 deletions(-) diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs index e58cfde9a1d5..230a3b4960aa 100644 --- a/arrow-cast/src/cast/decimal.rs +++ b/arrow-cast/src/cast/decimal.rs @@ -837,33 +837,35 @@ where let mut value_builder = PrimitiveBuilder::::with_capacity(array.len()); - for i in 0..array.len() { - if array.is_null(i) { - value_builder.append_null(); - } else { - match cast_options.safe { - true => { - let v = cast_single_decimal_to_integer::( - array.value(i), - div, - scale as _, - T::DATA_TYPE, - ) - .ok(); - value_builder.append_option(v); - } - false => { - let value = cast_single_decimal_to_integer::( + // Helper macro for emitting nearly the same loop every time, so we can hoist branches out + // The compiler will specialize the resulting code (inlining and jump threading) + macro_rules! cast_loop { + (|$v: ident| $body:expr) => {{ + for i in 0..array.len() { + if array.is_null(i) { + value_builder.append_null(); + } else { + let $v = cast_single_decimal_to_integer::( array.value(i), div, - scale as _, + >::from(scale), T::DATA_TYPE, - )?; - - value_builder.append_value(value); + ); + $body } } + }}; + } + if scale < 0 { + if cast_options.safe { + cast_loop!(|v| value_builder.append_option(v.ok())); + } else { + cast_loop!(|v| value_builder.append_value(v?)); } + } else if cast_options.safe { + cast_loop!(|v| value_builder.append_option(v.ok())); + } else { + cast_loop!(|v| value_builder.append_value(v?)); } Ok(Arc::new(value_builder.finish())) diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index ea549fd8a592..1679eaa586e9 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -89,7 +89,7 @@ where D: DecimalType, F: Fn(D::Native) -> f64, { - f(x) / 10_f64.powi(scale) + f(x) * 10_f64.powi(-scale) } /// CastOptions provides a way to override the default cast behaviors @@ -2331,10 +2331,11 @@ where Int32 => cast_decimal_to_integer::(array, base, *scale, cast_options), Int64 => cast_decimal_to_integer::(array, base, *scale, cast_options), Float32 => cast_decimal_to_float::(array, |x| { - single_decimal_to_float_lossy::(&as_float, x, *scale as _) as f32 + single_decimal_to_float_lossy::(&as_float, x, >::from(*scale)) + as f32 }), Float64 => cast_decimal_to_float::(array, |x| { - single_decimal_to_float_lossy::(&as_float, x, *scale as _) + single_decimal_to_float_lossy::(&as_float, x, >::from(*scale)) }), Utf8View => value_to_string_view(array, cast_options), Utf8 => value_to_string::(array, cast_options), diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index 9360b68fad0e..e8b9104ad997 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -254,10 +254,18 @@ where precision, scale, ), - Variant::Float(f) => single_float_to_decimal::(*f as _, mul), + Variant::Float(f) => single_float_to_decimal::(f64::from(*f), mul), Variant::Double(f) => single_float_to_decimal::(*f, mul), - Variant::String(v) => parse_string_to_decimal_native::(v, scale as _).ok(), - Variant::ShortString(v) => parse_string_to_decimal_native::(v, scale as _).ok(), + Variant::String(v) if scale > 0 => parse_string_to_decimal_native::( + v, + >::try_into(scale).expect("scale is positive, would never fail"), + ) + .ok(), + Variant::ShortString(v) if scale > 0 => parse_string_to_decimal_native::( + v, + >::try_into(scale).expect("scale is positive, would never fail"), + ) + .ok(), Variant::Decimal4(d) => rescale_decimal::( d.integer(), VariantDecimal4::MAX_PRECISION, diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index a417c10700b2..24f0a62d4a8f 100644 --- a/parquet-variant/src/variant.rs +++ b/parquet-variant/src/variant.rs @@ -847,15 +847,20 @@ impl<'m, 'v> Variant<'m, 'v> { { let base: D::Native = NumCast::from(10)?; - base.pow_checked(scale as _) + base.pow_checked(>::from(scale)) .ok() .and_then(|div| match T::KIND { - NumericKind::Integer => { - cast_single_decimal_to_integer::(raw, div, scale as _, T::arrow_type()) - .ok() - } + NumericKind::Integer => cast_single_decimal_to_integer::( + raw, + div, + >::from(scale), + T::arrow_type(), + ) + .ok(), NumericKind::Float => T::from(single_decimal_to_float_lossy::( - &as_float, raw, scale as _, + &as_float, + raw, + >::from(scale), )), }) } @@ -878,21 +883,21 @@ impl<'m, 'v> Variant<'m, 'v> { Variant::Int64(i) => num_cast(i), Variant::Float(f) => num_cast(f), Variant::Double(d) => num_cast(d), - Variant::Decimal4(d) => Self::cast_decimal_to_num::( - d.integer(), - d.scale(), - |x: i32| x as f64, - ), - Variant::Decimal8(d) => Self::cast_decimal_to_num::( - d.integer(), - d.scale(), - |x: i64| x as f64, - ), - Variant::Decimal16(d) => Self::cast_decimal_to_num::( - d.integer(), - d.scale(), - |x: i128| x as f64, - ), + Variant::Decimal4(d) => { + Self::cast_decimal_to_num::(d.integer(), d.scale(), |x| { + x as f64 + }) + } + Variant::Decimal8(d) => { + Self::cast_decimal_to_num::(d.integer(), d.scale(), |x| { + x as f64 + }) + } + Variant::Decimal16(d) => { + Self::cast_decimal_to_num::(d.integer(), d.scale(), |x| { + x as f64 + }) + } _ => None, } } @@ -1181,7 +1186,7 @@ impl<'m, 'v> Variant<'m, 'v> { self.as_num() } - fn convert_string_to_decimal(input: &str) -> Option + fn convert_string_to_decimal(input: &str) -> Option where D: DecimalType, VD: VariantDecimalType, @@ -1190,14 +1195,12 @@ impl<'m, 'v> Variant<'m, 'v> { // find the last '.' let scale_usize = input .rsplit_once('.') - .map(|(_, frac)| frac.len()) - .unwrap_or(0); + .map_or_else(|| 0, |(_, frac)| frac.len()); let scale = u8::try_from(scale_usize).ok()?; - parse_string_to_decimal_native::(input, scale_usize) - .ok() - .and_then(|raw| VD::try_new(raw, scale).ok()) + let raw = parse_string_to_decimal_native::(input, scale_usize).ok()?; + VD::try_new(raw, scale).ok() } /// Converts this variant to tuple with a 4-byte unscaled value if possible. @@ -1240,9 +1243,9 @@ impl<'m, 'v> Variant<'m, 'v> { .and_then(|x: i32| x.try_into().ok()), Variant::Double(f) => single_float_to_decimal::(f, 1f64) .and_then(|x: i32| x.try_into().ok()), - Variant::String(v) => Self::convert_string_to_decimal::<_, Decimal32Type>(v), + Variant::String(v) => Self::convert_string_to_decimal::(v), Variant::ShortString(v) => { - Self::convert_string_to_decimal::<_, Decimal32Type>(v.as_str()) + Self::convert_string_to_decimal::(v.as_str()) } Variant::Decimal4(decimal4) => Some(decimal4), Variant::Decimal8(decimal8) => decimal8.try_into().ok(), @@ -1291,9 +1294,9 @@ impl<'m, 'v> Variant<'m, 'v> { .and_then(|x: i64| x.try_into().ok()), Variant::Double(f) => single_float_to_decimal::(f, 1f64) .and_then(|x: i64| x.try_into().ok()), - Variant::String(v) => Self::convert_string_to_decimal::<_, Decimal64Type>(v), + Variant::String(v) => Self::convert_string_to_decimal::(v), Variant::ShortString(v) => { - Self::convert_string_to_decimal::<_, Decimal64Type>(v.as_str()) + Self::convert_string_to_decimal::(v.as_str()) } Variant::Decimal4(decimal4) => Some(decimal4.into()), Variant::Decimal8(decimal8) => Some(decimal8), @@ -1329,15 +1332,15 @@ impl<'m, 'v> Variant<'m, 'v> { match *self { Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | Variant::Int64(_) => self .as_num::() - .map(|x| (x as i128).try_into().ok()) + .map(|x| >::from(x).try_into().ok()) .unwrap(), Variant::Float(f) => single_float_to_decimal::(f as _, 1f64) .and_then(|x: i128| x.try_into().ok()), Variant::Double(f) => single_float_to_decimal::(f, 1f64) .and_then(|x: i128| x.try_into().ok()), - Variant::String(v) => Self::convert_string_to_decimal::<_, Decimal128Type>(v), + Variant::String(v) => Self::convert_string_to_decimal::(v), Variant::ShortString(v) => { - Self::convert_string_to_decimal::<_, Decimal128Type>(v.as_str()) + Self::convert_string_to_decimal::(v.as_str()) } Variant::Decimal4(decimal4) => Some(decimal4.into()), Variant::Decimal8(decimal8) => Some(decimal8.into()), From 98a4843c4d887af645b2dad89ea0f0a07e7f9624 Mon Sep 17 00:00:00 2001 From: klion26 Date: Tue, 14 Apr 2026 21:01:12 +0800 Subject: [PATCH 04/13] address comment --- .../src/type_conversion.rs | 16 ++--- parquet-variant/src/variant.rs | 60 +++++++++---------- 2 files changed, 35 insertions(+), 41 deletions(-) diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index e8b9104ad997..2255d4316b25 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -256,16 +256,12 @@ where ), Variant::Float(f) => single_float_to_decimal::(f64::from(*f), mul), Variant::Double(f) => single_float_to_decimal::(*f, mul), - Variant::String(v) if scale > 0 => parse_string_to_decimal_native::( - v, - >::try_into(scale).expect("scale is positive, would never fail"), - ) - .ok(), - Variant::ShortString(v) if scale > 0 => parse_string_to_decimal_native::( - v, - >::try_into(scale).expect("scale is positive, would never fail"), - ) - .ok(), + // arrow-cast only support cast string to decimal with scale >=0 for now + // Please see `cast_string_to_decimal` in arrow-cast/src/cast/decimal.rs for more detail + Variant::String(v) if scale >= 0 => parse_string_to_decimal_native::(v, scale as _).ok(), + Variant::ShortString(v) if scale >= 0 => { + parse_string_to_decimal_native::(v, scale as _).ok() + } Variant::Decimal4(d) => rescale_decimal::( d.integer(), VariantDecimal4::MAX_PRECISION, diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index 24f0a62d4a8f..f5cc0c1f9f47 100644 --- a/parquet-variant/src/variant.rs +++ b/parquet-variant/src/variant.rs @@ -309,16 +309,14 @@ enum NumericKind { trait DecimalCastTarget: NumCast + Default { const KIND: NumericKind; - fn arrow_type() -> DataType; + const ARROW_TYPE: DataType; } macro_rules! impl_decimal_cast_target { ($raw_type: ident, $target_kind:expr, $arrow_type: expr) => { impl DecimalCastTarget for $raw_type { const KIND: NumericKind = $target_kind; - fn arrow_type() -> DataType { - $arrow_type - } + const ARROW_TYPE: DataType = $arrow_type; } }; } @@ -847,22 +845,21 @@ impl<'m, 'v> Variant<'m, 'v> { { let base: D::Native = NumCast::from(10)?; - base.pow_checked(>::from(scale)) - .ok() - .and_then(|div| match T::KIND { - NumericKind::Integer => cast_single_decimal_to_integer::( - raw, - div, - >::from(scale), - T::arrow_type(), - ) - .ok(), - NumericKind::Float => T::from(single_decimal_to_float_lossy::( - &as_float, - raw, - >::from(scale), - )), - }) + let div = base.pow_checked(>::from(scale)).ok()?; + match T::KIND { + NumericKind::Integer => cast_single_decimal_to_integer::( + raw, + div, + >::from(scale), + T::ARROW_TYPE, + ) + .ok(), + NumericKind::Float => T::from(single_decimal_to_float_lossy::( + &as_float, + raw, + >::from(scale), + )), + } } /// Converts a boolean or numeric variant(integers, floating-point, and decimals) @@ -1193,9 +1190,7 @@ impl<'m, 'v> Variant<'m, 'v> { D::Native: NumCast + DecimalCast, { // find the last '.' - let scale_usize = input - .rsplit_once('.') - .map_or_else(|| 0, |(_, frac)| frac.len()); + let scale_usize = input.rsplit_once('.').map_or(0, |(_, frac)| frac.len()); let scale = u8::try_from(scale_usize).ok()?; @@ -1330,14 +1325,17 @@ impl<'m, 'v> Variant<'m, 'v> { /// ``` pub fn as_decimal16(&self) -> Option { match *self { - Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | Variant::Int64(_) => self - .as_num::() - .map(|x| >::from(x).try_into().ok()) - .unwrap(), - Variant::Float(f) => single_float_to_decimal::(f as _, 1f64) - .and_then(|x: i128| x.try_into().ok()), - Variant::Double(f) => single_float_to_decimal::(f, 1f64) - .and_then(|x: i128| x.try_into().ok()), + Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | Variant::Int64(_) => { + let x = self.as_num::()?; + >::from(x).try_into().ok() + } + Variant::Float(f) => { + single_float_to_decimal::(>::from(f), 1f64) + .and_then(|x| x.try_into().ok()) + } + Variant::Double(f) => { + single_float_to_decimal::(f, 1f64).and_then(|x| x.try_into().ok()) + } Variant::String(v) => Self::convert_string_to_decimal::(v), Variant::ShortString(v) => { Self::convert_string_to_decimal::(v.as_str()) From 153233fed9be2e5437e73e81bdd02b236ba54da1 Mon Sep 17 00:00:00 2001 From: klion26 Date: Wed, 15 Apr 2026 17:02:21 +0800 Subject: [PATCH 05/13] performance ok --- arrow-cast/src/cast/decimal.rs | 94 ++++++++++++++++++++++++---------- arrow-cast/src/cast/mod.rs | 2 +- parquet-variant/src/variant.rs | 10 ++-- 3 files changed, 71 insertions(+), 35 deletions(-) diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs index 230a3b4960aa..d04e9dad7ffe 100644 --- a/arrow-cast/src/cast/decimal.rs +++ b/arrow-cast/src/cast/decimal.rs @@ -837,35 +837,74 @@ where let mut value_builder = PrimitiveBuilder::::with_capacity(array.len()); - // Helper macro for emitting nearly the same loop every time, so we can hoist branches out - // The compiler will specialize the resulting code (inlining and jump threading) - macro_rules! cast_loop { - (|$v: ident| $body:expr) => {{ - for i in 0..array.len() { - if array.is_null(i) { - value_builder.append_null(); - } else { - let $v = cast_single_decimal_to_integer::( - array.value(i), - div, - >::from(scale), - T::DATA_TYPE, - ); - $body + if scale < 0 { + match cast_options.safe { + true => { + for i in 0..array.len() { + if array.is_null(i) { + value_builder.append_null(); + } else { + let v = cast_single_decimal_to_integer::( + array.value(i), + div, + true, + T::DATA_TYPE, + ) + .ok(); + value_builder.append_option(v); + } + } + } + false => { + for i in 0..array.len() { + if array.is_null(i) { + value_builder.append_null(); + } else { + let value = cast_single_decimal_to_integer::( + array.value(i), + div, + true, + T::DATA_TYPE, + )?; + value_builder.append_value(value); + } } } - }}; - } - if scale < 0 { - if cast_options.safe { - cast_loop!(|v| value_builder.append_option(v.ok())); - } else { - cast_loop!(|v| value_builder.append_value(v?)); } - } else if cast_options.safe { - cast_loop!(|v| value_builder.append_option(v.ok())); } else { - cast_loop!(|v| value_builder.append_value(v?)); + match cast_options.safe { + true => { + for i in 0..array.len() { + if array.is_null(i) { + value_builder.append_null(); + } else { + let v = cast_single_decimal_to_integer::( + array.value(i), + div, + false, + T::DATA_TYPE, + ) + .ok(); + value_builder.append_option(v); + } + } + } + false => { + for i in 0..array.len() { + if array.is_null(i) { + value_builder.append_null(); + } else { + let value = cast_single_decimal_to_integer::( + array.value(i), + div, + false, + T::DATA_TYPE, + )?; + value_builder.append_value(value); + } + } + } + } } Ok(Arc::new(value_builder.finish())) @@ -874,10 +913,11 @@ where /// Casting a given decimal to an integer based on given div and scale. /// The value is scaled by multiplying or dividing with the div based on the scale sign. /// Returns `Err` if the value is overflow or cannot be represented with the requested precision. +#[inline] pub fn cast_single_decimal_to_integer( value: D::Native, div: D::Native, - scale: i16, + negative: bool, type_name: DataType, ) -> Result where @@ -885,7 +925,7 @@ where D: DecimalType + ArrowPrimitiveType, ::Native: ToPrimitive, { - let v = if scale < 0 { + let v = if negative { value.mul_checked(div)? } else { value.div_checked(div)? diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index 1679eaa586e9..9c4b6355fb8f 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -89,7 +89,7 @@ where D: DecimalType, F: Fn(D::Native) -> f64, { - f(x) * 10_f64.powi(-scale) + f(x) / 10_f64.powi(scale) } /// CastOptions provides a way to override the default cast behaviors diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index f5cc0c1f9f47..9772ed47c370 100644 --- a/parquet-variant/src/variant.rs +++ b/parquet-variant/src/variant.rs @@ -847,13 +847,9 @@ impl<'m, 'v> Variant<'m, 'v> { let div = base.pow_checked(>::from(scale)).ok()?; match T::KIND { - NumericKind::Integer => cast_single_decimal_to_integer::( - raw, - div, - >::from(scale), - T::ARROW_TYPE, - ) - .ok(), + NumericKind::Integer => { + cast_single_decimal_to_integer::(raw, div, false, T::ARROW_TYPE).ok() + } NumericKind::Float => T::from(single_decimal_to_float_lossy::( &as_float, raw, From 1a34e05e1b8eac18149644eea7925d6a067ca48a Mon Sep 17 00:00:00 2001 From: klion26 Date: Wed, 15 Apr 2026 20:37:16 +0800 Subject: [PATCH 06/13] improve performance back --- arrow-cast/src/cast/decimal.rs | 60 +++++++++++++++++++++++++--------- arrow-cast/src/cast/mod.rs | 4 +-- parquet-variant/src/variant.rs | 38 +++++++++------------ 3 files changed, 62 insertions(+), 40 deletions(-) diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs index d04e9dad7ffe..289831035680 100644 --- a/arrow-cast/src/cast/decimal.rs +++ b/arrow-cast/src/cast/decimal.rs @@ -844,13 +844,11 @@ where if array.is_null(i) { value_builder.append_null(); } else { - let v = cast_single_decimal_to_integer::( + let v = cast_single_decimal_to_integer_opt::( array.value(i), div, true, - T::DATA_TYPE, - ) - .ok(); + ); value_builder.append_option(v); } } @@ -860,7 +858,7 @@ where if array.is_null(i) { value_builder.append_null(); } else { - let value = cast_single_decimal_to_integer::( + let value = cast_single_decimal_to_integer_result::( array.value(i), div, true, @@ -878,13 +876,11 @@ where if array.is_null(i) { value_builder.append_null(); } else { - let v = cast_single_decimal_to_integer::( + let v = cast_single_decimal_to_integer_opt::( array.value(i), div, false, - T::DATA_TYPE, - ) - .ok(); + ); value_builder.append_option(v); } } @@ -894,7 +890,7 @@ where if array.is_null(i) { value_builder.append_null(); } else { - let value = cast_single_decimal_to_integer::( + let value = cast_single_decimal_to_integer_result::( array.value(i), div, false, @@ -912,14 +908,49 @@ where /// Casting a given decimal to an integer based on given div and scale. /// The value is scaled by multiplying or dividing with the div based on the scale sign. -/// Returns `Err` if the value is overflow or cannot be represented with the requested precision. +/// Returns `None` if the value is overflow or cannot be represented with the requested precision. #[inline] -pub fn cast_single_decimal_to_integer( +pub fn cast_single_decimal_to_integer_opt( + value: D::Native, + div: D::Native, + negative: bool, +) -> Option +where + T: NumCast + ToPrimitive, + D: DecimalType + ArrowPrimitiveType, + ::Native: ToPrimitive, +{ + cast_single_decimal_to_integer::(value, div, negative) + .ok() + .flatten() +} + +#[inline] +fn cast_single_decimal_to_integer_result( value: D::Native, div: D::Native, negative: bool, type_name: DataType, ) -> Result +where + T: NumCast + ToPrimitive, + D: DecimalType + ArrowPrimitiveType, + ::Native: ToPrimitive, +{ + cast_single_decimal_to_integer::(value, div, negative)?.ok_or_else(|| { + ArrowError::CastError(format!( + "value of {:?} is out of range {:?}", + value, type_name + )) + }) +} + +#[inline] +fn cast_single_decimal_to_integer( + value: D::Native, + div: D::Native, + negative: bool, +) -> Result, ArrowError> where T: NumCast + ToPrimitive, D: DecimalType + ArrowPrimitiveType, @@ -930,10 +961,7 @@ where } else { value.div_checked(div)? }; - - T::from::(v).ok_or_else(|| { - ArrowError::CastError(format!("value of {:?} is out of range {:?}", v, type_name)) - }) + Ok(T::from::(v)) } /// Cast a decimal array to a floating point array. diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index 9c4b6355fb8f..4a784a65b4a5 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -73,8 +73,8 @@ use arrow_select::take::take; use num_traits::{NumCast, ToPrimitive, cast::AsPrimitive}; pub use decimal::{ - DecimalCast, cast_single_decimal_to_integer, parse_string_to_decimal_native, rescale_decimal, - single_float_to_decimal, + DecimalCast, cast_single_decimal_to_integer_opt, parse_string_to_decimal_native, + rescale_decimal, single_float_to_decimal, }; pub use string::cast_single_string_to_boolean_default; diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index 9772ed47c370..d17d785a79f7 100644 --- a/parquet-variant/src/variant.rs +++ b/parquet-variant/src/variant.rs @@ -31,15 +31,13 @@ use crate::path::{VariantPath, VariantPathElement}; use crate::utils::{first_byte_from_slice, slice_from_slice}; use arrow::array::ArrowNativeTypeOp; use arrow::compute::{ - DecimalCast, cast_num_to_bool, cast_single_decimal_to_integer, + DecimalCast, cast_num_to_bool, cast_single_decimal_to_integer_opt, cast_single_string_to_boolean_default, num_cast, parse_string_to_decimal_native, single_bool_to_numeric, single_decimal_to_float_lossy, single_float_to_decimal, }; use arrow::datatypes::{Decimal32Type, Decimal64Type, Decimal128Type, DecimalType}; -use arrow_schema::DataType::{ - Float16, Float32, Float64, Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, -}; -use arrow_schema::{ArrowError, DataType}; + +use arrow_schema::ArrowError; use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Timelike, Utc}; use num_traits::NumCast; use std::ops::Deref; @@ -309,29 +307,27 @@ enum NumericKind { trait DecimalCastTarget: NumCast + Default { const KIND: NumericKind; - const ARROW_TYPE: DataType; } macro_rules! impl_decimal_cast_target { - ($raw_type: ident, $target_kind:expr, $arrow_type: expr) => { + ($raw_type: ident, $target_kind:expr) => { impl DecimalCastTarget for $raw_type { const KIND: NumericKind = $target_kind; - const ARROW_TYPE: DataType = $arrow_type; } }; } -impl_decimal_cast_target!(i8, NumericKind::Integer, Int8); -impl_decimal_cast_target!(i16, NumericKind::Integer, Int16); -impl_decimal_cast_target!(i32, NumericKind::Integer, Int32); -impl_decimal_cast_target!(i64, NumericKind::Integer, Int64); -impl_decimal_cast_target!(u8, NumericKind::Integer, UInt8); -impl_decimal_cast_target!(u16, NumericKind::Integer, UInt16); -impl_decimal_cast_target!(u32, NumericKind::Integer, UInt32); -impl_decimal_cast_target!(u64, NumericKind::Integer, UInt64); -impl_decimal_cast_target!(f16, NumericKind::Float, Float16); -impl_decimal_cast_target!(f32, NumericKind::Float, Float32); -impl_decimal_cast_target!(f64, NumericKind::Float, Float64); +impl_decimal_cast_target!(i8, NumericKind::Integer); +impl_decimal_cast_target!(i16, NumericKind::Integer); +impl_decimal_cast_target!(i32, NumericKind::Integer); +impl_decimal_cast_target!(i64, NumericKind::Integer); +impl_decimal_cast_target!(u8, NumericKind::Integer); +impl_decimal_cast_target!(u16, NumericKind::Integer); +impl_decimal_cast_target!(u32, NumericKind::Integer); +impl_decimal_cast_target!(u64, NumericKind::Integer); +impl_decimal_cast_target!(f16, NumericKind::Float); +impl_decimal_cast_target!(f32, NumericKind::Float); +impl_decimal_cast_target!(f64, NumericKind::Float); impl<'m, 'v> Variant<'m, 'v> { /// Attempts to interpret a metadata and value buffer pair as a new `Variant`. @@ -847,9 +843,7 @@ impl<'m, 'v> Variant<'m, 'v> { let div = base.pow_checked(>::from(scale)).ok()?; match T::KIND { - NumericKind::Integer => { - cast_single_decimal_to_integer::(raw, div, false, T::ARROW_TYPE).ok() - } + NumericKind::Integer => cast_single_decimal_to_integer_opt::(raw, div, false), NumericKind::Float => T::from(single_decimal_to_float_lossy::( &as_float, raw, From e22298662e3260a11cf1ea82ce1955765438538c Mon Sep 17 00:00:00 2001 From: klion26 Date: Wed, 15 Apr 2026 21:03:55 +0800 Subject: [PATCH 07/13] fix erro msg --- arrow-cast/src/cast/decimal.rs | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs index 289831035680..9a025479e04c 100644 --- a/arrow-cast/src/cast/decimal.rs +++ b/arrow-cast/src/cast/decimal.rs @@ -920,9 +920,12 @@ where D: DecimalType + ArrowPrimitiveType, ::Native: ToPrimitive, { - cast_single_decimal_to_integer::(value, div, negative) - .ok() - .flatten() + let v = if negative { + value.mul_checked(div).ok()? + } else { + value.div_checked(div).ok()? + }; + T::from::(v) } #[inline] @@ -932,25 +935,6 @@ fn cast_single_decimal_to_integer_result( negative: bool, type_name: DataType, ) -> Result -where - T: NumCast + ToPrimitive, - D: DecimalType + ArrowPrimitiveType, - ::Native: ToPrimitive, -{ - cast_single_decimal_to_integer::(value, div, negative)?.ok_or_else(|| { - ArrowError::CastError(format!( - "value of {:?} is out of range {:?}", - value, type_name - )) - }) -} - -#[inline] -fn cast_single_decimal_to_integer( - value: D::Native, - div: D::Native, - negative: bool, -) -> Result, ArrowError> where T: NumCast + ToPrimitive, D: DecimalType + ArrowPrimitiveType, @@ -961,7 +945,9 @@ where } else { value.div_checked(div)? }; - Ok(T::from::(v)) + T::from::(v).ok_or_else(|| { + ArrowError::CastError(format!("value of {:?} is out of range {:?}", v, type_name)) + }) } /// Cast a decimal array to a floating point array. From c74d244c984bf0b3f30c6379b82dd5986fbdfb53 Mon Sep 17 00:00:00 2001 From: klion26 Date: Thu, 23 Apr 2026 10:58:18 +0800 Subject: [PATCH 08/13] change used to debug benchmark --- arrow-cast/src/cast/decimal.rs | 13 +++++-------- parquet-variant/src/variant.rs | 2 +- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs index 9a025479e04c..2de6295149c6 100644 --- a/arrow-cast/src/cast/decimal.rs +++ b/arrow-cast/src/cast/decimal.rs @@ -844,10 +844,9 @@ where if array.is_null(i) { value_builder.append_null(); } else { - let v = cast_single_decimal_to_integer_opt::( + let v = cast_single_decimal_to_integer_opt::( array.value(i), div, - true, ); value_builder.append_option(v); } @@ -876,10 +875,9 @@ where if array.is_null(i) { value_builder.append_null(); } else { - let v = cast_single_decimal_to_integer_opt::( + let v = cast_single_decimal_to_integer_opt::( array.value(i), div, - false, ); value_builder.append_option(v); } @@ -910,22 +908,21 @@ where /// The value is scaled by multiplying or dividing with the div based on the scale sign. /// Returns `None` if the value is overflow or cannot be represented with the requested precision. #[inline] -pub fn cast_single_decimal_to_integer_opt( +pub fn cast_single_decimal_to_integer_opt( value: D::Native, div: D::Native, - negative: bool, ) -> Option where T: NumCast + ToPrimitive, D: DecimalType + ArrowPrimitiveType, ::Native: ToPrimitive, { - let v = if negative { + let v = if NEGATIVE_SCALE { value.mul_checked(div).ok()? } else { value.div_checked(div).ok()? }; - T::from::(v) + ::from::(v) } #[inline] diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index d17d785a79f7..6656d0cdb01d 100644 --- a/parquet-variant/src/variant.rs +++ b/parquet-variant/src/variant.rs @@ -843,7 +843,7 @@ impl<'m, 'v> Variant<'m, 'v> { let div = base.pow_checked(>::from(scale)).ok()?; match T::KIND { - NumericKind::Integer => cast_single_decimal_to_integer_opt::(raw, div, false), + NumericKind::Integer => cast_single_decimal_to_integer_opt::(raw, div), NumericKind::Float => T::from(single_decimal_to_float_lossy::( &as_float, raw, From 70918ceded2fd499c4c8969c759f956d18deba88 Mon Sep 17 00:00:00 2001 From: klion26 Date: Fri, 24 Apr 2026 11:27:38 +0800 Subject: [PATCH 09/13] change inline to inline(always) --- arrow-cast/src/cast/decimal.rs | 17 +++++++---------- arrow-cast/src/cast/mod.rs | 2 +- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs index 2de6295149c6..ac35344aaae6 100644 --- a/arrow-cast/src/cast/decimal.rs +++ b/arrow-cast/src/cast/decimal.rs @@ -804,7 +804,7 @@ where /// Cast a single floating point value to a decimal native with the given multiple. /// Returns `None` if the value cannot be represented with the requested precision. -#[inline] +#[inline(always)] pub fn single_float_to_decimal(input: f64, mul: f64) -> Option where D: DecimalType + ArrowPrimitiveType, @@ -857,10 +857,9 @@ where if array.is_null(i) { value_builder.append_null(); } else { - let value = cast_single_decimal_to_integer_result::( + let value = cast_single_decimal_to_integer_result::( array.value(i), div, - true, T::DATA_TYPE, )?; value_builder.append_value(value); @@ -888,10 +887,9 @@ where if array.is_null(i) { value_builder.append_null(); } else { - let value = cast_single_decimal_to_integer_result::( + let value = cast_single_decimal_to_integer_result::( array.value(i), div, - false, T::DATA_TYPE, )?; value_builder.append_value(value); @@ -907,7 +905,7 @@ where /// Casting a given decimal to an integer based on given div and scale. /// The value is scaled by multiplying or dividing with the div based on the scale sign. /// Returns `None` if the value is overflow or cannot be represented with the requested precision. -#[inline] +#[inline(always)] pub fn cast_single_decimal_to_integer_opt( value: D::Native, div: D::Native, @@ -925,11 +923,10 @@ where ::from::(v) } -#[inline] -fn cast_single_decimal_to_integer_result( +#[inline(always)] +fn cast_single_decimal_to_integer_result( value: D::Native, div: D::Native, - negative: bool, type_name: DataType, ) -> Result where @@ -937,7 +934,7 @@ where D: DecimalType + ArrowPrimitiveType, ::Native: ToPrimitive, { - let v = if negative { + let v = if NEGATIVE_SCALE { value.mul_checked(div)? } else { value.div_checked(div)? diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index 4a784a65b4a5..78c2067aaf7f 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -83,7 +83,7 @@ pub use string::cast_single_string_to_boolean_default; /// Conversion is lossy and follows standard floating point semantics. Values /// that exceed the representable range become `INFINITY` or `-INFINITY` without /// returning an error. -#[inline] +#[inline(always)] pub fn single_decimal_to_float_lossy(f: &F, x: D::Native, scale: i32) -> f64 where D: DecimalType, From ee6e6829b33d323fdc7aff48a97b888df0c5c808 Mon Sep 17 00:00:00 2001 From: klion26 Date: Fri, 24 Apr 2026 17:52:48 +0800 Subject: [PATCH 10/13] debug benchmark: revert decimal_to_integer logic --- arrow-cast/src/cast/decimal.rs | 72 +++++++++++++++------------------- 1 file changed, 32 insertions(+), 40 deletions(-) diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs index ac35344aaae6..eb15e108356c 100644 --- a/arrow-cast/src/cast/decimal.rs +++ b/arrow-cast/src/cast/decimal.rs @@ -844,10 +844,11 @@ where if array.is_null(i) { value_builder.append_null(); } else { - let v = cast_single_decimal_to_integer_opt::( - array.value(i), - div, - ); + let v = array + .value(i) + .mul_checked(div) + .ok() + .and_then(::from::); value_builder.append_option(v); } } @@ -857,11 +858,17 @@ where if array.is_null(i) { value_builder.append_null(); } else { - let value = cast_single_decimal_to_integer_result::( - array.value(i), - div, - T::DATA_TYPE, - )?; + let v = array.value(i).mul_checked(div)?; + + let value = + ::from::(v).ok_or_else(|| { + ArrowError::CastError(format!( + "value of {:?} is out of range {}", + v, + T::DATA_TYPE + )) + })?; + value_builder.append_value(value); } } @@ -874,10 +881,11 @@ where if array.is_null(i) { value_builder.append_null(); } else { - let v = cast_single_decimal_to_integer_opt::( - array.value(i), - div, - ); + let v = array + .value(i) + .div_checked(div) + .ok() + .and_then(::from::); value_builder.append_option(v); } } @@ -887,18 +895,23 @@ where if array.is_null(i) { value_builder.append_null(); } else { - let value = cast_single_decimal_to_integer_result::( - array.value(i), - div, - T::DATA_TYPE, - )?; + let v = array.value(i).div_checked(div)?; + + let value = + ::from::(v).ok_or_else(|| { + ArrowError::CastError(format!( + "value of {:?} is out of range {}", + v, + T::DATA_TYPE + )) + })?; + value_builder.append_value(value); } } } } } - Ok(Arc::new(value_builder.finish())) } @@ -923,27 +936,6 @@ where ::from::(v) } -#[inline(always)] -fn cast_single_decimal_to_integer_result( - value: D::Native, - div: D::Native, - type_name: DataType, -) -> Result -where - T: NumCast + ToPrimitive, - D: DecimalType + ArrowPrimitiveType, - ::Native: ToPrimitive, -{ - let v = if NEGATIVE_SCALE { - value.mul_checked(div)? - } else { - value.div_checked(div)? - }; - T::from::(v).ok_or_else(|| { - ArrowError::CastError(format!("value of {:?} is out of range {:?}", v, type_name)) - }) -} - /// Cast a decimal array to a floating point array. /// /// Conversion is lossy and follows standard floating point semantics. Values From a0fde6a37d9b649e52d470c71fa4eaa66f6d84ea Mon Sep 17 00:00:00 2001 From: klion26 Date: Fri, 24 Apr 2026 19:39:58 +0800 Subject: [PATCH 11/13] debug benchmark: seperate cast_single_decimal_to_integer_opt to two different for positive and negative scale --- arrow-cast/src/cast/decimal.rs | 47 ++++++++++++++++++---------------- arrow-cast/src/cast/mod.rs | 2 +- parquet-variant/src/variant.rs | 4 +-- 3 files changed, 28 insertions(+), 25 deletions(-) diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs index eb15e108356c..ec5599f18c2e 100644 --- a/arrow-cast/src/cast/decimal.rs +++ b/arrow-cast/src/cast/decimal.rs @@ -844,11 +844,10 @@ where if array.is_null(i) { value_builder.append_null(); } else { - let v = array - .value(i) - .mul_checked(div) - .ok() - .and_then(::from::); + let v = cast_single_decimal_to_integer_mul_opt::( + array.value(i), + div, + ); value_builder.append_option(v); } } @@ -881,11 +880,10 @@ where if array.is_null(i) { value_builder.append_null(); } else { - let v = array - .value(i) - .div_checked(div) - .ok() - .and_then(::from::); + let v = cast_single_decimal_to_integer_div_opt::( + array.value(i), + div, + ); value_builder.append_option(v); } } @@ -915,24 +913,29 @@ where Ok(Arc::new(value_builder.finish())) } -/// Casting a given decimal to an integer based on given div and scale. -/// The value is scaled by multiplying or dividing with the div based on the scale sign. -/// Returns `None` if the value is overflow or cannot be represented with the requested precision. +/// Casting a given decimal to an integer by multiplying with the given factor. +/// Returns `None` if checked multiplication overflows or the target cast fails. #[inline(always)] -pub fn cast_single_decimal_to_integer_opt( - value: D::Native, - div: D::Native, -) -> Option +pub fn cast_single_decimal_to_integer_mul_opt(value: D::Native, mul: D::Native) -> Option where T: NumCast + ToPrimitive, D: DecimalType + ArrowPrimitiveType, ::Native: ToPrimitive, { - let v = if NEGATIVE_SCALE { - value.mul_checked(div).ok()? - } else { - value.div_checked(div).ok()? - }; + let v = value.mul_checked(mul).ok()?; + ::from::(v) +} + +/// Casting a given decimal to an integer by dividing with the given divisor. +/// Returns `None` if checked division fails or the target cast fails. +#[inline(always)] +pub fn cast_single_decimal_to_integer_div_opt(value: D::Native, div: D::Native) -> Option +where + T: NumCast + ToPrimitive, + D: DecimalType + ArrowPrimitiveType, + ::Native: ToPrimitive, +{ + let v = value.div_checked(div).ok()?; ::from::(v) } diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index 78c2067aaf7f..82326a17698a 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -73,7 +73,7 @@ use arrow_select::take::take; use num_traits::{NumCast, ToPrimitive, cast::AsPrimitive}; pub use decimal::{ - DecimalCast, cast_single_decimal_to_integer_opt, parse_string_to_decimal_native, + DecimalCast, cast_single_decimal_to_integer_div_opt, parse_string_to_decimal_native, rescale_decimal, single_float_to_decimal, }; pub use string::cast_single_string_to_boolean_default; diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index 6656d0cdb01d..2ccac661dad1 100644 --- a/parquet-variant/src/variant.rs +++ b/parquet-variant/src/variant.rs @@ -31,7 +31,7 @@ use crate::path::{VariantPath, VariantPathElement}; use crate::utils::{first_byte_from_slice, slice_from_slice}; use arrow::array::ArrowNativeTypeOp; use arrow::compute::{ - DecimalCast, cast_num_to_bool, cast_single_decimal_to_integer_opt, + DecimalCast, cast_num_to_bool, cast_single_decimal_to_integer_div_opt, cast_single_string_to_boolean_default, num_cast, parse_string_to_decimal_native, single_bool_to_numeric, single_decimal_to_float_lossy, single_float_to_decimal, }; @@ -843,7 +843,7 @@ impl<'m, 'v> Variant<'m, 'v> { let div = base.pow_checked(>::from(scale)).ok()?; match T::KIND { - NumericKind::Integer => cast_single_decimal_to_integer_opt::(raw, div), + NumericKind::Integer => cast_single_decimal_to_integer_div_opt::(raw, div), NumericKind::Float => T::from(single_decimal_to_float_lossy::( &as_float, raw, From 305797a67b9ff4f8d804e4a4cdd7d0cdcae24495 Mon Sep 17 00:00:00 2001 From: klion26 Date: Fri, 24 Apr 2026 20:41:26 +0800 Subject: [PATCH 12/13] debug benchmark: float to decimal --- arrow-cast/src/cast/decimal.rs | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs index ec5599f18c2e..63627c80019d 100644 --- a/arrow-cast/src/cast/decimal.rs +++ b/arrow-cast/src/cast/decimal.rs @@ -777,7 +777,7 @@ where if cast_options.safe { array .unary_opt::<_, D>(|v| { - single_float_to_decimal::(v.as_(), mul) + D::Native::from_f64((mul * v.as_()).round()) .filter(|v| D::is_valid_decimal_precision(*v, precision)) }) .with_precision_and_scale(precision, scale) @@ -785,7 +785,7 @@ where } else { array .try_unary::<_, D, _>(|v| { - single_float_to_decimal::(v.as_(), mul) + D::Native::from_f64((mul * v.as_()).round()) .ok_or_else(|| { ArrowError::CastError(format!( "Cannot cast to {}({}, {}). Overflowing on {:?}", @@ -844,10 +844,11 @@ where if array.is_null(i) { value_builder.append_null(); } else { - let v = cast_single_decimal_to_integer_mul_opt::( - array.value(i), - div, - ); + let v = array + .value(i) + .mul_checked(div) + .ok() + .and_then(::from::); value_builder.append_option(v); } } @@ -913,19 +914,6 @@ where Ok(Arc::new(value_builder.finish())) } -/// Casting a given decimal to an integer by multiplying with the given factor. -/// Returns `None` if checked multiplication overflows or the target cast fails. -#[inline(always)] -pub fn cast_single_decimal_to_integer_mul_opt(value: D::Native, mul: D::Native) -> Option -where - T: NumCast + ToPrimitive, - D: DecimalType + ArrowPrimitiveType, - ::Native: ToPrimitive, -{ - let v = value.mul_checked(mul).ok()?; - ::from::(v) -} - /// Casting a given decimal to an integer by dividing with the given divisor. /// Returns `None` if checked division fails or the target cast fails. #[inline(always)] From 8c23792d3b4422663e2929c40bec54c4279a425e Mon Sep 17 00:00:00 2001 From: klion26 Date: Fri, 24 Apr 2026 21:57:07 +0800 Subject: [PATCH 13/13] reuse single_float_to_decimal when cast float to decimal --- arrow-cast/src/cast/decimal.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs index 63627c80019d..db18d19df8ef 100644 --- a/arrow-cast/src/cast/decimal.rs +++ b/arrow-cast/src/cast/decimal.rs @@ -777,7 +777,7 @@ where if cast_options.safe { array .unary_opt::<_, D>(|v| { - D::Native::from_f64((mul * v.as_()).round()) + single_float_to_decimal::(v.as_(), mul) .filter(|v| D::is_valid_decimal_precision(*v, precision)) }) .with_precision_and_scale(precision, scale) @@ -785,7 +785,7 @@ where } else { array .try_unary::<_, D, _>(|v| { - D::Native::from_f64((mul * v.as_()).round()) + single_float_to_decimal::(v.as_(), mul) .ok_or_else(|| { ArrowError::CastError(format!( "Cannot cast to {}({}, {}). Overflowing on {:?}",