diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs index 3553f2b6a76f..db18d19df8ef 100644 --- a/arrow-cast/src/cast/decimal.rs +++ b/arrow-cast/src/cast/decimal.rs @@ -531,7 +531,7 @@ where /// Parses given string to specified decimal native (i128/i256) based on given /// scale. Returns an `Err` if it cannot parse given string. -pub(crate) fn parse_string_to_decimal_native( +pub fn parse_string_to_decimal_native( value_str: &str, scale: usize, ) -> Result @@ -777,7 +777,7 @@ where if cast_options.safe { array .unary_opt::<_, D>(|v| { - D::Native::from_f64((mul * v.as_()).round()) + single_float_to_decimal::(v.as_(), mul) .filter(|v| D::is_valid_decimal_precision(*v, precision)) }) .with_precision_and_scale(precision, scale) @@ -785,7 +785,7 @@ where } else { array .try_unary::<_, D, _>(|v| { - D::Native::from_f64((mul * v.as_()).round()) + single_float_to_decimal::(v.as_(), mul) .ok_or_else(|| { ArrowError::CastError(format!( "Cannot cast to {}({}, {}). Overflowing on {:?}", @@ -802,6 +802,17 @@ where } } +/// Cast a single floating point value to a decimal native by multiplying it with `mul` and rounding. +/// Returns `None` if `D::Native::from_f64` yields `None` for the rounded value; precision is not checked here. +#[inline(always)] +pub fn single_float_to_decimal(input: f64, mul: f64) -> Option +where + D: DecimalType + ArrowPrimitiveType, + ::Native: DecimalCast, +{ + D::Native::from_f64((mul * input).round()) +} + pub(crate) fn cast_decimal_to_integer( array: &dyn Array, base: D::Native, @@ -870,11 +881,10 @@ where if array.is_null(i) { value_builder.append_null(); } else { - let v = array - .value(i) - .div_checked(div) - .ok() - .and_then(::from::); + let v = cast_single_decimal_to_integer_div_opt::( + array.value(i), + div, + ); value_builder.append_option(v); } } @@ -904,6 +914,19 @@ where Ok(Arc::new(value_builder.finish())) } +/// Casts a given decimal to an integer by dividing it by the given divisor.
+/// Returns `None` if checked division fails or the target cast fails. +#[inline(always)] +pub fn cast_single_decimal_to_integer_div_opt(value: D::Native, div: D::Native) -> Option +where + T: NumCast + ToPrimitive, + D: DecimalType + ArrowPrimitiveType, + ::Native: ToPrimitive, +{ + let v = value.div_checked(div).ok()?; + ::from::(v) +} + /// Cast a decimal array to a floating point array. /// /// Conversion is lossy and follows standard floating point semantics. Values diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index 5f08dcbfc138..82326a17698a 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -72,9 +72,26 @@ use arrow_schema::*; use arrow_select::take::take; use num_traits::{NumCast, ToPrimitive, cast::AsPrimitive}; -pub use decimal::{DecimalCast, rescale_decimal}; +pub use decimal::{ + DecimalCast, cast_single_decimal_to_integer_div_opt, parse_string_to_decimal_native, + rescale_decimal, single_float_to_decimal, +}; pub use string::cast_single_string_to_boolean_default; +/// Lossy conversion from decimal to float. +/// +/// Conversion is lossy and follows standard floating point semantics. Values +/// that exceed the representable range become `INFINITY` or `-INFINITY` without +/// returning an error. 
+#[inline(always)] +pub fn single_decimal_to_float_lossy(f: &F, x: D::Native, scale: i32) -> f64 +where + D: DecimalType, + F: Fn(D::Native) -> f64, +{ + f(x) / 10_f64.powi(scale) +} + /// CastOptions provides a way to override the default cast behaviors #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct CastOptions<'a> { @@ -2314,10 +2331,11 @@ where Int32 => cast_decimal_to_integer::(array, base, *scale, cast_options), Int64 => cast_decimal_to_integer::(array, base, *scale, cast_options), Float32 => cast_decimal_to_float::(array, |x| { - (as_float(x) / 10_f64.powi(*scale as i32)) as f32 + single_decimal_to_float_lossy::(&as_float, x, >::from(*scale)) + as f32 }), Float64 => cast_decimal_to_float::(array, |x| { - as_float(x) / 10_f64.powi(*scale as i32) + single_decimal_to_float_lossy::(&as_float, x, >::from(*scale)) }), Utf8View => value_to_string_view(array, cast_options), Utf8 => value_to_string::(array, cast_options), diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index 7b9eb67d1a95..2255d4316b25 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -17,7 +17,10 @@ //! Module for transforming a typed arrow `Array` to `VariantArray`. 
-use arrow::compute::{CastOptions, DecimalCast, rescale_decimal}; +use arrow::compute::{ + CastOptions, DecimalCast, parse_string_to_decimal_native, rescale_decimal, + single_float_to_decimal, +}; use arrow::datatypes::{ self, ArrowPrimitiveType, ArrowTimestampType, Decimal32Type, Decimal64Type, Decimal128Type, DecimalType, @@ -204,9 +207,12 @@ impl_timestamp_from_variant!( /// /// - `precision` and `scale` specify the target Arrow decimal parameters /// - Integer variants (`Int8/16/32/64`) are treated as decimals with scale 0 +/// - Floating point variants (`Float/Double`) are converted to decimals with the given scale +/// - String variants (`String/ShortString`) are parsed as decimals with the given scale /// - Decimal variants (`Decimal4/8/16`) use their embedded precision and scale /// -/// The value is rescaled to (`precision`, `scale`) using `rescale_decimal` and +/// The value is rescaled to (`precision`, `scale`) using `rescale_decimal` for integers and decimals, +/// `single_float_to_decimal` for floats, and `parse_string_to_decimal_native` for strings; it /// returns `None` if it cannot fit the requested precision.
pub(crate) fn variant_to_unscaled_decimal( variant: &Variant<'_, '_>, @@ -217,6 +223,8 @@ where O: DecimalType, O::Native: DecimalCast, { + let mul = 10_f64.powi(scale as i32); + match variant { Variant::Int8(i) => rescale_decimal::( *i as i32, @@ -246,6 +254,14 @@ where precision, scale, ), + Variant::Float(f) => single_float_to_decimal::(f64::from(*f), mul), + Variant::Double(f) => single_float_to_decimal::(*f, mul), + // arrow-cast only support cast string to decimal with scale >=0 for now + // Please see `cast_string_to_decimal` in arrow-cast/src/cast/decimal.rs for more detail + Variant::String(v) if scale >= 0 => parse_string_to_decimal_native::(v, scale as _).ok(), + Variant::ShortString(v) if scale >= 0 => { + parse_string_to_decimal_native::(v, scale as _).ok() + } Variant::Decimal4(d) => rescale_decimal::( d.integer(), VariantDecimal4::MAX_PRECISION, diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index accff009045a..2ccac661dad1 100644 --- a/parquet-variant/src/variant.rs +++ b/parquet-variant/src/variant.rs @@ -29,9 +29,14 @@ use crate::decoder::{ }; use crate::path::{VariantPath, VariantPathElement}; use crate::utils::{first_byte_from_slice, slice_from_slice}; +use arrow::array::ArrowNativeTypeOp; use arrow::compute::{ - cast_num_to_bool, cast_single_string_to_boolean_default, num_cast, single_bool_to_numeric, + DecimalCast, cast_num_to_bool, cast_single_decimal_to_integer_div_opt, + cast_single_string_to_boolean_default, num_cast, parse_string_to_decimal_native, + single_bool_to_numeric, single_decimal_to_float_lossy, single_float_to_decimal, }; +use arrow::datatypes::{Decimal32Type, Decimal64Type, Decimal128Type, DecimalType}; + use arrow_schema::ArrowError; use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Timelike, Utc}; use num_traits::NumCast; @@ -166,10 +171,11 @@ impl Deref for ShortString<'_> { /// Arrow UTF8-to-boolean cast rules. 
/// - Numeric accessors such as [`Self::as_int8`], [`Self::as_int64`], [`Self::as_u8`], /// [`Self::as_u64`], [`Self::as_f16`], [`Self::as_f32`], and [`Self::as_f64`] accept -/// boolean and numeric variants (integers, floating-point, and decimals with scale `0`). +/// boolean and numeric variants (integers, floating-point, and decimals). /// They return `None` when conversion is not possible. /// - Decimal accessors such as [`Self::as_decimal4`], [`Self::as_decimal8`], and -/// [`Self::as_decimal16`] accept compatible decimal variants and integer variants. +/// [`Self::as_decimal16`] accept compatible decimal variants, integer variants, +/// float variants and string variants. /// They return `None` when conversion is not possible. /// /// # Examples: @@ -294,6 +300,35 @@ pub enum Variant<'m, 'v> { // We don't want this to grow because it could hurt performance of a frequently-created type. const _: () = crate::utils::expect_size_of::(80); +enum NumericKind { + Integer, + Float, +} + +trait DecimalCastTarget: NumCast + Default { + const KIND: NumericKind; +} + +macro_rules! impl_decimal_cast_target { + ($raw_type: ident, $target_kind:expr) => { + impl DecimalCastTarget for $raw_type { + const KIND: NumericKind = $target_kind; + } + }; +} + +impl_decimal_cast_target!(i8, NumericKind::Integer); +impl_decimal_cast_target!(i16, NumericKind::Integer); +impl_decimal_cast_target!(i32, NumericKind::Integer); +impl_decimal_cast_target!(i64, NumericKind::Integer); +impl_decimal_cast_target!(u8, NumericKind::Integer); +impl_decimal_cast_target!(u16, NumericKind::Integer); +impl_decimal_cast_target!(u32, NumericKind::Integer); +impl_decimal_cast_target!(u64, NumericKind::Integer); +impl_decimal_cast_target!(f16, NumericKind::Float); +impl_decimal_cast_target!(f32, NumericKind::Float); +impl_decimal_cast_target!(f64, NumericKind::Float); + impl<'m, 'v> Variant<'m, 'v> { /// Attempts to interpret a metadata and value buffer pair as a new `Variant`. 
/// @@ -797,14 +832,34 @@ impl<'m, 'v> Variant<'m, 'v> { } } - /// Converts a boolean or numeric variant(integers, floating-point, and decimals with scale 0) + fn cast_decimal_to_num(raw: D::Native, scale: u8, as_float: F) -> Option + where + D: DecimalType, + D::Native: NumCast + ArrowNativeTypeOp, + T: DecimalCastTarget, + F: Fn(D::Native) -> f64, + { + let base: D::Native = NumCast::from(10)?; + + let div = base.pow_checked(>::from(scale)).ok()?; + match T::KIND { + NumericKind::Integer => cast_single_decimal_to_integer_div_opt::(raw, div), + NumericKind::Float => T::from(single_decimal_to_float_lossy::( + &as_float, + raw, + >::from(scale), + )), + } + } + + /// Converts a boolean or numeric variant(integers, floating-point, and decimals) /// to the specified numeric type `T`. /// /// Uses Arrow's casting logic to perform the conversion. Returns `Some(T)` if /// the conversion succeeds, `None` if the variant can't be casted to type `T`. fn as_num(&self) -> Option where - T: NumCast + Default, + T: DecimalCastTarget, { match *self { Variant::BooleanFalse => single_bool_to_numeric(false), @@ -815,9 +870,21 @@ impl<'m, 'v> Variant<'m, 'v> { Variant::Int64(i) => num_cast(i), Variant::Float(f) => num_cast(f), Variant::Double(d) => num_cast(d), - Variant::Decimal4(d) if d.scale() == 0 => num_cast(d.integer()), - Variant::Decimal8(d) if d.scale() == 0 => num_cast(d.integer()), - Variant::Decimal16(d) if d.scale() == 0 => num_cast(d.integer()), + Variant::Decimal4(d) => { + Self::cast_decimal_to_num::(d.integer(), d.scale(), |x| { + x as f64 + }) + } + Variant::Decimal8(d) => { + Self::cast_decimal_to_num::(d.integer(), d.scale(), |x| { + x as f64 + }) + } + Variant::Decimal16(d) => { + Self::cast_decimal_to_num::(d.integer(), d.scale(), |x| { + x as f64 + }) + } _ => None, } } @@ -962,17 +1029,17 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(d); /// assert_eq!(v2.as_u8(), Some(26u8)); /// + /// // or a variant that decimal with scale not equal to zero + 
/// let d = VariantDecimal4::try_new(123, 2).unwrap(); + /// let v3 = Variant::from(d); + /// assert_eq!(v3.as_u8(), Some(1)); + /// /// // or from boolean variant - /// let v3 = Variant::BooleanFalse; - /// assert_eq!(v3.as_u8(), Some(0)); + /// let v4 = Variant::BooleanFalse; + /// assert_eq!(v4.as_u8(), Some(0)); /// /// // but not a variant that can't fit into the range - /// let v4 = Variant::from(-1); - /// assert_eq!(v4.as_u8(), None); - /// - /// // not a variant that decimal with scale not equal to zero - /// let d = VariantDecimal4::try_new(1, 2).unwrap(); - /// let v5 = Variant::from(d); + /// let v5 = Variant::from(-1); /// assert_eq!(v5.as_u8(), None); /// /// // or not a variant that cannot be cast into an integer @@ -1003,17 +1070,17 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(d); /// assert_eq!(v2.as_u16(), Some(u16::MAX)); /// + /// // or a variant that decimal with scale not equal to zero + /// let d = VariantDecimal4::try_new(123, 2).unwrap(); + /// let v3 = Variant::from(d); + /// assert_eq!(v3.as_u16(), Some(1)); + /// /// // or from boolean variant - /// let v3= Variant::BooleanFalse; - /// assert_eq!(v3.as_u16(), Some(0)); + /// let v4= Variant::BooleanFalse; + /// assert_eq!(v4.as_u16(), Some(0)); /// /// // but not a variant that can't fit into the range - /// let v4 = Variant::from(-1); - /// assert_eq!(v4.as_u16(), None); - /// - /// // not a variant that decimal with scale not equal to zero - /// let d = VariantDecimal4::try_new(1, 2).unwrap(); - /// let v5 = Variant::from(d); + /// let v5 = Variant::from(-1); /// assert_eq!(v5.as_u16(), None); /// /// // or not a variant that cannot be cast into an integer @@ -1044,17 +1111,17 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(d); /// assert_eq!(v2.as_u32(), Some(u32::MAX)); /// + /// // or a variant that decimal with scale not equal to zero + /// let d = VariantDecimal8::try_new(123, 2).unwrap(); + /// let v3 = Variant::from(d); + /// assert_eq!(v3.as_u32(), 
Some(1)); + /// /// // or from boolean variant - /// let v3 = Variant::BooleanFalse; - /// assert_eq!(v3.as_u32(), Some(0)); + /// let v4 = Variant::BooleanFalse; + /// assert_eq!(v4.as_u32(), Some(0)); /// /// // but not a variant that can't fit into the range - /// let v4 = Variant::from(-1); - /// assert_eq!(v4.as_u32(), None); - /// - /// // not a variant that decimal with scale not equal to zero - /// let d = VariantDecimal8::try_new(1, 2).unwrap(); - /// let v5 = Variant::from(d); + /// let v5 = Variant::from(-1); /// assert_eq!(v5.as_u32(), None); /// /// // or not a variant that cannot be cast into an integer @@ -1085,17 +1152,17 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(d); /// assert_eq!(v2.as_u64(), Some(u64::MAX)); /// + /// // or a variant that decimal with scale not equal to zero + /// let d = VariantDecimal16::try_new(123, 2).unwrap(); + /// let v3 = Variant::from(d); + /// assert_eq!(v3.as_u64(), Some(1)); + /// /// // or from boolean variant - /// let v3 = Variant::BooleanFalse; - /// assert_eq!(v3.as_u64(), Some(0)); + /// let v4 = Variant::BooleanFalse; + /// assert_eq!(v4.as_u64(), Some(0)); /// /// // but not a variant that can't fit into the range - /// let v4 = Variant::from(-1); - /// assert_eq!(v4.as_u64(), None); - /// - /// // not a variant that decimal with scale not equal to zero - /// let d = VariantDecimal16::try_new(1, 2).unwrap(); - /// let v5 = Variant::from(d); + /// let v5 = Variant::from(-1); /// assert_eq!(v5.as_u64(), None); /// /// // or not a variant that cannot be cast into an integer @@ -1106,6 +1173,21 @@ impl<'m, 'v> Variant<'m, 'v> { self.as_num() } + fn convert_string_to_decimal(input: &str) -> Option + where + D: DecimalType, + VD: VariantDecimalType, + D::Native: NumCast + DecimalCast, + { + // find the last '.' 
+ let scale_usize = input.rsplit_once('.').map_or(0, |(_, frac)| frac.len()); + + let scale = u8::try_from(scale_usize).ok()?; + + let raw = parse_string_to_decimal_native::(input, scale_usize).ok()?; + VD::try_new(raw, scale).ok() + } + /// Converts this variant to tuple with a 4-byte unscaled value if possible. /// /// Returns `Some((i32, u8))` for decimal variants where the unscaled value @@ -1125,19 +1207,31 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(VariantDecimal8::try_new(1234_i64, 2).unwrap()); /// assert_eq!(v2.as_decimal4(), VariantDecimal4::try_new(1234_i32, 2).ok()); /// + /// // or from string variants if they can be parsed as decimals + /// let v3 = Variant::from("123.45"); + /// assert_eq!(v3.as_decimal4(), VariantDecimal4::try_new(12345, 2).ok()); + /// /// // but not if the value would overflow i32 - /// let v3 = Variant::from(VariantDecimal8::try_new(12345678901i64, 2).unwrap()); - /// assert_eq!(v3.as_decimal4(), None); + /// let v4 = Variant::from(VariantDecimal8::try_new(12345678901i64, 2).unwrap()); + /// assert_eq!(v4.as_decimal4(), None); /// /// // or if the variant is not a decimal - /// let v4 = Variant::from("hello!"); - /// assert_eq!(v4.as_decimal4(), None); + /// let v5 = Variant::from("hello!"); + /// assert_eq!(v5.as_decimal4(), None); /// ``` pub fn as_decimal4(&self) -> Option { match *self { Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | Variant::Int64(_) => { self.as_num::().and_then(|x| x.try_into().ok()) } + Variant::Float(f) => single_float_to_decimal::(f as _, 1f64) + .and_then(|x: i32| x.try_into().ok()), + Variant::Double(f) => single_float_to_decimal::(f, 1f64) + .and_then(|x: i32| x.try_into().ok()), + Variant::String(v) => Self::convert_string_to_decimal::(v), + Variant::ShortString(v) => { + Self::convert_string_to_decimal::(v.as_str()) + } Variant::Decimal4(decimal4) => Some(decimal4), Variant::Decimal8(decimal8) => decimal8.try_into().ok(), Variant::Decimal16(decimal16) => 
decimal16.try_into().ok(), @@ -1148,7 +1242,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to tuple with an 8-byte unscaled value if possible. /// /// Returns `Some((i64, u8))` for decimal variants where the unscaled value - /// fits in `i64` range, + /// fits in `i64` range (floats are rounded to integers; strings use their fractional digit count as scale), /// `None` for non-decimal variants or decimal values that would overflow. /// /// # Examples @@ -1164,19 +1258,31 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(VariantDecimal16::try_new(1234_i128, 2).unwrap()); /// assert_eq!(v2.as_decimal8(), VariantDecimal8::try_new(1234_i64, 2).ok()); /// + /// // or from string variants if they can be parsed as decimals + /// let v3 = Variant::from("123.45"); + /// assert_eq!(v3.as_decimal8(), VariantDecimal8::try_new(12345, 2).ok()); + /// /// // but not if the value would overflow i64 - /// let v3 = Variant::from(VariantDecimal16::try_new(2e19 as i128, 2).unwrap()); - /// assert_eq!(v3.as_decimal8(), None); + /// let v4 = Variant::from(VariantDecimal16::try_new(2e19 as i128, 2).unwrap()); + /// assert_eq!(v4.as_decimal8(), None); /// /// // or if the variant is not a decimal - /// let v4 = Variant::from("hello!"); - /// assert_eq!(v4.as_decimal8(), None); + /// let v5 = Variant::from("hello!"); + /// assert_eq!(v5.as_decimal8(), None); /// ``` pub fn as_decimal8(&self) -> Option { match *self { Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | Variant::Int64(_) => { self.as_num::().and_then(|x| x.try_into().ok()) } + Variant::Float(f) => single_float_to_decimal::(f as _, 1f64) + .and_then(|x: i64| x.try_into().ok()), + Variant::Double(f) => single_float_to_decimal::(f, 1f64) + .and_then(|x: i64| x.try_into().ok()), + Variant::String(v) => Self::convert_string_to_decimal::(v), + Variant::ShortString(v) => { + Self::convert_string_to_decimal::(v.as_str()) + } Variant::Decimal4(decimal4) => Some(decimal4.into()), Variant::Decimal8(decimal8) => Some(decimal8),
Variant::Decimal16(decimal16) => decimal16.try_into().ok(), @@ -1187,7 +1293,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to tuple with a 16-byte unscaled value if possible. /// /// Returns `Some((i128, u8))` for decimal variants where the unscaled value - /// fits in `i128` range, + /// fits in `i128` range (floats are rounded to integers; strings use their fractional digit count as scale), /// `None` for non-decimal variants or decimal values that would overflow. /// /// # Examples @@ -1199,14 +1305,30 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap()); /// assert_eq!(v1.as_decimal16(), VariantDecimal16::try_new(1234_i128, 2).ok()); /// + /// // or from a string variant if it can be parsed as decimal + /// let v2 = Variant::from("123.45"); + /// assert_eq!(v2.as_decimal16(), VariantDecimal16::try_new(12345, 2).ok()); + /// /// // but not if the variant is not a decimal - /// let v2 = Variant::from("hello!"); - /// assert_eq!(v2.as_decimal16(), None); + /// let v3 = Variant::from("hello!"); + /// assert_eq!(v3.as_decimal16(), None); /// ``` pub fn as_decimal16(&self) -> Option { match *self { Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | Variant::Int64(_) => { - self.as_num::().and_then(|x| x.try_into().ok()) + let x = self.as_num::()?; + >::from(x).try_into().ok() + } + Variant::Float(f) => { + single_float_to_decimal::(>::from(f), 1f64) + .and_then(|x| x.try_into().ok()) + } + Variant::Double(f) => { + single_float_to_decimal::(f, 1f64).and_then(|x| x.try_into().ok()) + } + Variant::String(v) => Self::convert_string_to_decimal::(v), + Variant::ShortString(v) => { + Self::convert_string_to_decimal::(v.as_str()) } Variant::Decimal4(decimal4) => Some(decimal4.into()), Variant::Decimal8(decimal8) => Some(decimal8.into()),