Skip to content
Open
39 changes: 31 additions & 8 deletions arrow-cast/src/cast/decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,7 @@ where

/// Parses given string to specified decimal native (i128/i256) based on given
/// scale. Returns an `Err` if it cannot parse given string.
pub(crate) fn parse_string_to_decimal_native<T: DecimalType>(
pub fn parse_string_to_decimal_native<T: DecimalType>(
value_str: &str,
scale: usize,
) -> Result<T::Native, ArrowError>
Expand Down Expand Up @@ -777,15 +777,15 @@ where
if cast_options.safe {
array
.unary_opt::<_, D>(|v| {
D::Native::from_f64((mul * v.as_()).round())
single_float_to_decimal::<D>(v.as_(), mul)
.filter(|v| D::is_valid_decimal_precision(*v, precision))
})
.with_precision_and_scale(precision, scale)
.map(|a| Arc::new(a) as ArrayRef)
} else {
array
.try_unary::<_, D, _>(|v| {
D::Native::from_f64((mul * v.as_()).round())
single_float_to_decimal::<D>(v.as_(), mul)
.ok_or_else(|| {
ArrowError::CastError(format!(
"Cannot cast to {}({}, {}). Overflowing on {:?}",
Expand All @@ -802,6 +802,17 @@ where
}
}

/// Converts a single floating point value to a decimal native value.
///
/// `input` is multiplied by `mul` (typically `10^scale`), rounded with `f64::round`,
/// and the result is handed to the native type's `from_f64` conversion.
///
/// Returns `None` when that conversion fails. This helper never receives the target
/// precision, so it does NOT validate it: callers that need the result to fit a
/// given precision must check it themselves (for example with
/// `D::is_valid_decimal_precision`).
#[inline(always)]
pub fn single_float_to_decimal<D>(input: f64, mul: f64) -> Option<D::Native>
where
    D: DecimalType + ArrowPrimitiveType,
    <D as ArrowPrimitiveType>::Native: DecimalCast,
{
    // Scale first, then round, so the fractional part beyond the scale is rounded
    // rather than truncated by the conversion.
    D::Native::from_f64((mul * input).round())
}

pub(crate) fn cast_decimal_to_integer<D, T>(
array: &dyn Array,
base: D::Native,
Expand Down Expand Up @@ -870,11 +881,10 @@ where
if array.is_null(i) {
value_builder.append_null();
} else {
let v = array
.value(i)
.div_checked(div)
.ok()
.and_then(<T::Native as NumCast>::from::<D::Native>);
let v = cast_single_decimal_to_integer_div_opt::<D, T::Native>(
array.value(i),
div,
);
value_builder.append_option(v);
}
}
Expand Down Expand Up @@ -904,6 +914,19 @@ where
Ok(Arc::new(value_builder.finish()))
}

/// Divides a single decimal value by `div` and converts the quotient to the
/// integer type `T`.
///
/// Yields `None` when the checked division reports an error, or when the quotient
/// cannot be converted to `T` through `NumCast::from`.
#[inline(always)]
pub fn cast_single_decimal_to_integer_div_opt<D, T>(value: D::Native, div: D::Native) -> Option<T>
where
    T: NumCast + ToPrimitive,
    D: DecimalType + ArrowPrimitiveType,
    <D as ArrowPrimitiveType>::Native: ToPrimitive,
{
    value
        .div_checked(div)
        // A failed division is folded into `None` instead of being propagated.
        .ok()
        .and_then(<T as NumCast>::from::<D::Native>)
}

/// Cast a decimal array to a floating point array.
///
/// Conversion is lossy and follows standard floating point semantics. Values
Expand Down
24 changes: 21 additions & 3 deletions arrow-cast/src/cast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,26 @@ use arrow_schema::*;
use arrow_select::take::take;
use num_traits::{NumCast, ToPrimitive, cast::AsPrimitive};

pub use decimal::{DecimalCast, rescale_decimal};
pub use decimal::{
DecimalCast, cast_single_decimal_to_integer_div_opt, parse_string_to_decimal_native,
rescale_decimal, single_float_to_decimal,
};
pub use string::cast_single_string_to_boolean_default;

/// Converts a single decimal native value to `f64`, accepting loss of precision.
///
/// The value is first turned into a float by the caller-supplied closure `f` and
/// is then divided by `10^scale`. The arithmetic follows standard floating point
/// semantics: results outside the representable range become `INFINITY` or
/// `-INFINITY`, and no error is ever returned.
#[inline(always)]
pub fn single_decimal_to_float_lossy<D, F>(f: &F, x: D::Native, scale: i32) -> f64
where
    D: DecimalType,
    F: Fn(D::Native) -> f64,
{
    let unscaled = f(x);
    let divisor = 10_f64.powi(scale);
    unscaled / divisor
}

/// CastOptions provides a way to override the default cast behaviors
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct CastOptions<'a> {
Expand Down Expand Up @@ -2314,10 +2331,11 @@ where
Int32 => cast_decimal_to_integer::<D, Int32Type>(array, base, *scale, cast_options),
Int64 => cast_decimal_to_integer::<D, Int64Type>(array, base, *scale, cast_options),
Float32 => cast_decimal_to_float::<D, Float32Type, _>(array, |x| {
(as_float(x) / 10_f64.powi(*scale as i32)) as f32
single_decimal_to_float_lossy::<D, F>(&as_float, x, <i32 as From<i8>>::from(*scale))
as f32
}),
Float64 => cast_decimal_to_float::<D, Float64Type, _>(array, |x| {
as_float(x) / 10_f64.powi(*scale as i32)
single_decimal_to_float_lossy::<D, F>(&as_float, x, <i32 as From<i8>>::from(*scale))
}),
Utf8View => value_to_string_view(array, cast_options),
Utf8 => value_to_string::<i32>(array, cast_options),
Expand Down
20 changes: 18 additions & 2 deletions parquet-variant-compute/src/type_conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@

//! Module for transforming a typed arrow `Array` to `VariantArray`.

use arrow::compute::{CastOptions, DecimalCast, rescale_decimal};
use arrow::compute::{
CastOptions, DecimalCast, parse_string_to_decimal_native, rescale_decimal,
single_float_to_decimal,
};
use arrow::datatypes::{
self, ArrowPrimitiveType, ArrowTimestampType, Decimal32Type, Decimal64Type, Decimal128Type,
DecimalType,
Expand Down Expand Up @@ -204,9 +207,12 @@ impl_timestamp_from_variant!(
///
/// - `precision` and `scale` specify the target Arrow decimal parameters
/// - Integer variants (`Int8/16/32/64`) are treated as decimals with scale 0
/// - Floating point variants (`Float/Double`) are converted to decimals with the given scale
/// - String variants (`String/ShortString`) are parsed as decimals with the given scale
/// - Decimal variants (`Decimal4/8/16`) use their embedded precision and scale
///
/// The value is rescaled to (`precision`, `scale`) using `rescale_decimal` and
/// The value is rescaled to (`precision`, `scale`) using `rescale_decimal` for integers,
/// `single_float_to_decimal` for floats, and `parse_string_to_decimal_native` for strings.
/// Returns `None` if it cannot fit the requested precision.
pub(crate) fn variant_to_unscaled_decimal<O>(
variant: &Variant<'_, '_>,
Expand All @@ -217,6 +223,8 @@ where
O: DecimalType,
O::Native: DecimalCast,
{
let mul = 10_f64.powi(scale as i32);

match variant {
Variant::Int8(i) => rescale_decimal::<Decimal32Type, O>(
*i as i32,
Expand Down Expand Up @@ -246,6 +254,14 @@ where
precision,
scale,
),
Variant::Float(f) => single_float_to_decimal::<O>(f64::from(*f), mul),
Variant::Double(f) => single_float_to_decimal::<O>(*f, mul),
// arrow-cast only supports casting strings to decimals with scale >= 0 for now.
// Please see `cast_string_to_decimal` in arrow-cast/src/cast/decimal.rs for more details.
Variant::String(v) if scale >= 0 => parse_string_to_decimal_native::<O>(v, scale as _).ok(),
Variant::ShortString(v) if scale >= 0 => {
parse_string_to_decimal_native::<O>(v, scale as _).ok()
}
Variant::Decimal4(d) => rescale_decimal::<Decimal32Type, O>(
d.integer(),
VariantDecimal4::MAX_PRECISION,
Expand Down
Loading
Loading