Skip to content

Commit bd07a7c

Browse files
committed
feat: Refactor integer value handling
- Removed IntegerFormatValue. - Added direct IntegerValue helpers. - Introduced private From implementations via local macros. - Shortened integer ScalarValue arms for clarity. - Deduplicated invalid integer conversion error handling. - Made test argument counts explicit for better readability.
1 parent d8433f2 commit bd07a7c

1 file changed

Lines changed: 90 additions & 92 deletions

File tree

datafusion/spark/src/function/string/format_string.rs

Lines changed: 90 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -898,13 +898,6 @@ enum IntegerValue {
898898
}
899899

900900
impl IntegerValue {
901-
fn decimal(self) -> IntegerFormatValue {
902-
match self {
903-
Self::Signed { decimal, .. } => IntegerFormatValue::Signed(decimal),
904-
Self::Unsigned(value) => IntegerFormatValue::Unsigned(value),
905-
}
906-
}
907-
908901
fn unsigned_bits(self) -> u64 {
909902
match self {
910903
Self::Signed { unsigned_bits, .. } => unsigned_bits,
@@ -918,22 +911,59 @@ impl IntegerValue {
918911
Self::Unsigned(value) => unsigned_to_char(value),
919912
}
920913
}
921-
}
922914

923-
enum IntegerFormatValue {
924-
Signed(i64),
925-
Unsigned(u64),
926-
}
915+
fn format_decimal(
916+
self,
917+
spec: &ConversionSpecifier,
918+
writer: &mut String,
919+
) -> Result<()> {
920+
match self {
921+
Self::Signed { decimal, .. } => spec.format_signed(writer, decimal),
922+
Self::Unsigned(value) => spec.format_unsigned(writer, value),
923+
}
924+
}
927925

928-
impl std::fmt::Display for IntegerFormatValue {
929-
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
926+
fn decimal_string(self) -> String {
930927
match self {
931-
Self::Signed(value) => write!(f, "{value}"),
932-
Self::Unsigned(value) => write!(f, "{value}"),
928+
Self::Signed { decimal, .. } => decimal.to_string(),
929+
Self::Unsigned(value) => value.to_string(),
933930
}
934931
}
935932
}
936933

934+
macro_rules! signed_integer_value {
935+
($source:ty, $unsigned:ty) => {
936+
impl From<$source> for IntegerValue {
937+
fn from(value: $source) -> Self {
938+
Self::Signed {
939+
decimal: value as i64,
940+
unsigned_bits: (value as $unsigned) as u64,
941+
}
942+
}
943+
}
944+
};
945+
}
946+
947+
signed_integer_value!(i8, u8);
948+
signed_integer_value!(i16, u16);
949+
signed_integer_value!(i32, u32);
950+
signed_integer_value!(i64, u64);
951+
952+
macro_rules! unsigned_integer_value {
953+
($source:ty) => {
954+
impl From<$source> for IntegerValue {
955+
fn from(value: $source) -> Self {
956+
Self::Unsigned(value as u64)
957+
}
958+
}
959+
};
960+
}
961+
962+
unsigned_integer_value!(u8);
963+
unsigned_integer_value!(u16);
964+
unsigned_integer_value!(u32);
965+
unsigned_integer_value!(u64);
966+
937967
impl ConversionSpecifier {
938968
/// Validates that the grouping separator flag is not used with scientific
939969
/// notation conversions, matching Java/Spark behavior which throws
@@ -966,56 +996,14 @@ impl ConversionSpecifier {
966996

967997
_ => self.format_boolean(string, value),
968998
},
969-
ScalarValue::Int8(value) => self.format_integer(
970-
string,
971-
value.map(|value| IntegerValue::Signed {
972-
decimal: value as i64,
973-
unsigned_bits: (value as u8) as u64,
974-
}),
975-
"Int8",
976-
),
977-
ScalarValue::Int16(value) => self.format_integer(
978-
string,
979-
value.map(|value| IntegerValue::Signed {
980-
decimal: value as i64,
981-
unsigned_bits: (value as u16) as u64,
982-
}),
983-
"Int16",
984-
),
985-
ScalarValue::Int32(value) => self.format_integer(
986-
string,
987-
value.map(|value| IntegerValue::Signed {
988-
decimal: value as i64,
989-
unsigned_bits: (value as u32) as u64,
990-
}),
991-
"Int32",
992-
),
993-
ScalarValue::Int64(value) => self.format_integer(
994-
string,
995-
value.map(|value| IntegerValue::Signed {
996-
decimal: value,
997-
unsigned_bits: value as u64,
998-
}),
999-
"Int64",
1000-
),
1001-
ScalarValue::UInt8(value) => self.format_integer(
1002-
string,
1003-
value.map(|value| IntegerValue::Unsigned(value as u64)),
1004-
"UInt8",
1005-
),
1006-
ScalarValue::UInt16(value) => self.format_integer(
1007-
string,
1008-
value.map(|value| IntegerValue::Unsigned(value as u64)),
1009-
"UInt16",
1010-
),
1011-
ScalarValue::UInt32(value) => self.format_integer(
1012-
string,
1013-
value.map(|value| IntegerValue::Unsigned(value as u64)),
1014-
"UInt32",
1015-
),
1016-
ScalarValue::UInt64(value) => {
1017-
self.format_integer(string, value.map(IntegerValue::Unsigned), "UInt64")
1018-
}
999+
ScalarValue::Int8(value) => self.format_integer(string, value, "Int8"),
1000+
ScalarValue::Int16(value) => self.format_integer(string, value, "Int16"),
1001+
ScalarValue::Int32(value) => self.format_integer(string, value, "Int32"),
1002+
ScalarValue::Int64(value) => self.format_integer(string, value, "Int64"),
1003+
ScalarValue::UInt8(value) => self.format_integer(string, value, "UInt8"),
1004+
ScalarValue::UInt16(value) => self.format_integer(string, value, "UInt16"),
1005+
ScalarValue::UInt32(value) => self.format_integer(string, value, "UInt32"),
1006+
ScalarValue::UInt64(value) => self.format_integer(string, value, "UInt64"),
10191007
ScalarValue::Float16(value) => match (self.conversion_type, value) {
10201008
(
10211009
ConversionType::DecFloatLower
@@ -1377,31 +1365,25 @@ impl ConversionSpecifier {
13771365
}
13781366
}
13791367

1380-
fn format_integer(
1368+
fn format_integer<T>(
13811369
&self,
13821370
writer: &mut String,
1383-
value: Option<IntegerValue>,
1371+
value: &Option<T>,
13841372
type_name: &str,
1385-
) -> Result<()> {
1386-
let Some(value) = value else {
1373+
) -> Result<()>
1374+
where
1375+
T: Copy + Into<IntegerValue>,
1376+
{
1377+
let Some(value) = value.map(Into::into) else {
13871378
return if self.conversion_type.supports_integer() {
13881379
self.format_string(writer, "null")
13891380
} else {
1390-
exec_err!(
1391-
"Invalid conversion type: {:?} for {}",
1392-
self.conversion_type,
1393-
type_name
1394-
)
1381+
self.invalid_integer_conversion(type_name)
13951382
};
13961383
};
13971384

13981385
match self.conversion_type {
1399-
ConversionType::DecInt => match value.decimal() {
1400-
IntegerFormatValue::Signed(value) => self.format_signed(writer, value),
1401-
IntegerFormatValue::Unsigned(value) => {
1402-
self.format_unsigned(writer, value)
1403-
}
1404-
},
1386+
ConversionType::DecInt => value.format_decimal(self, writer),
14051387
ConversionType::HexIntLower
14061388
| ConversionType::HexIntUpper
14071389
| ConversionType::OctInt => {
@@ -1411,16 +1393,20 @@ impl ConversionSpecifier {
14111393
self.format_char(writer, value.to_char()?)
14121394
}
14131395
ConversionType::StringLower | ConversionType::StringUpper => {
1414-
self.format_string(writer, &value.decimal().to_string())
1396+
self.format_string(writer, &value.decimal_string())
14151397
}
1416-
_ => exec_err!(
1417-
"Invalid conversion type: {:?} for {}",
1418-
self.conversion_type,
1419-
type_name
1420-
),
1398+
_ => self.invalid_integer_conversion(type_name),
14211399
}
14221400
}
14231401

1402+
fn invalid_integer_conversion<T>(&self, type_name: &str) -> Result<T> {
1403+
exec_err!(
1404+
"Invalid conversion type: {:?} for {}",
1405+
self.conversion_type,
1406+
type_name
1407+
)
1408+
}
1409+
14241410
fn format_hex_float(&self, writer: &mut String, value: f64) -> Result<()> {
14251411
// Handle special cases first
14261412
let (sign, raw_exponent, mantissa) = value.to_parts();
@@ -2528,54 +2514,66 @@ mod tests {
25282514
#[test]
25292515
fn test_integer_formatting_across_widths() -> Result<()> {
25302516
let cases = [
2531-
(ScalarValue::Int8(Some(-1)), "%d|%x|%o|%s", "-1|ff|377|-1"),
2517+
(
2518+
ScalarValue::Int8(Some(-1)),
2519+
"%d|%x|%o|%s",
2520+
4,
2521+
"-1|ff|377|-1",
2522+
),
25322523
(
25332524
ScalarValue::Int16(Some(-1)),
25342525
"%d|%x|%o|%s",
2526+
4,
25352527
"-1|ffff|177777|-1",
25362528
),
25372529
(
25382530
ScalarValue::Int32(Some(-1)),
25392531
"%d|%x|%o|%s",
2532+
4,
25402533
"-1|ffffffff|37777777777|-1",
25412534
),
25422535
(
25432536
ScalarValue::Int64(Some(-1)),
25442537
"%d|%x|%o|%s",
2538+
4,
25452539
"-1|ffffffffffffffff|1777777777777777777777|-1",
25462540
),
25472541
(
25482542
ScalarValue::UInt8(Some(255)),
25492543
"%d|%x|%o|%s|%c",
2544+
5,
25502545
"255|ff|377|255|ÿ",
25512546
),
25522547
(
25532548
ScalarValue::UInt16(Some(65535)),
25542549
"%d|%x|%o|%s",
2550+
4,
25552551
"65535|ffff|177777|65535",
25562552
),
25572553
(
25582554
ScalarValue::UInt32(Some(u32::MAX)),
25592555
"%d|%x|%o|%s",
2556+
4,
25602557
"4294967295|ffffffff|37777777777|4294967295",
25612558
),
25622559
(
25632560
ScalarValue::UInt64(Some(u64::MAX)),
25642561
"%d|%x|%o|%s",
2562+
4,
25652563
"18446744073709551615|ffffffffffffffff|1777777777777777777777|18446744073709551615",
25662564
),
25672565
(
25682566
ScalarValue::Int32(None),
25692567
"%d|%x|%o|%s|%c",
2568+
5,
25702569
"null|null|null|null|null",
25712570
),
25722571
];
25732572

2574-
for (value, fmt, expected) in cases {
2575-
let data_types =
2576-
vec![value.data_type(); fmt.chars().filter(|c| *c == '%').count()];
2573+
for (value, fmt, arg_count, expected) in cases {
2574+
let data_types = vec![value.data_type(); arg_count];
25772575
let formatter = Formatter::parse(fmt, &data_types)?;
2578-
let args = vec![value; data_types.len()];
2576+
let args = vec![value; arg_count];
25792577
assert_eq!(formatter.format(&args)?, expected, "{fmt}");
25802578
}
25812579
Ok(())

0 commit comments

Comments
 (0)