Skip to content

Commit 4e6b6bb

Browse files
committed
int_to_binary
1 parent ef34493 commit 4e6b6bb

3 files changed

Lines changed: 116 additions & 77 deletions

File tree

native/spark-expr/src/conversion_funcs/cast.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ use std::{
6666
num::Wrapping,
6767
sync::Arc,
6868
};
69+
use crate::EvalMode::Legacy;
6970

7071
static TIMESTAMP_FORMAT: Option<&str> = Some("%Y-%m-%d %H:%M:%S%.f");
7172

@@ -1123,10 +1124,10 @@ fn cast_array(
11231124
Ok(cast_with_options(&array, to_type, &CAST_OPTIONS)?)
11241125
}
11251126
(Binary, Utf8) => Ok(cast_binary_to_string::<i32>(&array, cast_options)?),
1126-
(Int8, Binary) => cast_whole_num_to_binary!(&array, Int8Array, 1),
1127-
(Int16, Binary) => cast_whole_num_to_binary!(&array, Int16Array, 2),
1128-
(Int32, Binary) => cast_whole_num_to_binary!(&array, Int32Array, 4),
1129-
(Int64, Binary) => cast_whole_num_to_binary!(&array, Int64Array, 8),
1127+
(Int8, Binary) if (eval_mode == Legacy) => cast_whole_num_to_binary!(&array, Int8Array, 1),
1128+
(Int16, Binary) if (eval_mode == Legacy) => cast_whole_num_to_binary!(&array, Int16Array, 2),
1129+
(Int32, Binary) if (eval_mode == Legacy) => cast_whole_num_to_binary!(&array, Int32Array, 4),
1130+
(Int64, Binary) if (eval_mode == Legacy) => cast_whole_num_to_binary!(&array, Int64Array, 8),
11301131
_ if cast_options.is_adapting_schema
11311132
|| is_datafusion_spark_compatible(from_type, to_type) =>
11321133
{

spark/src/main/scala/org/apache/comet/expressions/CometCast.scala

Lines changed: 80 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,6 @@ object CometCast extends CometExpressionSerde[Cast] with CometExprShim {
126126
isSupported(dt.elementType, DataTypes.StringType, timeZoneId, evalMode)
127127
case (dt: ArrayType, dt1: ArrayType) =>
128128
isSupported(dt.elementType, dt1.elementType, timeZoneId, evalMode)
129-
case (from: DataType, _: BinaryType) => canCastToBinary(from)
130129
case (dt: DataType, _) if dt.typeName == "timestamp_ntz" =>
131130
// https://github.com/apache/datafusion-comet/issues/378
132131
toType match {
@@ -148,13 +147,13 @@ object CometCast extends CometExpressionSerde[Cast] with CometExprShim {
148147
case (DataTypes.BooleanType, _) =>
149148
canCastFromBoolean(toType)
150149
case (DataTypes.ByteType, _) =>
151-
canCastFromByte(toType)
150+
canCastFromByte(toType, evalMode)
152151
case (DataTypes.ShortType, _) =>
153-
canCastFromShort(toType)
152+
canCastFromShort(toType, evalMode)
154153
case (DataTypes.IntegerType, _) =>
155-
canCastFromInt(toType)
154+
canCastFromInt(toType, evalMode)
156155
case (DataTypes.LongType, _) =>
157-
canCastFromLong(toType)
156+
canCastFromLong(toType, evalMode)
158157
case (DataTypes.FloatType, _) =>
159158
canCastFromFloat(toType)
160159
case (DataTypes.DoubleType, _) =>
@@ -269,53 +268,85 @@ object CometCast extends CometExpressionSerde[Cast] with CometExprShim {
269268
case _ => unsupported(DataTypes.BooleanType, toType)
270269
}
271270

272-
private def canCastFromByte(toType: DataType): SupportLevel = toType match {
273-
case DataTypes.BooleanType =>
274-
Compatible()
275-
case DataTypes.ShortType | DataTypes.IntegerType | DataTypes.LongType =>
276-
Compatible()
277-
case DataTypes.FloatType | DataTypes.DoubleType | _: DecimalType =>
278-
Compatible()
279-
case _ =>
280-
unsupported(DataTypes.ByteType, toType)
281-
}
271+
private def canCastFromByte(toType: DataType, evalMode: CometEvalMode.Value): SupportLevel =
272+
toType match {
273+
case DataTypes.BooleanType =>
274+
Compatible()
275+
case DataTypes.ShortType | DataTypes.IntegerType | DataTypes.LongType =>
276+
Compatible()
277+
case DataTypes.FloatType | DataTypes.DoubleType | _: DecimalType =>
278+
Compatible()
279+
case DataTypes.BinaryType =>
280+
if (evalMode == CometEvalMode.LEGACY) {
281+
Compatible()
282+
} else {
283+
Unsupported(
284+
Some(s"Spark does not support byte to binary conversion in ${evalMode} eval mode"))
285+
}
286+
case _ =>
287+
unsupported(DataTypes.ByteType, toType)
288+
}
282289

283-
private def canCastFromShort(toType: DataType): SupportLevel = toType match {
284-
case DataTypes.BooleanType =>
285-
Compatible()
286-
case DataTypes.ByteType | DataTypes.IntegerType | DataTypes.LongType =>
287-
Compatible()
288-
case DataTypes.FloatType | DataTypes.DoubleType | _: DecimalType =>
289-
Compatible()
290-
case _ =>
291-
unsupported(DataTypes.ShortType, toType)
292-
}
290+
private def canCastFromShort(toType: DataType, evalMode: CometEvalMode.Value): SupportLevel =
291+
toType match {
292+
case DataTypes.BooleanType =>
293+
Compatible()
294+
case DataTypes.ByteType | DataTypes.IntegerType | DataTypes.LongType =>
295+
Compatible()
296+
case DataTypes.FloatType | DataTypes.DoubleType | _: DecimalType =>
297+
Compatible()
298+
case DataTypes.BinaryType =>
299+
if (evalMode == CometEvalMode.LEGACY) {
300+
Compatible()
301+
} else {
302+
Unsupported(
303+
Some(s"Spark does not support short to binary conversion in ${evalMode} eval mode"))
304+
}
305+
case _ =>
306+
unsupported(DataTypes.ShortType, toType)
307+
}
293308

294-
private def canCastFromInt(toType: DataType): SupportLevel = toType match {
295-
case DataTypes.BooleanType =>
296-
Compatible()
297-
case DataTypes.ByteType | DataTypes.ShortType | DataTypes.LongType =>
298-
Compatible()
299-
case DataTypes.FloatType | DataTypes.DoubleType =>
300-
Compatible()
301-
case _: DecimalType =>
302-
Compatible()
303-
case _ =>
304-
unsupported(DataTypes.IntegerType, toType)
305-
}
309+
private def canCastFromInt(toType: DataType, evalMode: CometEvalMode.Value): SupportLevel =
310+
toType match {
311+
case DataTypes.BooleanType =>
312+
Compatible()
313+
case DataTypes.ByteType | DataTypes.ShortType | DataTypes.LongType =>
314+
Compatible()
315+
case DataTypes.FloatType | DataTypes.DoubleType =>
316+
Compatible()
317+
case _: DecimalType =>
318+
Compatible()
319+
case DataTypes.BinaryType =>
320+
if (evalMode == CometEvalMode.LEGACY) {
321+
Compatible()
322+
} else {
323+
Unsupported(
324+
Some(s"Spark does not support int to binary conversion in ${evalMode} eval mode"))
325+
}
326+
case _ =>
327+
unsupported(DataTypes.IntegerType, toType)
328+
}
306329

307-
private def canCastFromLong(toType: DataType): SupportLevel = toType match {
308-
case DataTypes.BooleanType =>
309-
Compatible()
310-
case DataTypes.ByteType | DataTypes.ShortType | DataTypes.IntegerType =>
311-
Compatible()
312-
case DataTypes.FloatType | DataTypes.DoubleType =>
313-
Compatible()
314-
case _: DecimalType =>
315-
Compatible()
316-
case _ =>
317-
unsupported(DataTypes.LongType, toType)
318-
}
330+
private def canCastFromLong(toType: DataType, evalMode: CometEvalMode.Value): SupportLevel =
331+
toType match {
332+
case DataTypes.BooleanType =>
333+
Compatible()
334+
case DataTypes.ByteType | DataTypes.ShortType | DataTypes.IntegerType =>
335+
Compatible()
336+
case DataTypes.FloatType | DataTypes.DoubleType =>
337+
Compatible()
338+
case _: DecimalType =>
339+
Compatible()
340+
case DataTypes.BinaryType =>
341+
if (evalMode == CometEvalMode.LEGACY) {
342+
Compatible()
343+
} else {
344+
Unsupported(
345+
Some(s"Spark does not support long to binary conversion in ${evalMode} eval mode"))
346+
}
347+
case _ =>
348+
unsupported(DataTypes.LongType, toType)
349+
}
319350

320351
private def canCastFromFloat(toType: DataType): SupportLevel = toType match {
321352
case DataTypes.BooleanType | DataTypes.DoubleType | DataTypes.ByteType | DataTypes.ShortType |

spark/src/test/scala/org/apache/comet/CometCastSuite.scala

Lines changed: 31 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -207,11 +207,12 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
207207
hasIncompatibleType = usingParquetExecWithIncompatTypes)
208208
}
209209

210-
ignore("cast ByteType to BinaryType") {
210+
test("cast ByteType to BinaryType") {
211+
// Spark does not support ANSI or Try mode
211212
castTest(
212213
generateBytes(),
213214
DataTypes.BinaryType,
214-
hasIncompatibleType = usingParquetExecWithIncompatTypes)
215+
hasIncompatibleType = usingParquetExecWithIncompatTypes, testAnsi = false, testTry = false)
215216
}
216217

217218
ignore("cast ByteType to TimestampType") {
@@ -282,10 +283,11 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
282283
}
283284

284285
test("cast ShortType to BinaryType") {
286+
// Spark does not support ANSI or Try mode
285287
castTest(
286288
generateShorts(),
287289
DataTypes.BinaryType,
288-
hasIncompatibleType = usingParquetExecWithIncompatTypes)
290+
hasIncompatibleType = usingParquetExecWithIncompatTypes, testAnsi = false, testTry = false)
289291
}
290292

291293
ignore("cast ShortType to TimestampType") {
@@ -346,8 +348,9 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
346348
castTest(generateInts(), DataTypes.StringType)
347349
}
348350

349-
ignore("cast IntegerType to BinaryType") {
350-
castTest(generateInts(), DataTypes.BinaryType)
351+
test("cast IntegerType to BinaryType") {
352+
// Spark does not support ANSI or Try mode
353+
castTest(generateInts(), DataTypes.BinaryType, testAnsi = false, testTry = false)
351354
}
352355

353356
ignore("cast IntegerType to TimestampType") {
@@ -392,8 +395,9 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
392395
castTest(generateLongs(), DataTypes.StringType)
393396
}
394397

395-
ignore("cast LongType to BinaryType") {
396-
castTest(generateLongs(), DataTypes.BinaryType)
398+
test("cast LongType to BinaryType") {
399+
// Spark does not support ANSI or Try mode
400+
castTest(generateLongs(), DataTypes.BinaryType , testAnsi = false, testTry = false)
397401
}
398402

399403
ignore("cast LongType to TimestampType") {
@@ -1329,28 +1333,30 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
13291333
input: DataFrame,
13301334
toType: DataType,
13311335
hasIncompatibleType: Boolean = false,
1332-
testAnsi: Boolean = true): Unit = {
1336+
testAnsi: Boolean = true,
1337+
testTry: Boolean = true): Unit = {
13331338

13341339
withTempPath { dir =>
13351340
val data = roundtripParquet(input, dir).coalesce(1)
1336-
data.createOrReplaceTempView("t")
13371341

13381342
withSQLConf((SQLConf.ANSI_ENABLED.key, "false")) {
13391343
// cast() should return null for invalid inputs when ansi mode is disabled
1340-
val df = spark.sql(s"select a, cast(a as ${toType.sql}) from t order by a")
1344+
val df = data.select(col("a"), col("a").cast(toType)).orderBy(col("a"))
13411345
if (hasIncompatibleType) {
13421346
checkSparkAnswer(df)
13431347
} else {
13441348
checkSparkAnswerAndOperator(df)
13451349
}
13461350

1347-
// try_cast() should always return null for invalid inputs
1348-
val df2 =
1349-
spark.sql(s"select a, try_cast(a as ${toType.sql}) from t order by a")
1350-
if (hasIncompatibleType) {
1351-
checkSparkAnswer(df2)
1352-
} else {
1353-
checkSparkAnswerAndOperator(df2)
1351+
if (testTry){
1352+
// try_cast() should always return null for invalid inputs
1353+
val df2 =
1354+
data.select(col("a"), col("a").try_cast(toType)).orderBy(col("a"))
1355+
if (hasIncompatibleType) {
1356+
checkSparkAnswer(df2)
1357+
} else {
1358+
checkSparkAnswerAndOperator(df2)
1359+
}
13541360
}
13551361
}
13561362

@@ -1408,14 +1414,15 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
14081414
}
14091415

14101416
// try_cast() should always return null for invalid inputs
1411-
val df2 =
1412-
spark.sql(s"select a, try_cast(a as ${toType.sql}) from t order by a")
1413-
if (hasIncompatibleType) {
1414-
checkSparkAnswer(df2)
1415-
} else {
1416-
checkSparkAnswerAndOperator(df2)
1417+
if (testTry){
1418+
val df2 =
1419+
data.select(col("a"), col("a").cast(toType)).orderBy(col("a"))
1420+
if (hasIncompatibleType) {
1421+
checkSparkAnswer(df2)
1422+
} else {
1423+
checkSparkAnswerAndOperator(df2)
1424+
}
14171425
}
1418-
14191426
}
14201427
}
14211428
}

0 commit comments

Comments
 (0)