Skip to content

Commit 9c1dc34

Browse files
author
B Vadlamani
committed
int_to_binary
1 parent 999e1f1 commit 9c1dc34

4 files changed

Lines changed: 116 additions & 169 deletions

File tree

docs/source/user-guide/latest/compatibility.md

Lines changed: 0 additions & 92 deletions
Original file line number | Diff line number | Diff line change
@@ -131,89 +131,6 @@ The following cast operations are generally compatible with Spark except for the
131131
<!-- WARNING! DO NOT MANUALLY MODIFY CONTENT BETWEEN THE BEGIN AND END TAGS -->
132132

133133
<!--BEGIN:COMPAT_CAST_TABLE-->
134-
<!-- prettier-ignore-start -->
135-
| From Type | To Type | Notes |
136-
|-|-|-|
137-
| boolean | byte | |
138-
| boolean | short | |
139-
| boolean | integer | |
140-
| boolean | long | |
141-
| boolean | float | |
142-
| boolean | double | |
143-
| boolean | string | |
144-
| byte | boolean | |
145-
| byte | short | |
146-
| byte | integer | |
147-
| byte | long | |
148-
| byte | float | |
149-
| byte | double | |
150-
| byte | decimal | |
151-
| byte | string | |
152-
| byte | binary | |
153-
| short | boolean | |
154-
| short | byte | |
155-
| short | integer | |
156-
| short | long | |
157-
| short | float | |
158-
| short | double | |
159-
| short | decimal | |
160-
| short | string | |
161-
| short | binary | |
162-
| integer | boolean | |
163-
| integer | byte | |
164-
| integer | short | |
165-
| integer | long | |
166-
| integer | float | |
167-
| integer | double | |
168-
| integer | decimal | |
169-
| integer | string | |
170-
| integer | binary | |
171-
| long | boolean | |
172-
| long | byte | |
173-
| long | short | |
174-
| long | integer | |
175-
| long | float | |
176-
| long | double | |
177-
| long | decimal | |
178-
| long | string | |
179-
| long | binary | |
180-
| float | boolean | |
181-
| float | byte | |
182-
| float | short | |
183-
| float | integer | |
184-
| float | long | |
185-
| float | double | |
186-
| float | string | There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 |
187-
| double | boolean | |
188-
| double | byte | |
189-
| double | short | |
190-
| double | integer | |
191-
| double | long | |
192-
| double | float | |
193-
| double | string | There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 |
194-
| decimal | boolean | |
195-
| decimal | byte | |
196-
| decimal | short | |
197-
| decimal | integer | |
198-
| decimal | long | |
199-
| decimal | float | |
200-
| decimal | double | |
201-
| decimal | decimal | |
202-
| decimal | string | There can be formatting differences in some cases due to Spark using scientific notation where Comet does not |
203-
| string | boolean | |
204-
| string | byte | |
205-
| string | short | |
206-
| string | integer | |
207-
| string | long | |
208-
| string | float | |
209-
| string | double | |
210-
| string | date | Only supports years between 262143 BC and 262142 AD |
211-
| binary | string | |
212-
| date | string | |
213-
| timestamp | long | |
214-
| timestamp | string | |
215-
| timestamp | date | |
216-
<!-- prettier-ignore-end -->
217134
<!--END:COMPAT_CAST_TABLE-->
218135

219136
### Incompatible Casts
@@ -223,15 +140,6 @@ The following cast operations are not compatible with Spark for all inputs and a
223140
<!-- WARNING! DO NOT MANUALLY MODIFY CONTENT BETWEEN THE BEGIN AND END TAGS -->
224141

225142
<!--BEGIN:INCOMPAT_CAST_TABLE-->
226-
<!-- prettier-ignore-start -->
227-
| From Type | To Type | Notes |
228-
|-|-|-|
229-
| float | decimal | There can be rounding differences |
230-
| double | decimal | There can be rounding differences |
231-
| string | decimal | Does not support fullwidth unicode digits (e.g. \\uFF10)
232-
or strings containing null bytes (e.g. \\u0000) |
233-
| string | timestamp | Not all valid formats are supported |
234-
<!-- prettier-ignore-end -->
235143
<!--END:INCOMPAT_CAST_TABLE-->
236144

237145
### Unsupported Casts

native/spark-expr/src/conversion_funcs/cast.rs

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,7 @@ use std::{
6666
num::Wrapping,
6767
sync::Arc,
6868
};
69+
use crate::EvalMode::Legacy;
6970

7071
static TIMESTAMP_FORMAT: Option<&str> = Some("%Y-%m-%d %H:%M:%S%.f");
7172

@@ -1123,10 +1124,10 @@ fn cast_array(
11231124
Ok(cast_with_options(&array, to_type, &CAST_OPTIONS)?)
11241125
}
11251126
(Binary, Utf8) => Ok(cast_binary_to_string::<i32>(&array, cast_options)?),
1126-
(Int8, Binary) => cast_whole_num_to_binary!(&array, Int8Array, 1),
1127-
(Int16, Binary) => cast_whole_num_to_binary!(&array, Int16Array, 2),
1128-
(Int32, Binary) => cast_whole_num_to_binary!(&array, Int32Array, 4),
1129-
(Int64, Binary) => cast_whole_num_to_binary!(&array, Int64Array, 8),
1127+
(Int8, Binary) if (eval_mode == Legacy) => cast_whole_num_to_binary!(&array, Int8Array, 1),
1128+
(Int16, Binary) if (eval_mode == Legacy) => cast_whole_num_to_binary!(&array, Int16Array, 2),
1129+
(Int32, Binary) if (eval_mode == Legacy) => cast_whole_num_to_binary!(&array, Int32Array, 4),
1130+
(Int64, Binary) if (eval_mode == Legacy) => cast_whole_num_to_binary!(&array, Int64Array, 8),
11301131
_ if cast_options.is_adapting_schema
11311132
|| is_datafusion_spark_compatible(from_type, to_type) =>
11321133
{

spark/src/main/scala/org/apache/comet/expressions/CometCast.scala

Lines changed: 80 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -126,7 +126,6 @@ object CometCast extends CometExpressionSerde[Cast] with CometExprShim {
126126
isSupported(dt.elementType, DataTypes.StringType, timeZoneId, evalMode)
127127
case (dt: ArrayType, dt1: ArrayType) =>
128128
isSupported(dt.elementType, dt1.elementType, timeZoneId, evalMode)
129-
case (from: DataType, _: BinaryType) => canCastToBinary(from)
130129
case (dt: DataType, _) if dt.typeName == "timestamp_ntz" =>
131130
// https://github.com/apache/datafusion-comet/issues/378
132131
toType match {
@@ -148,13 +147,13 @@ object CometCast extends CometExpressionSerde[Cast] with CometExprShim {
148147
case (DataTypes.BooleanType, _) =>
149148
canCastFromBoolean(toType)
150149
case (DataTypes.ByteType, _) =>
151-
canCastFromByte(toType)
150+
canCastFromByte(toType, evalMode)
152151
case (DataTypes.ShortType, _) =>
153-
canCastFromShort(toType)
152+
canCastFromShort(toType, evalMode)
154153
case (DataTypes.IntegerType, _) =>
155-
canCastFromInt(toType)
154+
canCastFromInt(toType, evalMode)
156155
case (DataTypes.LongType, _) =>
157-
canCastFromLong(toType)
156+
canCastFromLong(toType, evalMode)
158157
case (DataTypes.FloatType, _) =>
159158
canCastFromFloat(toType)
160159
case (DataTypes.DoubleType, _) =>
@@ -269,53 +268,85 @@ object CometCast extends CometExpressionSerde[Cast] with CometExprShim {
269268
case _ => unsupported(DataTypes.BooleanType, toType)
270269
}
271270

272-
private def canCastFromByte(toType: DataType): SupportLevel = toType match {
273-
case DataTypes.BooleanType =>
274-
Compatible()
275-
case DataTypes.ShortType | DataTypes.IntegerType | DataTypes.LongType =>
276-
Compatible()
277-
case DataTypes.FloatType | DataTypes.DoubleType | _: DecimalType =>
278-
Compatible()
279-
case _ =>
280-
unsupported(DataTypes.ByteType, toType)
281-
}
271+
private def canCastFromByte(toType: DataType, evalMode: CometEvalMode.Value): SupportLevel =
272+
toType match {
273+
case DataTypes.BooleanType =>
274+
Compatible()
275+
case DataTypes.ShortType | DataTypes.IntegerType | DataTypes.LongType =>
276+
Compatible()
277+
case DataTypes.FloatType | DataTypes.DoubleType | _: DecimalType =>
278+
Compatible()
279+
case DataTypes.BinaryType =>
280+
if (evalMode == CometEvalMode.LEGACY) {
281+
Compatible()
282+
} else {
283+
Unsupported(
284+
Some(s"Spark does not support byte to binary conversion in ${evalMode} eval mode"))
285+
}
286+
case _ =>
287+
unsupported(DataTypes.ByteType, toType)
288+
}
282289

283-
private def canCastFromShort(toType: DataType): SupportLevel = toType match {
284-
case DataTypes.BooleanType =>
285-
Compatible()
286-
case DataTypes.ByteType | DataTypes.IntegerType | DataTypes.LongType =>
287-
Compatible()
288-
case DataTypes.FloatType | DataTypes.DoubleType | _: DecimalType =>
289-
Compatible()
290-
case _ =>
291-
unsupported(DataTypes.ShortType, toType)
292-
}
290+
private def canCastFromShort(toType: DataType, evalMode: CometEvalMode.Value): SupportLevel =
291+
toType match {
292+
case DataTypes.BooleanType =>
293+
Compatible()
294+
case DataTypes.ByteType | DataTypes.IntegerType | DataTypes.LongType =>
295+
Compatible()
296+
case DataTypes.FloatType | DataTypes.DoubleType | _: DecimalType =>
297+
Compatible()
298+
case DataTypes.BinaryType =>
299+
if (evalMode == CometEvalMode.LEGACY) {
300+
Compatible()
301+
} else {
302+
Unsupported(
303+
Some(s"Spark does not support short to binary conversion in ${evalMode} eval mode"))
304+
}
305+
case _ =>
306+
unsupported(DataTypes.ShortType, toType)
307+
}
293308

294-
private def canCastFromInt(toType: DataType): SupportLevel = toType match {
295-
case DataTypes.BooleanType =>
296-
Compatible()
297-
case DataTypes.ByteType | DataTypes.ShortType | DataTypes.LongType =>
298-
Compatible()
299-
case DataTypes.FloatType | DataTypes.DoubleType =>
300-
Compatible()
301-
case _: DecimalType =>
302-
Compatible()
303-
case _ =>
304-
unsupported(DataTypes.IntegerType, toType)
305-
}
309+
private def canCastFromInt(toType: DataType, evalMode: CometEvalMode.Value): SupportLevel =
310+
toType match {
311+
case DataTypes.BooleanType =>
312+
Compatible()
313+
case DataTypes.ByteType | DataTypes.ShortType | DataTypes.LongType =>
314+
Compatible()
315+
case DataTypes.FloatType | DataTypes.DoubleType =>
316+
Compatible()
317+
case _: DecimalType =>
318+
Compatible()
319+
case DataTypes.BinaryType =>
320+
if (evalMode == CometEvalMode.LEGACY) {
321+
Compatible()
322+
} else {
323+
Unsupported(
324+
Some(s"Spark does not support int to binary conversion in ${evalMode} eval mode"))
325+
}
326+
case _ =>
327+
unsupported(DataTypes.IntegerType, toType)
328+
}
306329

307-
private def canCastFromLong(toType: DataType): SupportLevel = toType match {
308-
case DataTypes.BooleanType =>
309-
Compatible()
310-
case DataTypes.ByteType | DataTypes.ShortType | DataTypes.IntegerType =>
311-
Compatible()
312-
case DataTypes.FloatType | DataTypes.DoubleType =>
313-
Compatible()
314-
case _: DecimalType =>
315-
Compatible()
316-
case _ =>
317-
unsupported(DataTypes.LongType, toType)
318-
}
330+
private def canCastFromLong(toType: DataType, evalMode: CometEvalMode.Value): SupportLevel =
331+
toType match {
332+
case DataTypes.BooleanType =>
333+
Compatible()
334+
case DataTypes.ByteType | DataTypes.ShortType | DataTypes.IntegerType =>
335+
Compatible()
336+
case DataTypes.FloatType | DataTypes.DoubleType =>
337+
Compatible()
338+
case _: DecimalType =>
339+
Compatible()
340+
case DataTypes.BinaryType =>
341+
if (evalMode == CometEvalMode.LEGACY) {
342+
Compatible()
343+
} else {
344+
Unsupported(
345+
Some(s"Spark does not support long to binary conversion in ${evalMode} eval mode"))
346+
}
347+
case _ =>
348+
unsupported(DataTypes.LongType, toType)
349+
}
319350

320351
private def canCastFromFloat(toType: DataType): SupportLevel = toType match {
321352
case DataTypes.BooleanType | DataTypes.DoubleType | DataTypes.ByteType | DataTypes.ShortType |

0 commit comments

Comments
 (0)