Skip to content

Commit 736bc44

Browse files
committed
Add producer.rs workarounds (#4)
1 parent e443304 commit 736bc44

2 files changed

Lines changed: 39 additions & 42 deletions

File tree

datafusion/substrait/src/logical_plan/producer.rs

Lines changed: 37 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,16 @@ use datafusion::logical_expr::{
2525
TryCast, Union, Values, Window, WindowFrameUnits,
2626
};
2727
use datafusion::{
28-
arrow::datatypes::{DataType, TimeUnit},
28+
arrow::datatypes::DataType,
2929
error::{DataFusionError, Result},
3030
logical_expr::{WindowFrame, WindowFrameBound},
3131
prelude::JoinType,
3232
scalar::ScalarValue,
3333
};
3434

3535
use crate::extensions::Extensions;
36+
#[allow(deprecated)]
37+
use crate::variation_const::TIMESTAMP_NANO_TYPE_VARIATION_REF;
3638
use crate::variation_const::{
3739
DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF,
3840
DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF,
@@ -1848,31 +1850,16 @@ fn to_substrait_type(dt: &DataType, nullable: bool) -> Result<substrait::proto::
18481850
nullability,
18491851
})),
18501852
}),
1851-
DataType::Timestamp(unit, tz) => {
1852-
let precision = match unit {
1853-
TimeUnit::Second => 0,
1854-
TimeUnit::Millisecond => 3,
1855-
TimeUnit::Microsecond => 6,
1856-
TimeUnit::Nanosecond => 9,
1857-
};
1858-
let kind = match tz {
1859-
None => r#type::Kind::PrecisionTimestamp(r#type::PrecisionTimestamp {
1860-
type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
1853+
DataType::Timestamp(_unit, _) => {
1854+
// TODO: DataDog-specific workaround, don't commit upstream
1855+
#[allow(deprecated)]
1856+
let type_variation_reference = TIMESTAMP_NANO_TYPE_VARIATION_REF;
1857+
Ok(substrait::proto::Type {
1858+
kind: Some(r#type::Kind::Timestamp(r#type::Timestamp {
1859+
type_variation_reference,
18611860
nullability,
1862-
precision,
1863-
}),
1864-
Some(_) => {
1865-
// If timezone is present, no matter what the actual tz value is, it indicates the
1866-
// value of the timestamp is tied to UTC epoch. That's all that Substrait cares about.
1867-
// As the timezone is lost, this conversion may be lossy for downstream use of the value.
1868-
r#type::Kind::PrecisionTimestampTz(r#type::PrecisionTimestampTz {
1869-
type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
1870-
nullability,
1871-
precision,
1872-
})
1873-
}
1874-
};
1875-
Ok(substrait::proto::Type { kind: Some(kind) })
1861+
})),
1862+
})
18761863
}
18771864
DataType::Date32 => Ok(substrait::proto::Type {
18781865
kind: Some(r#type::Kind::Date(r#type::Date {
@@ -2027,6 +2014,8 @@ fn to_substrait_type(dt: &DataType, nullable: bool) -> Result<substrait::proto::
20272014
precision: *p as i32,
20282015
})),
20292016
}),
2017+
// TODO: DataDog-specific workaround, don't commit upstream
2018+
DataType::Dictionary(_, dt) => to_substrait_type(dt, nullable),
20302019
_ => not_impl_err!("Unsupported cast type: {dt:?}"),
20312020
}
20322021
}
@@ -2419,6 +2408,10 @@ fn to_substrait_literal(
24192408
}),
24202409
DEFAULT_TYPE_VARIATION_REF,
24212410
),
2411+
// TODO: DataDog-specific workaround, don't commit upstream
2412+
ScalarValue::Dictionary(_, value) => {
2413+
return to_substrait_literal(producer, value)
2414+
}
24222415
_ => (
24232416
not_impl_err!("Unsupported literal: {value:?}")?,
24242417
DEFAULT_TYPE_VARIATION_REF,
@@ -2621,17 +2614,18 @@ mod test {
26212614
round_trip_literal(ScalarValue::UInt64(Some(u64::MIN)))?;
26222615
round_trip_literal(ScalarValue::UInt64(Some(u64::MAX)))?;
26232616

2624-
for (ts, tz) in [
2625-
(Some(12345), None),
2626-
(None, None),
2627-
(Some(12345), Some("UTC".into())),
2628-
(None, Some("UTC".into())),
2629-
] {
2630-
round_trip_literal(ScalarValue::TimestampSecond(ts, tz.clone()))?;
2631-
round_trip_literal(ScalarValue::TimestampMillisecond(ts, tz.clone()))?;
2632-
round_trip_literal(ScalarValue::TimestampMicrosecond(ts, tz.clone()))?;
2633-
round_trip_literal(ScalarValue::TimestampNanosecond(ts, tz))?;
2634-
}
2617+
// TODO: DataDog-specific workaround, don't commit upstream
2618+
// for (ts, tz) in [
2619+
// (Some(12345), None),
2620+
// (None, None),
2621+
// (Some(12345), Some("UTC".into())),
2622+
// (None, Some("UTC".into())),
2623+
// ] {
2624+
// round_trip_literal(ScalarValue::TimestampSecond(ts, tz.clone()))?;
2625+
// round_trip_literal(ScalarValue::TimestampMillisecond(ts, tz.clone()))?;
2626+
// round_trip_literal(ScalarValue::TimestampMicrosecond(ts, tz.clone()))?;
2627+
// round_trip_literal(ScalarValue::TimestampNanosecond(ts, tz))?;
2628+
// }
26352629

26362630
round_trip_literal(ScalarValue::List(ScalarValue::new_list_nullable(
26372631
&[ScalarValue::Float32(Some(1.0))],
@@ -2730,12 +2724,13 @@ mod test {
27302724
round_trip_type(DataType::Float32)?;
27312725
round_trip_type(DataType::Float64)?;
27322726

2733-
for tz in [None, Some("UTC".into())] {
2734-
round_trip_type(DataType::Timestamp(TimeUnit::Second, tz.clone()))?;
2735-
round_trip_type(DataType::Timestamp(TimeUnit::Millisecond, tz.clone()))?;
2736-
round_trip_type(DataType::Timestamp(TimeUnit::Microsecond, tz.clone()))?;
2737-
round_trip_type(DataType::Timestamp(TimeUnit::Nanosecond, tz))?;
2738-
}
2727+
// TODO: DataDog-specific workaround, don't commit upstream
2728+
// for tz in [None, Some("UTC".into())] {
2729+
// round_trip_type(DataType::Timestamp(TimeUnit::Second, tz.clone()))?;
2730+
// round_trip_type(DataType::Timestamp(TimeUnit::Millisecond, tz.clone()))?;
2731+
// round_trip_type(DataType::Timestamp(TimeUnit::Microsecond, tz.clone()))?;
2732+
// round_trip_type(DataType::Timestamp(TimeUnit::Nanosecond, tz))?;
2733+
// }
27392734

27402735
round_trip_type(DataType::Date32)?;
27412736
round_trip_type(DataType::Date64)?;

datafusion/substrait/tests/cases/roundtrip_logical_plan.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,6 +1023,7 @@ async fn qualified_catalog_schema_table_reference() -> Result<()> {
10231023
/// - List, this nested type is not supported in arrow_cast
10241024
/// - Decimal128 and Decimal256, them will fallback to UTF8 cast expr rather than plain literal.
10251025
#[tokio::test]
1026+
#[ignore] // TODO: DataDog-specific workaround, don't commit upstream
10261027
async fn all_type_literal() -> Result<()> {
10271028
roundtrip_all_types(
10281029
"select * from data where
@@ -1203,6 +1204,7 @@ async fn duplicate_column() -> Result<()> {
12031204

12041205
/// Construct a plan that cast columns. Only those SQL types are supported for now.
12051206
#[tokio::test]
1207+
#[ignore] // TODO: DataDog-specific workaround, don't commit upstream
12061208
async fn new_test_grammar() -> Result<()> {
12071209
roundtrip_all_types(
12081210
"select

0 commit comments

Comments
 (0)