Skip to content

Commit 5651fdc

Browse files
committed
float_to_timestamp
1 parent 3e9f850 commit 5651fdc

5 files changed

Lines changed: 413 additions & 13 deletions

File tree

native/spark-expr/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,3 +103,7 @@ path = "tests/spark_expr_reg.rs"
103103
[[bench]]
104104
name = "cast_from_boolean"
105105
harness = false
106+
107+
[[bench]]
108+
name = "cast_non_int_numeric_timestamp"
109+
harness = false
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use arrow::array::builder::{BooleanBuilder, Decimal128Builder, Float32Builder, Float64Builder};
19+
use arrow::array::RecordBatch;
20+
use arrow::datatypes::{DataType, Field, Schema, TimeUnit};
21+
use criterion::{criterion_group, criterion_main, Criterion};
22+
use datafusion::physical_expr::{expressions::Column, PhysicalExpr};
23+
use datafusion_comet_spark_expr::{Cast, EvalMode, SparkCastOptions};
24+
use std::sync::Arc;
25+
26+
const BATCH_SIZE: usize = 8192;
27+
28+
fn criterion_benchmark(c: &mut Criterion) {
29+
let spark_cast_options = SparkCastOptions::new(EvalMode::Legacy, "UTC", false);
30+
let timestamp_type = DataType::Timestamp(TimeUnit::Microsecond, Some("UTC".into()));
31+
32+
let mut group = c.benchmark_group("cast_non_int_numeric_to_timestamp");
33+
34+
// Float32 -> Timestamp
35+
let batch_f32 = create_float32_batch();
36+
let expr_f32 = Arc::new(Column::new("a", 0));
37+
let cast_f32_to_ts = Cast::new(expr_f32, timestamp_type.clone(), spark_cast_options.clone());
38+
group.bench_function("cast_f32_to_timestamp", |b| {
39+
b.iter(|| cast_f32_to_ts.evaluate(&batch_f32).unwrap());
40+
});
41+
42+
// Float64 -> Timestamp
43+
let batch_f64 = create_float64_batch();
44+
let expr_f64 = Arc::new(Column::new("a", 0));
45+
let cast_f64_to_ts = Cast::new(expr_f64, timestamp_type.clone(), spark_cast_options.clone());
46+
group.bench_function("cast_f64_to_timestamp", |b| {
47+
b.iter(|| cast_f64_to_ts.evaluate(&batch_f64).unwrap());
48+
});
49+
50+
// Boolean -> Timestamp
51+
let batch_bool = create_boolean_batch();
52+
let expr_bool = Arc::new(Column::new("a", 0));
53+
let cast_bool_to_ts = Cast::new(expr_bool, timestamp_type.clone(), spark_cast_options.clone());
54+
group.bench_function("cast_bool_to_timestamp", |b| {
55+
b.iter(|| cast_bool_to_ts.evaluate(&batch_bool).unwrap());
56+
});
57+
58+
// Decimal128 -> Timestamp
59+
let batch_decimal = create_decimal128_batch();
60+
let expr_decimal = Arc::new(Column::new("a", 0));
61+
let cast_decimal_to_ts =
62+
Cast::new(expr_decimal, timestamp_type.clone(), spark_cast_options.clone());
63+
group.bench_function("cast_decimal_to_timestamp", |b| {
64+
b.iter(|| cast_decimal_to_ts.evaluate(&batch_decimal).unwrap());
65+
});
66+
67+
group.finish();
68+
}
69+
70+
fn create_float32_batch() -> RecordBatch {
71+
let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Float32, true)]));
72+
let mut b = Float32Builder::with_capacity(BATCH_SIZE);
73+
for i in 0..BATCH_SIZE {
74+
if i % 10 == 0 {
75+
b.append_null();
76+
} else {
77+
b.append_value(rand::random::<f32>());
78+
}
79+
}
80+
RecordBatch::try_new(schema, vec![Arc::new(b.finish())]).unwrap()
81+
}
82+
83+
fn create_float64_batch() -> RecordBatch {
84+
let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Float64, true)]));
85+
let mut b = Float64Builder::with_capacity(BATCH_SIZE);
86+
for i in 0..BATCH_SIZE {
87+
if i % 10 == 0 {
88+
b.append_null();
89+
} else {
90+
b.append_value(rand::random::<f64>());
91+
}
92+
}
93+
RecordBatch::try_new(schema, vec![Arc::new(b.finish())]).unwrap()
94+
}
95+
96+
fn create_boolean_batch() -> RecordBatch {
97+
let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Boolean, true)]));
98+
let mut b = BooleanBuilder::with_capacity(BATCH_SIZE);
99+
for i in 0..BATCH_SIZE {
100+
if i % 10 == 0 {
101+
b.append_null();
102+
} else {
103+
b.append_value(rand::random::<bool>());
104+
}
105+
}
106+
RecordBatch::try_new(schema, vec![Arc::new(b.finish())]).unwrap()
107+
}
108+
109+
fn create_decimal128_batch() -> RecordBatch {
110+
let schema = Arc::new(Schema::new(vec![Field::new(
111+
"a",
112+
DataType::Decimal128(18, 6),
113+
true,
114+
)]));
115+
let mut b = Decimal128Builder::with_capacity(BATCH_SIZE);
116+
for i in 0..BATCH_SIZE {
117+
if i % 10 == 0 {
118+
b.append_null();
119+
} else {
120+
b.append_value(rand::random::<i64>() as i128);
121+
}
122+
}
123+
let array = b.finish().with_precision_and_scale(18, 6).unwrap();
124+
RecordBatch::try_new(schema, vec![Arc::new(array)]).unwrap()
125+
}
126+
127+
fn config() -> Criterion {
128+
Criterion::default()
129+
}
130+
131+
criterion_group! {
132+
name = benches;
133+
config = config();
134+
targets = criterion_benchmark
135+
}
136+
criterion_main!(benches);

0 commit comments

Comments
 (0)