1919
2020use arrow:: array:: {
2121 builder:: { Int64Builder , StringBuilder } ,
22- Float32Array , Float64Array , RecordBatch , StringArray , UInt64Array ,
22+ ArrayRef , Float32Array , Float64Array , RecordBatch , StringArray , StringViewBuilder ,
23+ UInt64Array ,
2324} ;
2425use arrow:: datatypes:: { DataType , Field , Schema , SchemaRef } ;
2526use datafusion:: datasource:: MemTable ;
@@ -158,13 +159,39 @@ pub fn create_record_batches(
158159 . collect :: < Vec < _ > > ( )
159160}
160161
162+ /// An enum that wraps either a regular StringBuilder or a GenericByteViewBuilder
163+ /// so that both can be used interchangeably.
164+ enum TraceIdBuilder {
165+ Utf8 ( StringBuilder ) ,
166+ Utf8View ( StringViewBuilder ) ,
167+ }
168+
169+ impl TraceIdBuilder {
170+ /// Append a value to the builder.
171+ fn append_value ( & mut self , value : & str ) {
172+ match self {
173+ TraceIdBuilder :: Utf8 ( builder) => builder. append_value ( value) ,
174+ TraceIdBuilder :: Utf8View ( builder) => builder. append_value ( value) ,
175+ }
176+ }
177+
178+ /// Finish building and return the ArrayRef.
179+ fn finish ( self ) -> ArrayRef {
180+ match self {
181+ TraceIdBuilder :: Utf8 ( mut builder) => Arc :: new ( builder. finish ( ) ) ,
182+ TraceIdBuilder :: Utf8View ( mut builder) => Arc :: new ( builder. finish ( ) ) ,
183+ }
184+ }
185+ }
186+
161187/// Create time series data with `partition_cnt` partitions and `sample_cnt` rows per partition
162188/// in ascending order, if `asc` is true, otherwise randomly sampled using a Pareto distribution
163189#[ allow( dead_code) ]
164190pub ( crate ) fn make_data (
165191 partition_cnt : i32 ,
166192 sample_cnt : i32 ,
167193 asc : bool ,
194+ use_view : bool ,
168195) -> Result < ( Arc < Schema > , Vec < Vec < RecordBatch > > ) , DataFusionError > {
169196 // constants observed from trace data
170197 let simultaneous_group_cnt = 2000 ;
@@ -177,11 +204,17 @@ pub(crate) fn make_data(
177204 let mut rng = rand:: rngs:: SmallRng :: from_seed ( [ 0 ; 32 ] ) ;
178205
179206 // populate data
180- let schema = test_schema ( ) ;
207+ let schema = test_schema ( use_view ) ;
181208 let mut partitions = vec ! [ ] ;
182209 let mut cur_time = 16909000000000i64 ;
183210 for _ in 0 ..partition_cnt {
184- let mut id_builder = StringBuilder :: new ( ) ;
211+ // Choose the appropriate builder based on use_view.
212+ let mut id_builder = if use_view {
213+ TraceIdBuilder :: Utf8View ( StringViewBuilder :: new ( ) )
214+ } else {
215+ TraceIdBuilder :: Utf8 ( StringBuilder :: new ( ) )
216+ } ;
217+
185218 let mut ts_builder = Int64Builder :: new ( ) ;
186219 let gen_id = |rng : & mut rand:: rngs:: SmallRng | {
187220 rng. gen :: < [ u8 ; 16 ] > ( )
@@ -230,10 +263,19 @@ pub(crate) fn make_data(
230263 Ok ( ( schema, partitions) )
231264}
232265
233- /// The Schema used by make_data
234- fn test_schema ( ) -> SchemaRef {
235- Arc :: new ( Schema :: new ( vec ! [
236- Field :: new( "trace_id" , DataType :: Utf8 , false ) ,
237- Field :: new( "timestamp_ms" , DataType :: Int64 , false ) ,
238- ] ) )
266+ /// Returns a Schema based on the use_view flag
267+ fn test_schema ( use_view : bool ) -> SchemaRef {
268+ if use_view {
269+ // Return Utf8View schema
270+ Arc :: new ( Schema :: new ( vec ! [
271+ Field :: new( "trace_id" , DataType :: Utf8View , false ) ,
272+ Field :: new( "timestamp_ms" , DataType :: Int64 , false ) ,
273+ ] ) )
274+ } else {
275+ // Return regular Utf8 schema
276+ Arc :: new ( Schema :: new ( vec ! [
277+ Field :: new( "trace_id" , DataType :: Utf8 , false ) ,
278+ Field :: new( "timestamp_ms" , DataType :: Int64 , false ) ,
279+ ] ) )
280+ }
239281}
0 commit comments