@@ -41,21 +41,41 @@ impl ClickBenchBenchmark {
4141 }
4242}
4343
44+ /// ClickBench sorted by event date and event time.
45+ pub struct ClickBenchSortedBenchmark {
46+ pub queries_file : Option < String > ,
47+ pub data_url : Url ,
48+ }
49+
50+ impl ClickBenchSortedBenchmark {
51+ /// Create the sorted ClickBench benchmark, optionally using a remote data directory.
52+ pub fn new ( use_remote_data_dir : Option < String > ) -> Result < Self > {
53+ Ok ( Self {
54+ queries_file : None ,
55+ data_url : resolve_data_url ( use_remote_data_dir. as_deref ( ) , CLICKBENCH_SORTED_NAME ) ?,
56+ } )
57+ }
58+ }
59+
60+ fn read_clickbench_queries ( queries_file : Option < & str > ) -> Result < Vec < ( usize , String ) > > {
61+ let queries_filepath = match queries_file {
62+ Some ( file) => file. into ( ) ,
63+ None => Path :: new ( env ! ( "CARGO_MANIFEST_DIR" ) ) . join ( "clickbench_queries.sql" ) ,
64+ } ;
65+
66+ Ok ( fs:: read_to_string ( queries_filepath) ?
67+ . split ( ';' )
68+ . map ( |s| s. trim ( ) )
69+ . filter ( |s| !s. is_empty ( ) )
70+ . map ( |s| s. to_string ( ) )
71+ . enumerate ( )
72+ . collect ( ) )
73+ }
74+
4475#[ async_trait:: async_trait]
4576impl Benchmark for ClickBenchBenchmark {
4677 fn queries ( & self ) -> Result < Vec < ( usize , String ) > > {
47- let queries_filepath = match & self . queries_file {
48- Some ( file) => file. into ( ) ,
49- None => Path :: new ( env ! ( "CARGO_MANIFEST_DIR" ) ) . join ( "clickbench_queries.sql" ) ,
50- } ;
51-
52- Ok ( fs:: read_to_string ( queries_filepath) ?
53- . split ( ';' )
54- . map ( |s| s. trim ( ) )
55- . filter ( |s| !s. is_empty ( ) )
56- . map ( |s| s. to_string ( ) )
57- . enumerate ( )
58- . collect ( ) )
78+ read_clickbench_queries ( self . queries_file . as_deref ( ) )
5979 }
6080
6181 async fn generate_base_data ( & self ) -> Result < ( ) > {
@@ -70,10 +90,7 @@ impl Benchmark for ClickBenchBenchmark {
7090 }
7191
7292 fn expected_row_counts ( & self ) -> Option < Vec < usize > > {
73- Some ( vec ! [
74- 1 , 1 , 1 , 1 , 1 , 1 , 1 , 18 , 10 , 10 , 10 , 10 , 10 , 10 , 10 , 10 , 10 , 10 , 10 , 4 , 1 , 10 , 10 , 10 ,
75- 10 , 10 , 10 , 25 , 25 , 1 , 10 , 10 , 10 , 10 , 10 , 10 , 10 , 10 , 10 , 10 , 10 , 10 , 10 ,
76- ] )
93+ Some ( clickbench_expected_row_counts ( ) )
7794 }
7895
7996 fn dataset ( & self ) -> BenchmarkDataset {
@@ -99,6 +116,48 @@ impl Benchmark for ClickBenchBenchmark {
99116 }
100117}
101118
119+ #[ async_trait:: async_trait]
120+ impl Benchmark for ClickBenchSortedBenchmark {
121+ fn queries ( & self ) -> Result < Vec < ( usize , String ) > > {
122+ Ok ( read_clickbench_queries ( self . queries_file . as_deref ( ) ) ?
123+ . into_iter ( )
124+ . filter ( |( idx, _) | CLICKBENCH_SORTED_QUERY_IDS . contains ( idx) )
125+ . collect ( ) )
126+ }
127+
128+ async fn generate_base_data ( & self ) -> Result < ( ) > {
129+ if self . data_url . scheme ( ) != "file" {
130+ return Ok ( ( ) ) ;
131+ }
132+
133+ generate_sorted_clickbench ( CLICKBENCH_SORTED_NAME . to_data_path ( ) ) . await
134+ }
135+
136+ fn expected_row_counts ( & self ) -> Option < Vec < usize > > {
137+ Some ( clickbench_expected_row_counts ( ) )
138+ }
139+
140+ fn dataset ( & self ) -> BenchmarkDataset {
141+ BenchmarkDataset :: ClickBenchSorted
142+ }
143+
144+ fn dataset_name ( & self ) -> & str {
145+ CLICKBENCH_SORTED_NAME
146+ }
147+
148+ fn dataset_display ( & self ) -> String {
149+ CLICKBENCH_SORTED_NAME . to_string ( )
150+ }
151+
152+ fn data_url ( & self ) -> & Url {
153+ & self . data_url
154+ }
155+
156+ fn table_specs ( & self ) -> Vec < TableSpec > {
157+ vec ! [ TableSpec :: new( "hits" , Some ( HITS_SCHEMA . clone( ) ) ) ]
158+ }
159+ }
160+
102161fn clickbench_flavor ( flavor : Flavor ) -> String {
103162 format ! ( "clickbench_{flavor}" )
104163}
0 commit comments