Skip to content

Commit 32f51ec

Browse files
authored
Add benchmark_runner for sql_benchmarks with help and list commands (#22001)
## Which issue does this PR close? - #21937 (partial) ## Rationale for this change This adds a standalone benchmark runner CLI for discovering benchmark suites and inspecting available commands. The runner provides a foundation for invoking benchmark workflows through a structured interface instead of relying only on shell scripts. This is the first of three PRs to complete this feature. ## What changes are included in this PR? - Adds the `benchmark_runner` binary and supporting module structure. - Adds CLI support for help and suite listing commands. - Adds suite discovery/loading support and output formatting helpers. - Adds a TPCH suite manifest for benchmark runner discovery. - Wires the new benchmark runner code into the benchmarks crate. ## Are these changes tested? Yes. ## Are there any user-facing changes? Yes. This adds a new `benchmark_runner` benchmark CLI with help and list functionality. ## Use of AI This code was partially written by Codex but edited, reviewed and tested by @Omega359.
1 parent fa03a4c commit 32f51ec

9 files changed

Lines changed: 960 additions & 12 deletions

File tree

Cargo.lock

Lines changed: 43 additions & 11 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

benchmarks/Cargo.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,11 @@ snmalloc = ["snmalloc-rs"]
4040
mimalloc_extended = ["libmimalloc-sys/extended"]
4141

4242
[dependencies]
43+
anstream = "1.0"
4344
arrow = { workspace = true }
4445
async-trait = "0.1"
4546
bytes = { workspace = true }
46-
clap = { version = "4.6.0", features = ["derive", "env"] }
47+
clap = { version = "4.6.1", features = ["derive", "env", "color"] }
4748
criterion = { workspace = true, features = ["html_reports"] }
4849
datafusion = { workspace = true, default-features = true }
4950
datafusion-common = { workspace = true, default-features = true }
@@ -61,6 +62,7 @@ serde_json = { workspace = true }
6162
snmalloc-rs = { version = "0.7", optional = true }
6263
tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot"] }
6364
tokio-util = { version = "0.7.17" }
65+
toml = "1.1"
6466

6567
[dev-dependencies]
6668
datafusion-proto = { workspace = true }
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
name = "tpch"
2+
description = "TPC-H SQL benchmarks"
3+
4+
[[options]]
5+
name = "format"
6+
short = "f"
7+
default = "parquet"
8+
values = ["parquet", "csv", "mem"]
9+
help = "Selects the TPC-H data format."
10+
11+
[[options]]
12+
name = "scale-factor"
13+
short = "sf"
14+
default = "1"
15+
values = ["1", "10"]
16+
help = "Selects the TPC-H scale factor."
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! CLI construction and argument conversion for `benchmark_runner`.
19+
//!
20+
//! This module owns the clap command tree for the initial runner surface:
21+
//! top-level help and suite listing.
22+
23+
use clap::builder::styling::{AnsiColor, Styles};
24+
use clap::{ArgMatches, Command};
25+
use datafusion_common::{Result, exec_datafusion_err};
26+
27+
const HELP_STYLES: Styles = Styles::styled()
28+
.header(AnsiColor::Green.on_default().bold())
29+
.usage(AnsiColor::Green.on_default().bold())
30+
.literal(AnsiColor::Cyan.on_default().bold())
31+
.placeholder(AnsiColor::Cyan.on_default());
32+
33+
/// Typed representation of the command selected on the command line.
///
/// The enum carries no data, so it is cheap to copy and can be compared
/// directly (for example in tests).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RunnerCommand {
    /// Print the top-level help text.
    Help,
    /// List the discovered SQL benchmark suites.
    List,
}
38+
39+
/// Builds the command tree for help and suite listing.
40+
pub fn build_cli() -> Command {
41+
Command::new("benchmark_runner")
42+
.about("Inspect DataFusion SQL benchmark suites.")
43+
.styles(HELP_STYLES)
44+
.subcommand_required(false)
45+
.arg_required_else_help(false)
46+
.disable_help_subcommand(true)
47+
.subcommand(Command::new("help").about("Print help"))
48+
.subcommand(Command::new("list").about("List SQL benchmark suites"))
49+
}
50+
51+
/// Converts clap matches into a typed command.
52+
pub(crate) fn command_from_matches(matches: &ArgMatches) -> Result<RunnerCommand> {
53+
match matches.subcommand() {
54+
None | Some(("help", _)) => Ok(RunnerCommand::Help),
55+
Some(("list", _)) => Ok(RunnerCommand::List),
56+
Some((name, _)) => Err(exec_datafusion_err!("Unknown command '{name}'")),
57+
}
58+
}
59+
60+
#[cfg(test)]
mod tests {
    use super::*;

    /// `list` declares no options, so an unknown flag must fail to parse.
    #[test]
    fn list_rejects_unrecognized_options() {
        let cli = build_cli();
        let argv = ["benchmark_runner", "list", "--format"];
        let matches = cli.try_get_matches_from(argv);

        assert!(matches.is_err(), "{matches:?}");
    }

    /// clap reports `--help` as an error value whose rendered text is the
    /// help output; that text must mention the `list` subcommand.
    #[test]
    fn help_mentions_list_command() {
        let argv = ["benchmark_runner", "--help"];
        let rendered = build_cli()
            .try_get_matches_from(argv)
            .unwrap_err()
            .to_string();

        assert!(rendered.contains("list"));
    }
}
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! Command-line inspection for SQL benchmark suites.
19+
//!
20+
//! This module backs the `benchmark_runner` binary. The initial command
21+
//! surface lists discovered SQL benchmark suites from `.suite` files and
22+
//! prints the top-level help.
23+
//!
24+
//! Common invocations:
25+
//!
26+
//! ```text
27+
//! cargo run --bin benchmark_runner -- --help
28+
//! cargo run --release --bin benchmark_runner -- list
29+
//! ```
30+
//!
31+
//! The public entry point is [`run_cli`]. The submodules are kept private so
32+
//! the command-line flow remains the single supported API:
33+
//!
34+
//! - `cli` builds the clap command tree and parses the selected command.
35+
//! - `suite` loads `.suite` metadata and discovers benchmark query files.
36+
//! - `output` formats colored `list` command output.
37+
38+
mod cli;
39+
mod output;
40+
mod suite;
41+
42+
use crate::benchmark_runner::cli::{RunnerCommand, build_cli, command_from_matches};
43+
use crate::benchmark_runner::output::format_suite_list_styled;
44+
use crate::benchmark_runner::suite::SuiteRegistry;
45+
use datafusion::error::Result;
46+
use datafusion_common::DataFusionError;
47+
use std::io::Write;
48+
use std::path::PathBuf;
49+
50+
/// Runs the benchmark runner command-line flow for the provided argument list.
51+
///
52+
/// This discovers suite metadata, parses the help/list command, and dispatches
53+
/// to the selected implementation.
54+
pub fn run_cli<I, T>(args: I) -> Result<()>
55+
where
56+
I: IntoIterator<Item = T>,
57+
T: Clone + Into<std::ffi::OsString>,
58+
{
59+
let benchmark_dir = default_benchmark_dir();
60+
let registry = SuiteRegistry::discover(&benchmark_dir)?;
61+
let mut cli = build_cli();
62+
let matches = match cli.try_get_matches_from_mut(args) {
63+
Ok(matches) => matches,
64+
Err(e) if e.kind() == clap::error::ErrorKind::DisplayHelp => {
65+
e.print()?;
66+
return Ok(());
67+
}
68+
Err(e) => return Err(DataFusionError::External(Box::new(e))),
69+
};
70+
let command = command_from_matches(&matches)?;
71+
72+
match command {
73+
RunnerCommand::Help => {
74+
cli.print_long_help()?;
75+
println!();
76+
}
77+
RunnerCommand::List => {
78+
print_styled(&format_suite_list_styled(&registry)?)?;
79+
}
80+
}
81+
82+
Ok(())
83+
}
84+
85+
/// Writes already styled output through `anstream` so ANSI color handling
86+
/// matches clap help output on supported terminals.
87+
fn print_styled(output: &str) -> Result<()> {
88+
let mut stdout = anstream::stdout();
89+
90+
write!(&mut stdout, "{output}")
91+
.map_err(|e| DataFusionError::External(Box::new(e)))?;
92+
Ok(())
93+
}
94+
95+
/// Resolves the SQL benchmark root from either the repository root or the
96+
/// benchmarks crate manifest directory.
97+
fn default_benchmark_dir() -> PathBuf {
98+
let repo_root_path = PathBuf::from("benchmarks/sql_benchmarks");
99+
if repo_root_path.exists() {
100+
repo_root_path
101+
} else {
102+
PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("sql_benchmarks")
103+
}
104+
}

0 commit comments

Comments
 (0)