|
| 1 | +// SPDX-License-Identifier: Apache-2.0 |
| 2 | +// SPDX-FileCopyrightText: Copyright the Vortex contributors |
| 3 | + |
| 4 | +//! Editorial group descriptions, ported from v2. |
| 5 | +//! |
| 6 | +//! These strings are the source of truth for the hover tooltip rendered on |
| 7 | +//! every `<details>` group title on the landing page and on the |
| 8 | +//! `/group/{slug}` permalink. They are deliberately editorial and |
| 9 | +//! hand-maintained — derived from the group *name*, not from the database — |
| 10 | +//! so that adding a new group's blurb is a one-line edit here rather than a |
| 11 | +//! schema or ingest change. |
| 12 | +//! |
| 13 | +//! Source of truth in v2 (kept verbatim where applicable): |
| 14 | +//! - `benchmarks-website/src/utils.js` — `getBenchmarkDescription` |
| 15 | +//! - `benchmarks-website/src/config.js` — `BENCHMARK_DESCRIPTIONS`, |
| 16 | +//! `QUERY_SUITES.description` |
| 17 | +//! |
| 18 | +//! TPC-H / TPC-DS fan out by storage and scale factor; the description is |
| 19 | +//! synthesised from the parsed name so we don't have to hand-maintain one |
| 20 | +//! entry per `(storage, sf)` pair. |
| 21 | +
|
| 22 | +/// Look up a short editorial description for a group display name. Returns |
| 23 | +/// `None` when the group has no canonical description (e.g. vector-search |
| 24 | +/// groups) — callers render the title without a tooltip in that case. |
| 25 | +pub fn group_description(name: &str) -> Option<String> { |
| 26 | + if let Some(d) = tpc_description(name) { |
| 27 | + return Some(d); |
| 28 | + } |
| 29 | + static_description(name).map(str::to_string) |
| 30 | +} |
| 31 | + |
/// Hand-written blurbs for the groups that do not fan out by storage or scale
/// factor.
///
/// The lookup is an exact, case-sensitive comparison against the group's
/// display name; any name not listed in the table yields `None`. The wording
/// is intended to mirror v2, so new entries should follow the same style.
fn static_description(name: &str) -> Option<&'static str> {
    // (group display name, blurb) pairs.
    const BLURBS: &[(&str, &str)] = &[
        (
            "Random Access",
            "Tests performance of selecting arbitrary row indices from a file on NVMe storage",
        ),
        (
            "Compression",
            "Measures encoding and decoding throughput (MB/s) for Vortex files and Parquet \
             files (with zstd page compression)",
        ),
        (
            "Compression Size",
            "Compares compressed file sizes and compression ratios across different encoding \
             strategies",
        ),
        (
            "Clickbench",
            "ClickHouse's analytical benchmark suite testing real-world query patterns on web \
             analytics data",
        ),
        (
            "Statistical and Population Genetics",
            "A suite of Statistical and Population genetics queries using the gnomAD dataset",
        ),
        (
            "PolarSignals Profiling",
            "Profiling data benchmark modeled on PolarSignals/Parca, exercising scan-layer \
             performance with projection and filter pushdown on deeply nested schemas",
        ),
    ];

    BLURBS
        .iter()
        .find(|(group, _)| *group == name)
        .map(|&(_, blurb)| blurb)
}
| 62 | + |
| 63 | +/// Derive a description for `TPC-H (NVMe|S3) (SF=N)` and `TPC-DS (NVMe) (SF=N)` |
| 64 | +/// group names. The shape is fixed because [`crate::api::groups::group_name_query`] |
| 65 | +/// emits exactly this format for tpch/tpcds. Returns `None` for any name that |
| 66 | +/// does not start with `TPC-H ` or `TPC-DS `. |
| 67 | +fn tpc_description(name: &str) -> Option<String> { |
| 68 | + let parts = if let Some(rest) = name.strip_prefix("TPC-H ") { |
| 69 | + Some(("TPC-H", rest)) |
| 70 | + } else { |
| 71 | + name.strip_prefix("TPC-DS ").map(|rest| ("TPC-DS", rest)) |
| 72 | + }; |
| 73 | + let (suite, rest) = parts?; |
| 74 | + let storage = if rest.starts_with("(NVMe)") { |
| 75 | + "nvme" |
| 76 | + } else if rest.starts_with("(S3)") { |
| 77 | + "s3" |
| 78 | + } else { |
| 79 | + return None; |
| 80 | + }; |
| 81 | + let sf = parse_sf(rest)?; |
| 82 | + Some(format_tpc(suite, storage, &sf)) |
| 83 | +} |
| 84 | + |
/// Extract the scale factor digits from text such as `(NVMe) (SF=10)`.
///
/// Looks at what follows the first `SF=` in `s` and returns the leading run of
/// ASCII digits as an owned string. Returns `None` when `s` contains no `SF=`
/// or when the character right after it is not an ASCII digit.
fn parse_sf(s: &str) -> Option<String> {
    let (_, tail) = s.split_once("SF=")?;
    // Byte offset of the first non-digit; the whole tail if it is all digits.
    // ASCII digits are one byte each, so the offset is a valid slice boundary.
    let end = tail
        .find(|c: char| !c.is_ascii_digit())
        .unwrap_or(tail.len());
    (end > 0).then(|| tail[..end].to_string())
}
| 96 | + |
/// Render the v2-compatible TPC blurb.
///
/// - `suite`: suite label placed at the start of the sentence (e.g. `TPC-H`).
/// - `storage`: storage key; `"s3"` selects the S3 wording, while `"nvme"` and
///   any other value select the local-NVMe wording.
/// - `sf`: scale factor digits, echoed verbatim after `SF=`.
///
/// Only `TPC-H` at scale factors 1, 10, 100 and 1000 gets the trailing
/// `(~<size> of data)` annotation; every other combination renders the plain
/// sentence (TPC-DS in v2 did not annotate scale bytes).
fn format_tpc(suite: &str, storage: &str, sf: &str) -> String {
    let storage_phrase = match storage {
        "s3" => "against S3 storage",
        // "nvme" and any unrecognised key share the local-NVMe wording.
        _ => "on local NVMe storage",
    };

    // Every variant starts with the same sentence, so build it once.
    let base = format!("{suite} benchmark queries {storage_phrase} at SF={sf}");

    // Approximate dataset size, known only for the standard TPC-H scale factors.
    let approx_size = if suite == "TPC-H" {
        match sf {
            "1" => Some("1GB"),
            "10" => Some("10GB"),
            "100" => Some("100GB"),
            "1000" => Some("1TB"),
            _ => None,
        }
    } else {
        None
    };

    match approx_size {
        Some(size) => format!("{base} (~{size} of data)"),
        None => base,
    }
}
| 122 | + |
#[cfg(test)]
mod tests {
    use super::*;

    /// Run [`group_description`] over each `(group name, expected)` pair and
    /// assert the result, naming the offending group on failure.
    fn check(cases: &[(&str, Option<&str>)]) {
        for &(name, expected) in cases {
            assert_eq!(
                group_description(name).as_deref(),
                expected,
                "group name: {name:?}"
            );
        }
    }

    #[test]
    fn static_descriptions_match_v2() {
        check(&[
            (
                "Random Access",
                Some(
                    "Tests performance of selecting arbitrary row indices from a file on NVMe \
                     storage",
                ),
            ),
            (
                "Compression",
                Some(
                    "Measures encoding and decoding throughput (MB/s) for Vortex files and \
                     Parquet files (with zstd page compression)",
                ),
            ),
            (
                "Compression Size",
                Some(
                    "Compares compressed file sizes and compression ratios across different \
                     encoding strategies",
                ),
            ),
            (
                "Clickbench",
                Some(
                    "ClickHouse's analytical benchmark suite testing real-world query patterns \
                     on web analytics data",
                ),
            ),
            (
                "Statistical and Population Genetics",
                Some(
                    "A suite of Statistical and Population genetics queries using the gnomAD \
                     dataset",
                ),
            ),
            (
                "PolarSignals Profiling",
                Some(
                    "Profiling data benchmark modeled on PolarSignals/Parca, exercising \
                     scan-layer performance with projection and filter pushdown on deeply \
                     nested schemas",
                ),
            ),
        ]);
    }

    #[test]
    fn tpch_descriptions_carry_scale_bytes() {
        check(&[
            (
                "TPC-H (NVMe) (SF=1)",
                Some("TPC-H benchmark queries on local NVMe storage at SF=1 (~1GB of data)"),
            ),
            (
                "TPC-H (S3) (SF=10)",
                Some("TPC-H benchmark queries against S3 storage at SF=10 (~10GB of data)"),
            ),
            (
                "TPC-H (NVMe) (SF=100)",
                Some("TPC-H benchmark queries on local NVMe storage at SF=100 (~100GB of data)"),
            ),
            (
                "TPC-H (S3) (SF=1000)",
                Some("TPC-H benchmark queries against S3 storage at SF=1000 (~1TB of data)"),
            ),
        ]);
    }

    #[test]
    fn tpcds_descriptions_omit_scale_bytes() {
        check(&[
            (
                "TPC-DS (NVMe) (SF=1)",
                Some("TPC-DS benchmark queries on local NVMe storage at SF=1"),
            ),
            (
                "TPC-DS (NVMe) (SF=10)",
                Some("TPC-DS benchmark queries on local NVMe storage at SF=10"),
            ),
        ]);
    }

    #[test]
    fn unknown_groups_have_no_description() {
        check(&[
            ("cohere-large-10m / partitioned", None),
            ("Made-up benchmark", None),
        ]);
    }

    #[test]
    fn malformed_tpc_names_fall_through() {
        check(&[
            // No `(NVMe)` / `(S3)` marker after the suite prefix.
            ("TPC-H something else", None),
            // `SF=` present but not followed by digits.
            ("TPC-H (NVMe) (SF=)", None),
        ]);
    }
}
0 commit comments