Skip to content

Commit 6b76221

Browse files
connortsui20claude
andauthored
[claude] benchmarks v3 cleanup (#7745)
<!-- Thank you for submitting a pull request! We appreciate your time and effort. Please make sure to provide enough information so that we can review your pull request. The Summary and Testing sections below contain guidance on what to include. --> ## Summary <!-- If this PR is related to a tracked effort, please link to the relevant issue here (e.g., `Closes: #123`). Otherwise, feel free to ignore / delete this. In this section, please: 1. Explain the rationale for this change. 2. Summarize the changes included in this PR. A general rule of thumb is that larger PRs should have larger summaries. If there are a lot of changes, please help us review the code by explaining what was changed and why. If there is an issue or discussion attached, there is no need to duplicate all the details, but clarity is always preferred over brevity. --> Closes: #000 <!-- ## API Changes Uncomment this section if there are any user-facing changes. Consider whether the change affects users in one of the following ways: 1. Breaks public APIs in some way. 2. Changes the underlying behavior of one of the engine integrations. 3. Should some documentation be updated to reflect this change? If a public API is changed in a breaking manner, make sure to add the appropriate label. You can run `./scripts/public-api.sh` locally to see if there are any public API changes (and this also runs in our CI). --> ## Testing <!-- Please describe how this change was tested. Here are some common categories for testing in Vortex: 1. Verifying existing behavior is maintained. 2. Verifying new behavior and functionality works correctly. 3. Serialization compatibility (backwards and forwards) should be maintained or explicitly broken. --> --------- Signed-off-by: Claude <noreply@anthropic.com> Co-authored-by: Claude <noreply@anthropic.com>
1 parent ab5592f commit 6b76221

10 files changed

Lines changed: 83 additions & 70 deletions

File tree

benchmarks-website/migrate/src/classifier.rs

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -432,17 +432,13 @@ pub enum Skip {
432432
HistoricalMemory,
433433
}
434434

435-
/// Engines the v3 emitter produces today. Anything else is historical
436-
/// and gets bucketed as `Skip::Deprecated`.
437-
///
438-
/// ORCHESTRATOR NOTE: confirm against `vortex-bench`'s `Engine` enum
439-
/// before handing off; edit if the live set differs.
435+
/// Engines the v3 emitter produces today. Mirrors
436+
/// `vortex-bench/src/lib.rs::Engine`. Anything else is historical and gets
437+
/// bucketed as `Skip::Deprecated`.
440438
const V3_ENGINES: &[&str] = &["datafusion", "duckdb", "vortex", "arrow"];
441439

442-
/// Formats the v3 emitter produces today (`Format::name()` values).
443-
///
444-
/// ORCHESTRATOR NOTE: confirm against `vortex-bench/src/lib.rs`
445-
/// `Format::name()` before handing off.
440+
/// Formats the v3 emitter produces today (`Format::name()` values from
441+
/// `vortex-bench/src/lib.rs`).
446442
const V3_FORMATS: &[&str] = &[
447443
"vortex-file-compressed",
448444
"vortex-compact",
@@ -797,6 +793,8 @@ fn split_engine_format(series: &str) -> Option<(String, String)> {
797793

798794
#[cfg(test)]
799795
mod tests {
796+
use anyhow::Context as _;
797+
800798
use super::*;
801799

802800
fn record(name: &str) -> V2Record {
@@ -840,14 +838,16 @@ mod tests {
840838
}
841839

842840
#[test]
843-
fn random_access_bins_dataset_pattern() {
844-
let bin = classify(&record("random-access/taxi/take/parquet")).unwrap();
841+
fn random_access_bins_dataset_pattern() -> anyhow::Result<()> {
842+
let bin = classify(&record("random-access/taxi/take/parquet"))
843+
.context("classify returned None for a known-good 4-part random-access name")?;
845844
assert_eq!(
846845
bin,
847846
V3Bin::RandomAccess {
848847
dataset: "taxi/take".into(),
849848
format: "parquet".into(),
850849
}
851850
);
851+
Ok(())
852852
}
853853
}

benchmarks-website/migrate/src/lib.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,15 @@
1313
//! The migrator is throwaway: once v3 cuts over, both the binary and
1414
//! the classifier go away.
1515
16+
/// Routing v2 records into v3 fact tables, ported from v2's `getGroup`.
1617
pub mod classifier;
18+
/// V2 commit -> v3 `commits` row upserts.
1719
pub mod commits;
20+
/// End-to-end migration of v2 dumps into a v3 DuckDB.
1821
pub mod migrate;
22+
/// Streaming readers for the v2 S3 bucket and local dumps.
1923
pub mod source;
24+
/// Wire shapes of the v2 benchmark dataset.
2025
pub mod v2;
26+
/// Structural diff between a migrated v3 DuckDB and v2's `/api/metadata`.
2127
pub mod verify;

benchmarks-website/migrate/src/migrate.rs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -869,12 +869,12 @@ mod tests {
869869

870870
use super::*;
871871

872-
fn open_db_without(table: &str) -> (tempfile::TempDir, Connection) {
873-
let dir = tempfile::TempDir::new().unwrap();
872+
fn open_db_without(table: &str) -> Result<(tempfile::TempDir, Connection)> {
873+
let dir = tempfile::TempDir::new()?;
874874
let path = dir.path().join("v3.duckdb");
875-
let conn = open_target_db(&path).unwrap();
876-
conn.execute_batch(&format!("DROP TABLE {table}")).unwrap();
877-
(dir, conn)
875+
let conn = open_target_db(&path)?;
876+
conn.execute_batch(&format!("DROP TABLE {table}"))?;
877+
Ok((dir, conn))
878878
}
879879

880880
fn one_query_row() -> QueryMeasurement {
@@ -898,12 +898,12 @@ mod tests {
898898
}
899899

900900
#[test]
901-
fn flush_all_does_not_overcount_on_failure() {
901+
fn flush_all_does_not_overcount_on_failure() -> Result<()> {
902902
// Drop `compression_times` before flushing so the second
903903
// flush in `flush_all` fails. The first (queries) succeeded,
904904
// so its counter must be set; the failed table's counter and
905905
// every later table's counter must stay at zero.
906-
let (_dir, conn) = open_db_without("compression_times");
906+
let (_dir, conn) = open_db_without("compression_times")?;
907907

908908
let mut summary = MigrationSummary::default();
909909
let mut q = QueryAccum::default();
@@ -931,5 +931,6 @@ mod tests {
931931
summary.compression_size_inserted, 0,
932932
"later flushes never ran"
933933
);
934+
Ok(())
934935
}
935936
}

benchmarks-website/migrate/src/v2.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ pub struct V2Commit {
137137
pub url: Option<String>,
138138
}
139139

140+
/// Author or committer block on a v2 commit record.
140141
#[derive(Debug, Clone, Deserialize)]
141142
pub struct V2Person {
142143
#[serde(default)]

benchmarks-website/migrate/src/verify.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ pub struct VerifyReport {
3030
pub chart_diffs: Vec<ChartDiff>,
3131
}
3232

33+
/// One group's chart-count divergence between v2 and v3, captured when the
34+
/// group is structurally present on both sides but the counts differ.
3335
#[derive(Debug, Clone)]
3436
pub struct ChartDiff {
3537
pub group: String,

benchmarks-website/server/src/api.rs

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,9 @@ pub const DEFAULT_COMMIT_WINDOW: u32 = 100;
3939
/// Canonical group ordering, ported from the v2 site's hard-coded list at
4040
/// `origin/ct/vfvb:benchmarks-website/index.html`. Group names not in this
4141
/// list sort after every listed name in alphabetical order. The order is
42-
/// significant for the landing page render — the first group is opened by
43-
/// default and the rest are collapsed.
42+
/// significant for the landing page render — every group is collapsed by
43+
/// default, and only the first group's chart payloads are inlined into the
44+
/// HTML so opening it skips a fetch round-trip.
4445
pub const GROUP_ORDER: &[&str] = &[
4546
"Random Access",
4647
"Compression",
@@ -145,20 +146,13 @@ impl CommitWindow {
145146
}
146147
}
147148

148-
/// Query string for `/api/chart/{slug}` and `/chart/{slug}`.
149-
///
150-
/// `y` (linear|log) and `mode` (abs|rel) are accepted but ignored by the SQL —
151-
/// the JSON response is identical regardless. They exist on the API surface so
152-
/// the client can drive deep links and refetches with a single URL shape; the
153-
/// rendering hints are applied client-side in `chart-init.js`.
149+
/// Query string for `/api/chart/{slug}` and `/chart/{slug}`. Only `?n=`
150+
/// affects the JSON response; per-chart UI state (Y axis, slider) is local
151+
/// to `chart-init.js` and intentionally not in the URL.
154152
#[derive(Debug, Default, Deserialize)]
155153
pub struct ChartQuery {
156154
/// Commit window: `25`, `50`, `100`, `250`, `all`, etc.
157155
pub n: Option<String>,
158-
/// Y-axis hint (linear|log). Echoed for client-side rendering only.
159-
pub y: Option<String>,
160-
/// Display mode hint (abs|rel). Echoed for client-side rendering only.
161-
pub mode: Option<String>,
162156
}
163157

164158
impl ChartQuery {
@@ -168,11 +162,14 @@ impl ChartQuery {
168162
}
169163
}
170164

165+
/// Body of `GET /api/groups`: every group with its chart links and summary.
171166
#[derive(Debug, Serialize)]
172167
pub struct GroupsResponse {
173168
pub groups: Vec<Group>,
174169
}
175170

171+
/// One group: a display name, a slug for the group permalink, and the chart
172+
/// links inside it. Optionally carries a v2-compatible rollup summary.
176173
#[derive(Debug, Serialize)]
177174
pub struct Group {
178175
pub name: String,
@@ -274,12 +271,16 @@ pub struct NamedChartResponse {
274271
pub chart: ChartResponse,
275272
}
276273

274+
/// One chart's short label inside a group (e.g. `Q1`) plus the slug that
275+
/// resolves to its `/api/chart/{slug}` payload.
277276
#[derive(Debug, Serialize)]
278277
pub struct ChartLink {
279278
pub name: String,
280279
pub slug: String,
281280
}
282281

282+
/// Body of `GET /api/chart/{slug}`: every commit with data, every series'
283+
/// values aligned to those commits, and per-series engine/format tags.
283284
#[derive(Debug, Clone, Serialize)]
284285
pub struct ChartResponse {
285286
pub display_name: String,
@@ -317,6 +318,8 @@ pub struct FilterUniverse {
317318
pub formats: Vec<String>,
318319
}
319320

321+
/// One row of the `commits[]` array on a [`ChartResponse`]. Carries enough
322+
/// metadata for the tooltip and the click-to-PR handler in `chart-init.js`.
320323
#[derive(Debug, Clone, Serialize)]
321324
pub struct CommitPoint {
322325
pub sha: String,
@@ -325,6 +328,8 @@ pub struct CommitPoint {
325328
pub url: String,
326329
}
327330

331+
/// Body of `GET /health`: liveness probe plus a row-count rollup that's
332+
/// useful for "did my ingest land?" smoke tests.
328333
#[derive(Debug, Serialize)]
329334
pub struct HealthResponse {
330335
pub status: &'static str,
@@ -334,6 +339,7 @@ pub struct HealthResponse {
334339
pub row_counts: RowCounts,
335340
}
336341

342+
/// Per-fact-table row counts surfaced by `/health`.
337343
#[derive(Debug, Serialize)]
338344
pub struct RowCounts {
339345
pub commits: i64,
@@ -1183,16 +1189,6 @@ pub(crate) fn chart_payload(
11831189
}
11841190
}
11851191

1186-
/// Thin wrapper around [`chart_payload`] kept for callers that prefer the old
1187-
/// name. New code should prefer [`chart_payload`].
1188-
pub(crate) fn collect_chart(
1189-
conn: &Connection,
1190-
key: &ChartKey,
1191-
window: &CommitWindow,
1192-
) -> Result<Option<ChartResponse>> {
1193-
chart_payload(conn, key, window)
1194-
}
1195-
11961192
/// Collect every chart inside one group. Returns `None` if the group has no
11971193
/// data at all (callers should render a 404).
11981194
// TODO: this currently re-runs the entire `collect_groups` discovery pass

benchmarks-website/server/src/html.rs

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -82,10 +82,10 @@ const VORTEX_BLACK_SVG: &[u8] = include_bytes!("../../public/vortex_black_nobg.s
8282
const VORTEX_WHITE_SVG: &[u8] = include_bytes!("../../public/vortex_white_nobg.svg");
8383
const STATIC_ASSET_VERSION: &str = "bench-v3-ui-16";
8484

85-
/// Commits to inline for the open-by-default group. The chart's
86-
/// initial visible window is ~100 commits; bigger windows just bloat
87-
/// the cold-page HTML. Users who zoom out trigger a refetch with
88-
/// `?n=all` via `chart-init.js`.
85+
/// Commits to inline for the first group's pre-fetched chart payloads.
86+
/// The chart's initial visible window is ~100 commits; bigger windows
87+
/// just bloat the cold-page HTML. Users who zoom out trigger a refetch
88+
/// with `?n=all` via `chart-init.js`.
8989
const LANDING_INLINE_N: u32 = 100;
9090

9191
/// HTML routes mounted under `/`.
@@ -127,8 +127,7 @@ pub struct UiQuery {
127127
impl UiQuery {
128128
/// Resolve the [`CommitWindow`] for HTML routes. Defaults to
129129
/// [`CommitWindow::All`] so users can pan/zoom all the way back to
130-
/// the very first commit on every chart, including the first
131-
/// (open-by-default) group on the landing page. Visual downsampling
130+
/// the very first commit on every chart. Visual downsampling
132131
/// happens client-side on the visible commit range only.
133132
fn fetch_window(&self) -> CommitWindow {
134133
match self.n.as_deref() {
@@ -208,21 +207,21 @@ async fn landing(State(state): State<AppState>, Query(ui): Query<UiQuery>) -> Re
208207

209208
/// One group's worth of data for the landing page.
210209
///
211-
/// The first group (in canonical order) ships with `charts` populated so
212-
/// the moment the user expands it the chart hydrates from the inline
213-
/// JSON without a network round-trip. Every other group ships
214-
/// with `charts` empty and only their chart-card shells — payloads are
215-
/// fetched client-side on first `details.toggle` to keep the cold landing
216-
/// HTML small.
210+
/// Every disclosure renders closed by default. The first group (in
211+
/// canonical order) ships with its chart payloads inlined, so the moment
212+
/// the user expands it the chart hydrates from the inline JSON without a
213+
/// network round-trip. Every other group ships only its chart-card
214+
/// shells — payloads are fetched client-side on first `details.toggle`
215+
/// to keep the cold landing HTML small.
217216
struct LandingGroup {
218217
name: String,
219218
summary: Option<Summary>,
220219
/// Chart links for every chart in the group. Always present — we need
221220
/// the slugs server-side so the chart-card shell can carry
222221
/// `data-chart-slug` for the lazy fetch.
223222
chart_links: Vec<api::ChartLink>,
224-
/// Pre-fetched payloads. Populated only for the open-by-default group.
225-
/// `Vec` indices line up with `chart_links`.
223+
/// Pre-fetched payloads. Populated only for the first group in
224+
/// canonical order. `Vec` indices line up with `chart_links`.
226225
inlined: Vec<Option<NamedChartResponse>>,
227226
}
228227

@@ -263,7 +262,7 @@ fn collect_landing_groups(conn: &Connection) -> Result<Vec<LandingGroup>> {
263262
}
264263
v
265264
} else {
266-
// Closed groups: ship only the shells. The client fetches on
265+
// Other groups: ship only the shells. The client fetches on
267266
// first `details.toggle`.
268267
(0..group.charts.len()).map(|_| None).collect()
269268
};
@@ -292,14 +291,14 @@ async fn chart_page(
292291

293292
let window = ui.fetch_window();
294293
let result = db::run_blocking(&state.db, move |conn| {
295-
api::collect_chart(conn, &key, &window)
294+
api::chart_payload(conn, &key, &window)
296295
})
297296
.await;
298297
let chart = match result {
299298
Ok(Some(c)) => c,
300299
Ok(None) => return error_page(StatusCode::NOT_FOUND, "chart not found").into_response(),
301300
Err(err) => {
302-
tracing::error!(error = ?err, "chart_page: collect_chart failed");
301+
tracing::error!(error = ?err, "chart_page: chart_payload failed");
303302
return error_page(StatusCode::INTERNAL_SERVER_ERROR, "internal error").into_response();
304303
}
305304
};

benchmarks-website/server/static/chart-init.js

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1235,17 +1235,17 @@
12351235
}
12361236

12371237
// -----------------------------------------------------------------------
1238-
// Lazy fetch on `<details>` toggle for closed-by-default groups.
1238+
// Lazy fetch on `<details>` toggle. Every group renders closed; this
1239+
// hydrates the chart cards inside whichever group the user expands.
12391240
// -----------------------------------------------------------------------
12401241
function fetchAndConstruct(card) {
12411242
var canvas = card.querySelector("canvas");
12421243
if (!canvas) return Promise.resolve();
12431244
if (canvas.__bench_chart) return Promise.resolve();
12441245
// `constructChart` reads inline JSON (`<script id="chart-data-N">`)
12451246
// when there's no payload on the canvas yet. The first group's
1246-
// payloads are inlined server-side regardless of whether the
1247-
// group is open by default, so try a synchronous construct before
1248-
// hitting the network.
1247+
// payloads are inlined server-side, so try a synchronous construct
1248+
// before hitting the network.
12491249
if (constructChart(card)) {
12501250
bindToolbar(card);
12511251
return Promise.resolve();

benchmarks-website/server/tests/ingest.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,13 @@ async fn read_routes_serve_after_ingest() -> Result<()> {
283283
assert!(body["display_name"].is_string());
284284
assert!(body["unit"].is_string());
285285
assert!(body["commits"].is_array());
286-
assert_eq!(body["commits"].as_array().unwrap().len(), 1);
286+
assert_eq!(
287+
body["commits"]
288+
.as_array()
289+
.context("commits is array")?
290+
.len(),
291+
1
292+
);
287293
assert!(body["series"].is_object());
288294
Ok(())
289295
}

0 commit comments

Comments
 (0)