Skip to content

Commit 13885d6

Browse files
committed
wip
Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk>
1 parent 9baa257 commit 13885d6

8 files changed

Lines changed: 176 additions & 177 deletions

File tree

vortex-test/compat-gen/scripts/compat.py

Lines changed: 14 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@
88
Manages fixture versions in S3 (or local directories) by calling the thin
99
`vortex-compat` Rust binary for generation and checking. The Rust binary
1010
handles only two things: generating .vortex files and comparing them.
11-
Everything else (versioning, S3 upload/download, manifest merging, worktree
12-
management) lives here.
11+
Everything else (versioning, S3 upload/download, manifest merging) lives here.
1312
1413
Quick start:
1514
# Generate + publish for HEAD (version auto-detected from latest tag)
@@ -65,7 +64,7 @@
6564
uv run compat.py publish
6665
uv run compat.py publish --dry-run
6766
68-
# Publish from an older tag via worktree
67+
# Publish using an older tag for version detection
6968
uv run compat.py publish --git-ref v0.62.0
7069
7170
# Generate locally without publishing
@@ -309,54 +308,6 @@ def _merge_manifest(
309308
}
310309

311310

312-
# ---------------------------------------------------------------------------
313-
# Worktree helpers
314-
# ---------------------------------------------------------------------------
315-
316-
317-
def _worktree_generate(git_ref: str, output_dir: Path) -> None:
318-
"""Create a git worktree at `git_ref`, build vortex-compat, and generate fixtures."""
319-
repo_root = Path(
320-
subprocess.run(
321-
["git", "rev-parse", "--show-toplevel"],
322-
capture_output=True,
323-
text=True,
324-
check=True,
325-
).stdout.strip()
326-
)
327-
328-
worktree_dir = Path(tempfile.mkdtemp(prefix=f"vortex-compat-wt-{git_ref}-"))
329-
try:
330-
_info(f"creating worktree at {git_ref} in {worktree_dir}")
331-
_run_cmd(
332-
["git", "-C", str(repo_root), "worktree", "add", str(worktree_dir), git_ref],
333-
check=True,
334-
)
335-
336-
_info(f"building vortex-compat at {git_ref}...")
337-
_run_cmd(
338-
["cargo", "build", "-p", CARGO_BIN, "--release"],
339-
check=True,
340-
cwd=worktree_dir,
341-
)
342-
343-
bin_path = worktree_dir / "target" / "release" / CARGO_BIN
344-
if not bin_path.exists():
345-
print(f"error: binary not found at {bin_path}", file=sys.stderr)
346-
sys.exit(1)
347-
348-
_info(f"generating fixtures with {git_ref} binary...")
349-
_run_cmd([str(bin_path), "generate", "--output", str(output_dir)], check=True)
350-
351-
finally:
352-
_run_cmd(
353-
["git", "-C", str(repo_root), "worktree", "remove", "--force", str(worktree_dir)],
354-
check=False,
355-
)
356-
if worktree_dir.exists():
357-
shutil.rmtree(worktree_dir, ignore_errors=True)
358-
359-
360311
# ---------------------------------------------------------------------------
361312
# Commands
362313
# ---------------------------------------------------------------------------
@@ -365,14 +316,10 @@ def _worktree_generate(git_ref: str, output_dir: Path) -> None:
365316
def cmd_generate(args: argparse.Namespace) -> None:
366317
"""Generate fixtures locally, then write a proper manifest."""
367318
output = Path(args.output)
368-
git_ref = args.git_ref
369319
exclude = _parse_exclude(args)
370-
version = _version_from_ref(git_ref)
320+
version = _version_from_ref(args.git_ref)
371321

372-
if git_ref:
373-
_worktree_generate(git_ref, output)
374-
else:
375-
_run_rust_generate(output, exclude=exclude)
322+
_run_rust_generate(output, exclude=exclude)
376323

377324
# Read fixtures.json and write a versioned manifest.
378325
fixtures_json = json.loads((output / "fixtures.json").read_text())
@@ -399,10 +346,7 @@ def cmd_publish(args: argparse.Namespace) -> None:
399346
output = Path(tmpdir) / "fixtures"
400347

401348
_info("generating fixtures...")
402-
if git_ref:
403-
_worktree_generate(git_ref, output)
404-
else:
405-
_run_rust_generate(output, exclude=exclude)
349+
_run_rust_generate(output, exclude=exclude)
406350

407351
fixtures_json = json.loads((output / "fixtures.json").read_text())
408352

@@ -674,9 +618,9 @@ def main() -> None:
674618
"generate",
675619
help="Generate fixtures locally",
676620
description=(
677-
"Build all fixture .vortex files and write them to a directory.\n"
678-
"Version is auto-detected from the nearest git tag at HEAD\n"
679-
"(or at --git-ref if specified)."
621+
"Build all fixture .vortex files using the current binary and write\n"
622+
"them to a directory. Version is auto-detected from the nearest git\n"
623+
"tag at HEAD (or at --git-ref if specified)."
680624
),
681625
epilog=(
682626
"examples:\n"
@@ -688,8 +632,9 @@ def main() -> None:
688632
p.add_argument("--output", required=True, help="Output directory")
689633
p.add_argument(
690634
"--git-ref",
691-
help="Git ref (tag/branch/SHA) to build from via worktree. "
692-
"Version is derived from the nearest tag at this ref.",
635+
help="Git ref for version detection (e.g. v0.62.0). "
636+
"Version is derived from the nearest tag at this ref. "
637+
"Fixtures are always built with the current binary.",
693638
)
694639
p.add_argument(
695640
"--exclude",
@@ -719,8 +664,9 @@ def main() -> None:
719664
)
720665
p.add_argument(
721666
"--git-ref",
722-
help="Git ref to build from via worktree (e.g. v0.62.0). "
723-
"Version is derived from the nearest tag at this ref.",
667+
help="Git ref for version detection (e.g. v0.62.0). "
668+
"Version is derived from the nearest tag at this ref. "
669+
"Fixtures are always built with the current binary.",
724670
)
725671
p.add_argument(
726672
"--dry-run",

vortex-test/compat-gen/src/adapter.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ use vortex::file::WriteOptionsSessionExt;
1212
use vortex::io::session::RuntimeSessionExt;
1313
use vortex::layout::layouts::flat::writer::FlatLayoutStrategy;
1414
use vortex_array::ArrayRef;
15-
use vortex_array::DynArray;
1615
use vortex_array::stream::ArrayStreamAdapter;
1716
use vortex_array::stream::ArrayStreamExt;
1817
use vortex_buffer::ByteBuffer;
@@ -38,7 +37,11 @@ pub fn write_file(path: &Path, chunk: ArrayRef) -> VortexResult<()> {
3837
let mut file = tokio::fs::File::create(path)
3938
.await
4039
.map_err(|e| vortex_err!("failed to create {}: {e}", path.display()))?;
41-
let _summary = session.write_options().write(&mut file, stream).await?;
40+
let _summary = session
41+
.write_options()
42+
.with_strategy(strategy)
43+
.write(&mut file, stream)
44+
.await?;
4245
Ok(())
4346
})
4447
}

vortex-test/compat-gen/src/check.rs

Lines changed: 4 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@ use std::path::Path;
55

66
use clap::ValueEnum;
77
use serde::Serialize;
8-
use vortex_array::IntoArray;
9-
use vortex_array::arrays::ChunkedArray;
108
use vortex_array::assert_arrays_eq;
119
use vortex_buffer::ByteBuffer;
1210
use vortex_error::VortexResult;
@@ -125,7 +123,7 @@ pub fn check(dir: &Path, mode: Mode, exclude: &[String]) -> VortexResult<()> {
125123
continue;
126124
}
127125
let expected = match fixture.build(&tmp_dir) {
128-
Ok(chunks) => chunks,
126+
Ok(arr) => arr,
129127
Err(e) => {
130128
result.failed.push(FailedFixture {
131129
name: fixture.name().to_string(),
@@ -158,19 +156,9 @@ pub fn check(dir: &Path, mode: Mode, exclude: &[String]) -> VortexResult<()> {
158156
};
159157

160158
// Compare.
161-
match compare_arrays(&actual, &expected) {
162-
Ok(()) => {
163-
eprintln!(" pass {}", fixture.name());
164-
result.passed.push(fixture.name().to_string());
165-
}
166-
Err(e) => {
167-
eprintln!(" FAIL {}", fixture.name());
168-
result.failed.push(FailedFixture {
169-
name: fixture.name().to_string(),
170-
error: e.to_string(),
171-
});
172-
}
173-
}
159+
assert_arrays_eq!(actual, expected);
160+
eprintln!(" pass {}", fixture.name());
161+
result.passed.push(fixture.name().to_string());
174162
}
175163

176164
// Print JSON result to stdout.
@@ -195,15 +183,3 @@ pub fn check(dir: &Path, mode: Mode, exclude: &[String]) -> VortexResult<()> {
195183

196184
Ok(())
197185
}
198-
199-
fn compare_arrays(
200-
actual: &[vortex_array::ArrayRef],
201-
expected: &[vortex_array::ArrayRef],
202-
) -> VortexResult<()> {
203-
let actual_dtype = actual[0].dtype().clone();
204-
let expected_dtype = expected[0].dtype().clone();
205-
let actual_arr = ChunkedArray::try_new(actual.to_vec(), actual_dtype)?.into_array();
206-
let expected_arr = ChunkedArray::try_new(expected.to_vec(), expected_dtype)?.into_array();
207-
assert_arrays_eq!(actual_arr, expected_arr);
208-
Ok(())
209-
}

vortex-test/compat-gen/src/fixtures/clickbench.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,26 +12,27 @@ use vortex_array::arrays::Primitive;
1212
use vortex_array::arrays::Struct;
1313
use vortex_array::arrays::VarBin;
1414
use vortex_array::arrow::FromArrowArray;
15+
use vortex_array::vtable::ArrayId;
1516
use vortex_error::VortexResult;
1617
use vortex_error::vortex_err;
1718

18-
use super::Fixture;
19+
use super::ArrayFixture;
1920

2021
/// First partition of ClickBench hits, limited to 1000 rows.
2122
const CLICKBENCH_URL: &str =
2223
"https://pub-3ba949c0f0354ac18db1f0f14f0a2c52.r2.dev/clickbench/parquet_many/hits_0.parquet";
2324

2425
const PARQUET_FILENAME: &str = "clickbench_hits_0.parquet";
2526

26-
pub struct ClickBenchHits1kFixture;
27+
struct ClickBenchHits1kFixture;
2728

2829
impl ArrayFixture for ClickBenchHits1kFixture {
2930
fn name(&self) -> &str {
3031
"clickbench_hits_1k.vortex"
3132
}
3233

3334
fn description(&self) -> &str {
34-
"First 1000 rows of ClickBench hits_0 partition (wide real-world schema)"
35+
"First 1000 rows of ClickBench hits dataset with wide schema of primitives and strings"
3536
}
3637

3738
fn setup(&self, tmp_dir: &Path) -> VortexResult<()> {
@@ -49,7 +50,7 @@ impl ArrayFixture for ClickBenchHits1kFixture {
4950
Ok(())
5051
}
5152

52-
fn build(&self, tmp_dir: &Path) -> VortexResult<Vec<ArrayRef>> {
53+
fn build(&self, tmp_dir: &Path) -> VortexResult<ArrayRef> {
5354
let parquet_path = tmp_dir.join(PARQUET_FILENAME);
5455
let file_bytes = std::fs::read(&parquet_path)
5556
.map_err(|e| vortex_err!("failed to read cached parquet: {e}"))?;

vortex-test/compat-gen/src/fixtures/mod.rs

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,19 @@ mod tpch;
88
use std::path::Path;
99

1010
use vortex_array::ArrayRef;
11+
use vortex_array::ArrayVisitorExt;
12+
use vortex_array::vtable::ArrayId;
1113
use vortex_error::VortexResult;
1214
use vortex_error::vortex_bail;
1315

1416
/// A deterministic fixture that produces the same arrays every time.
1517
///
1618
/// Lifecycle:
17-
/// 1. **Setup** (optional, async I/O) — download external data or prepare
19+
/// 1. **Setup** (optional, I/O) — download external data or prepare
1820
/// intermediate files in `tmp_dir`. Run concurrently across fixtures.
1921
/// 2. **Build** (CPU, parallel) — construct arrays from scratch or from
2022
/// data prepared during setup.
21-
pub trait Fixture: Send + Sync {
23+
pub trait ArrayFixture: Send + Sync {
2224
/// Unique name for this fixture, used as the output filename.
2325
fn name(&self) -> &str;
2426

@@ -27,17 +29,57 @@ pub trait Fixture: Send + Sync {
2729

2830
/// Optional setup phase for downloading external data or preparing files.
2931
///
30-
/// Called before `build()`. Runs concurrently across fixtures via
31-
/// `tokio::spawn_blocking`. The default implementation is a no-op.
32+
/// Called before `build()`. Runs concurrently across fixtures.
33+
/// The default implementation is a no-op.
3234
fn setup(&self, _tmp_dir: &Path) -> VortexResult<()> {
3335
Ok(())
3436
}
3537

36-
/// Build the expected arrays, using `tmp_dir` for any data prepared
38+
/// Build the expected array, using `tmp_dir` for any data prepared
3739
/// during `setup()`.
3840
///
39-
/// Must be deterministic. Returns a `Vec` to support chunked fixtures.
40-
fn build(&self, tmp_dir: &Path) -> VortexResult<Vec<ArrayRef>>;
41+
/// Must be deterministic under all versions of vortex.
42+
fn build(&self, tmp_dir: &Path) -> VortexResult<ArrayRef>;
43+
44+
/// Encoding IDs that must appear somewhere in the array tree produced by [`Self::build`].
45+
///
46+
/// Only include encodings that the fixture is specifically testing, not incidental ones
47+
/// (e.g. a primitives fixture should not list struct even if it wraps values in one).
48+
///
49+
/// An empty slice (the default) disables the check.
50+
fn expected_encodings(&self) -> Vec<ArrayId> {
51+
vec![]
52+
}
53+
}
54+
55+
/// Walk the array tree, collect encoding IDs, and assert that all expected encodings
56+
/// are present. This is a no-op when [`ArrayFixture::expected_encodings`] returns an empty vec.
57+
pub fn check_expected_encodings(array: &ArrayRef, fixture: &dyn ArrayFixture) -> VortexResult<()> {
58+
let expected = fixture.expected_encodings();
59+
if expected.is_empty() {
60+
return Ok(());
61+
}
62+
63+
let mut found: Vec<ArrayId> = Vec::new();
64+
for node in array.depth_first_traversal() {
65+
let id = node.encoding_id();
66+
if !found.contains(&id) {
67+
found.push(id);
68+
}
69+
}
70+
71+
let missing: Vec<&ArrayId> = expected.iter().filter(|id| !found.contains(id)).collect();
72+
73+
if !missing.is_empty() {
74+
vortex_bail!(
75+
"fixture '{}' is missing expected encodings: {:?} (found: {:?})",
76+
fixture.name(),
77+
missing,
78+
found,
79+
);
80+
}
81+
82+
Ok(())
4183
}
4284

4385
/// All registered fixtures.

0 commit comments

Comments
 (0)