diff --git a/.github/workflows/compat-gen-upload.yml b/.github/workflows/compat-gen-upload.yml index de7cc486188..d2440d422b5 100644 --- a/.github/workflows/compat-gen-upload.yml +++ b/.github/workflows/compat-gen-upload.yml @@ -3,18 +3,64 @@ name: Compat Fixture Upload on: workflow_dispatch: inputs: - version: - description: "Version to generate fixtures for (e.g. 0.62.0)" + git_ref: + description: "Git ref for version detection (e.g. v0.62.0). Defaults to HEAD." + required: false + confirm_upload: + description: "Type 'yes' to confirm upload after reviewing the dry-run output." required: true + default: "no" jobs: - upload-fixtures: + dry-run: runs-on: ubuntu-latest + permissions: + contents: read + outputs: + version: ${{ steps.detect.outputs.version }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: dtolnay/rust-toolchain@stable + + - uses: Swatinem/rust-cache@v2 + + - name: Detect version + id: detect + run: | + GIT_REF="${{ inputs.git_ref }}" + if [ -n "$GIT_REF" ]; then + TAG=$(git describe --tags --abbrev=0 "$GIT_REF") + else + TAG=$(git describe --tags --abbrev=0) + fi + VERSION="${TAG#v}" + echo "version=$VERSION" >> "$GITHUB_OUTPUT" + echo "Detected version: $VERSION (from tag: $TAG)" + + - name: Dry run publish + run: | + GIT_REF="${{ inputs.git_ref }}" + CMD="python3 vortex-test/compat-gen/scripts/compat.py publish --dry-run" + if [ -n "$GIT_REF" ]; then + CMD="$CMD --git-ref $GIT_REF" + fi + $CMD + + upload: + needs: dry-run + if: inputs.confirm_upload == 'yes' + runs-on: ubuntu-latest + environment: compat-upload permissions: id-token: write contents: read steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - uses: dtolnay/rust-toolchain@stable @@ -23,10 +69,14 @@ jobs: - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v5 with: - role-to-assume: arn:aws:iam::245040174862:role/GitHubBenchmarkRole + role-to-assume: "arn:aws:iam::245040174862:role/GitHubBenchmarkRole" aws-region: us-east-1 
- - name: Generate and upload fixtures - run: > - python3 vortex-test/compat-gen/scripts/upload.py - --version "${{ inputs.version }}" + - name: Upload fixtures for v${{ needs.dry-run.outputs.version }} + run: | + GIT_REF="${{ inputs.git_ref }}" + CMD="python3 vortex-test/compat-gen/scripts/compat.py publish --yes" + if [ -n "$GIT_REF" ]; then + CMD="$CMD --git-ref $GIT_REF" + fi + $CMD diff --git a/.github/workflows/compat-test-weekly.yml b/.github/workflows/compat-test-weekly.yml deleted file mode 100644 index 1eaa22c15b8..00000000000 --- a/.github/workflows/compat-test-weekly.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: Compat Test - -on: - schedule: - - cron: "0 6 * * 1" # Monday 6am UTC - workflow_dispatch: { } - -env: - FIXTURES_URL: https://vortex-compat-fixtures.s3.amazonaws.com - -jobs: - compat-test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - uses: dtolnay/rust-toolchain@stable - - - uses: Swatinem/rust-cache@v2 - - - name: Run compat tests - run: | - cargo run -p vortex-compat --release --bin compat-validate -- \ - --fixtures-url "$FIXTURES_URL" diff --git a/.github/workflows/compat-validation.yml b/.github/workflows/compat-validation.yml new file mode 100644 index 00000000000..16bd6b4ed2b --- /dev/null +++ b/.github/workflows/compat-validation.yml @@ -0,0 +1,34 @@ +name: Compat Validation + +on: + schedule: + - cron: "0 6 * * 1" # Monday 6am UTC + workflow_dispatch: + inputs: + mode: + description: "Validation mode" + required: true + default: "last" + type: choice + options: + - last + - all + +env: + FIXTURES_URL: https://vortex-compat-fixtures.s3.amazonaws.com + +jobs: + compat-test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: dtolnay/rust-toolchain@stable + + - uses: Swatinem/rust-cache@v2 + + - name: Run compat tests + run: | + MODE="${{ inputs.mode || 'last' }}" + python3 vortex-test/compat-gen/scripts/compat.py check \ + --mode "$MODE" diff --git a/Cargo.lock b/Cargo.lock index 
3a0f3ac9b28..e894e3dff3e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4635,9 +4635,9 @@ dependencies = [ [[package]] name = "instability" -version = "0.3.11" +version = "0.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357b7205c6cd18dd2c86ed312d1e70add149aea98e7ef72b9fdf0270e555c11d" +checksum = "5eb2d60ef19920a3a9193c3e371f726ec1dafc045dac788d0fb3704272458971" dependencies = [ "darling", "indoc", @@ -5558,11 +5558,11 @@ dependencies = [ [[package]] name = "libtest-mimic" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5297962ef19edda4ce33aaa484386e0a5b3d7f2f4e037cbeee00503ef6b29d33" +checksum = "14e6ba06f0ade6e504aff834d7c34298e5155c6baca353cc6a4aaff2f9fd7f33" dependencies = [ - "anstream 0.6.21", + "anstream 1.0.0", "anstyle", "clap", "escape8259", @@ -9903,13 +9903,13 @@ dependencies = [ "arrow-array", "arrow-select", "bytes", - "chrono", "clap", "futures", "parquet", "reqwest", "serde", "serde_json", + "sha2", "tempfile", "tokio", "tpchgen", @@ -9919,7 +9919,6 @@ dependencies = [ "vortex-buffer", "vortex-error", "vortex-session", - "vortex-utils", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 4039f292440..d095d8334ca 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -216,6 +216,7 @@ rustc-hash = "2.1" serde = "1.0.220" serde_json = "1.0.138" serde_test = "1.0.176" +sha2 = "0.10" simdutf8 = "0.1.5" similar = "2.7.0" sketches-ddsketch = "0.3.0" diff --git a/vortex-test/compat-gen/Cargo.toml b/vortex-test/compat-gen/Cargo.toml index 3bb8df6af4e..62906843e8b 100644 --- a/vortex-test/compat-gen/Cargo.toml +++ b/vortex-test/compat-gen/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "vortex-compat" authors = { workspace = true } -description = "Backward-compatibility fixture generation and testing for Vortex" +description = "Backward-compatibility fixture generation and checking for Vortex" edition = { workspace = true } homepage = { workspace = true } include = { 
workspace = true } @@ -16,13 +16,9 @@ version = { workspace = true } workspace = true [[bin]] -name = "compat-gen" +name = "vortex-compat" path = "src/main.rs" -[[bin]] -name = "compat-validate" -path = "src/validate_main.rs" - [dependencies] # Vortex crates vortex = { workspace = true, features = ["files", "tokio", "zstd"] } @@ -30,7 +26,6 @@ vortex-array = { workspace = true, features = ["_test-harness"] } vortex-buffer = { workspace = true } vortex-error = { workspace = true } vortex-session = { workspace = true } -vortex-utils = { workspace = true } # TPC-H generation arrow-array = { workspace = true } @@ -46,12 +41,12 @@ parquet = { workspace = true } futures = { workspace = true } tokio = { workspace = true, features = ["full"] } -# HTTP fetching (for ClickBench fixture + compat-test S3 downloads) +# HTTP fetching (for ClickBench fixture setup) reqwest = { workspace = true } # CLI + serialization -chrono = { workspace = true, features = ["serde"] } clap = { workspace = true, features = ["derive"] } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } +sha2 = { workspace = true } tempfile = { workspace = true } diff --git a/vortex-test/compat-gen/DESIGN.md b/vortex-test/compat-gen/DESIGN.md new file mode 100644 index 00000000000..56b655eae64 --- /dev/null +++ b/vortex-test/compat-gen/DESIGN.md @@ -0,0 +1,224 @@ + + + +# Vortex Backward-Compatibility Testing + +## The Problem + +Vortex is a columnar file format. Users write `.vortex` files with one +version of the library and expect to read them in any future version. +We need back-compat! + +## The Solution + +We maintain a library of `.vortex` fixture files, one set per released +version, stored in S3. A test reads every old fixture with the current reader +and compares the decoded values against a known-good oracle. If any fixture +from any version decodes to the wrong values, the test fails. + +## How Fixtures Work + +A fixture is a small `.vortex` file with known contents. 
The expected +contents are defined by a Rust function — `build()` — that deterministically +constructs the arrays. The same `build()` code is used at both ends: + +- **At publish time:** `build()` produces arrays -> the writer serializes them + to a `.vortex` file -> the file is uploaded to S3. +- **At check time:** `build()` produces the same arrays -> the reader decodes + the old file -> the two are compared value-by-value. + +If the reader is correct, the values match. If a code change breaks +decoding, they don't. + +### The oracle question + +"Isn't comparing against `build()` circular? What if `build()` itself +changes?" + +`build()` is the specification — it defines what the fixture *should* +contain. The contract is that `build()` for a given fixture name is immutable +once defined. It must never change its output. If someone modifies it, the +check fails loudly against every old version. + +### Fixture evolution + +Because `build()` is immutable, you cannot add a column to an existing +fixture or change its schema. To test a new type, encoding, or structural +pattern, create a **new fixture file** with a new name. + +## Fixture Trait + +Every fixture implements this trait in Rust: + +```rust +pub trait Fixture { + /// Filename, e.g. "primitives.vortex". + fn name(&self) -> &str; + + /// Human-readable description. + fn description(&self) -> &str; + + /// Optional async setup (download external data, etc). + /// Called before build(). Default is a no-op. + fn setup(&self, _tmp_dir: &Path) -> VortexResult<()> { Ok(()) } + + /// Build the expected arrays. Must be deterministic. + /// Returns a Vec to support chunked fixtures (multiple chunks). + fn build(&self, tmp_dir: &Path) -> VortexResult>; +} +``` + +All fixtures are registered in `all_fixtures()` in `src/fixtures/mod.rs`. 
+ +### Current fixture suite + +| Fixture | File | Description | +|---------|------|-------------| +| Primitives | `primitives.vortex` | u8-u64, i32, i64, f32, f64 at min/mid/max | +| Strings | `strings.vortex` | Empty, ASCII, multibyte (Japanese), emoji | +| Booleans | `booleans.vortex` | Mixed true/false | +| Nullable | `nullable.vortex` | Nullable int and string columns with interspersed nulls | +| Nested Struct | `struct_nested.vortex` | Two-level struct-in-struct | +| Chunked | `chunked.vortex` | 3 chunks of 1000 rows, deterministic values | +| TPC-H Lineitem | `tpch_lineitem.vortex` | TPC-H lineitem at SF 0.01 | +| TPC-H Orders | `tpch_orders.vortex` | TPC-H orders at SF 0.01 | +| ClickBench Hits | `clickbench_hits_1k.vortex` | First 1000 rows of ClickBench hits | + +### Adding a new fixture + +1. Create a struct implementing `Fixture` in `src/fixtures/`. +2. Register it in `all_fixtures()` in `src/fixtures/mod.rs`. +3. Run `cargo run -p vortex-compat --release -- generate --output /tmp/test` to verify. +4. Publish — the orchestrator merges the manifest so the new fixture gets + `since` set to the current version. + +## Architecture + +``` +┌──────────────────────────────────────────────────┐ +│ Python orchestrator (compat.py) │ +│ Version-agnostic: S3, manifests, worktrees │ +│ │ +│ Commands: publish, check, generate, list, │ +│ validate-manifest │ +└──────────────────┬───────────────────────────────┘ + │ invokes +┌──────────────────▼───────────────────────────────┐ +│ Rust binary (vortex-compat) │ +│ Version-specific: builds arrays, writes/reads │ +│ .vortex files, compares arrays │ +│ │ +│ Commands: generate, check │ +└──────────────────────────────────────────────────┘ +``` + +**Why the split?** Only the Rust binary links against Vortex. When Vortex +APIs change across versions, only this small binary needs to recompile. The +Python orchestrator handles S3, manifests, git worktrees, and additive-only +enforcement without touching Vortex code. 
+ +### Rust binary: `vortex-compat` + +See [README.md](README.md) for CLI usage. + +#### Adapter module (`src/adapter.rs`) + +The only code that calls Vortex file I/O APIs directly: + +```rust +pub fn write_file(path: &Path, chunks: Vec<ArrayRef>) -> VortexResult<()>; +pub fn read_file(bytes: ByteBuffer) -> VortexResult<Vec<ArrayRef>>; +``` + +Uses the Epoch C API (`session.write_options()` / +`session.open_options().open_buffer()`), valid from v0.58.0 through HEAD. + +### Python orchestrator: `compat.py` + +See [README.md](README.md) for CLI usage. + +## Store Format + +### `versions.json` + +A JSON array of version strings: + +```json +["0.62.0", "0.63.0"] +``` + +### Directory layout + +``` +store/ +├── versions.json +├── v0.62.0/ +│ └── arrays/ +│ ├── manifest.json +│ ├── primitives.vortex +│ └── ... +└── v0.63.0/ + └── arrays/ + ├── manifest.json + └── ... +``` + +Each version gets a directory named `v{version}` with an `arrays/` +subdirectory for fixture files. Re-publishing overwrites the existing +directory. + +### Per-version manifest + +```json +{ + "version": "0.63.0", + "generated_at": "2026-03-14T08:00:00Z", + "fixtures": [ + { + "name": "primitives.vortex", + "description": "All primitive types with boundary values", + "since": "0.62.0" + } + ] +} +``` + +The `since` field tracks which version introduced each fixture. It is carried +forward automatically by manifest merging — when publishing version N, the +orchestrator reads version N-1's manifest and preserves existing `since` +values. + +## CI Workflows + +### Fixture upload (`.github/workflows/compat-gen-upload.yml`) + +Used to upload fixtures for each new release. Triggered via **manual +dispatch** with an optional `git_ref` input (defaults to HEAD) and a required `confirm_upload` input (defaults to `no`). + +Two-phase workflow: + +1. **dry-run** — auto-detects the version from the nearest git tag, builds + fixtures, and prints what would be uploaded. +2. **upload** — runs only when `confirm_upload` is `yes`; requires manual approval via the `compat-upload` GitHub + environment, then performs the actual upload to S3. 
+ +### Compat validation (`.github/workflows/compat-validation.yml`) + +Validates the current code against previously published fixtures. Runs weekly +(Monday 6am UTC) and via manual dispatch with a `mode` input: + +| Mode | Behavior | +|------|----------| +| `last` (default) | Compare against the most recent published release only | +| `all` | Compare against all previously published versions (pre-release check) | + +## Known Limitations + +**Does catch:** schema deserialization breaks, encoding algorithm changes +that corrupt values, null bitmap changes, UTF-8/multibyte handling, chunk +boundary regressions, struct field ordering, encoding removal. + +**Does not catch:** predicate pushdown (no filters applied), column +projection (reads all columns), missing DType coverage (no decimal, temporal, +list, binary, extension), degenerate cases (empty files, all-null, float +specials), partial reads / mmap. diff --git a/vortex-test/compat-gen/README.md b/vortex-test/compat-gen/README.md index 6473a866ee3..fe515932931 100644 --- a/vortex-test/compat-gen/README.md +++ b/vortex-test/compat-gen/README.md @@ -1,263 +1,153 @@ -# vortex-compat: Backward-Compatibility Testing +# vortex-compat -This crate provides two binaries that together ensure Vortex can always read files -written by older versions: +Backward-compatibility testing for the Vortex file format. Ensures the +current reader can always decode `.vortex` files written by any older +released version. -- **`compat-gen`** — generates deterministic fixture files for a given Vortex version. -- **`compat-validate`** — reads fixtures from every historical version and validates - they round-trip to the expected arrays. +See [DESIGN.md](DESIGN.md) for full architecture and design rationale. -Fixtures are stored in an S3 bucket. CI uploads new fixtures on every release tag -and runs weekly validation against all prior versions. 
+## Quick Start -## Fixture Contract +```bash +# Generate fixtures locally +cargo run -p vortex-compat --release -- generate --output /tmp/fixtures -Fixtures are the unit of backward-compatibility. Each fixture is a named file -(e.g. `primitives.vortex`) whose contents are defined by a deterministic `build()` -method. The following rules apply: +# Check fixtures against current reader +cargo run -p vortex-compat --release -- check --dir /tmp/fixtures --mode exact -- **Immutable data.** Once a fixture's `build()` is defined, its output (columns, - values, nulls, ordering) must never change. Every version that includes that - fixture must produce byte-for-byte identical logical arrays. `compat-validate` - validates this by rebuilding expected arrays from `build()` and comparing them - against what was read from the stored file. +# Publish to S3 (requires AWS credentials) +python scripts/compat.py publish -- **New capabilities get new files.** To test a new encoding, data type, or - structural pattern, add a new fixture with a new filename. Never modify an - existing fixture to cover new ground. +# Check all published versions +python scripts/compat.py check -- **Additive-only fixture list.** The fixture list only ever grows; fixtures are - never removed. The upload script (`scripts/upload.py`) enforces this by checking - that every fixture in the previous version's manifest still exists in the - generated output. Each fixture's `since` field in the manifest records the first - version that introduced it. +# Publish from a historical tag via git worktree +python scripts/compat.py publish --git-ref v0.62.0 -- **`versions.json`** is the top-level index listing every version that has - uploaded fixtures. `compat-validate` iterates over all listed versions. 
+# Local-only workflow (no S3) +python scripts/compat.py publish --store /tmp/compat-store +python scripts/compat.py check --store /tmp/compat-store +``` -- **Watch for dependency drift.** `compat-validate` compares stored files against - `build()` output from the *current* code. If a dependency (e.g. `tpchgen`) - silently changes its output across versions, old fixtures will fail validation - even though the Vortex reader is fine. If you see unexpected failures across - all old versions for a specific fixture, check whether its `build()` deps - changed before blaming the reader. +## Rust Binary: `vortex-compat` -## First-Time Setup: Bootstrap the Bucket +A thin binary with two commands. It has **no** knowledge of versions, S3, +manifests, or orchestration. -After creating the S3 bucket (see [AWS Setup](#aws-setup-one-time) below), seed it -with the first fixture set: +**Output protocol:** progress/diagnostics to stderr, structured JSON to +stdout (`check` only). -```bash -# Generate + upload (first version, no previous manifest to merge) -python3 vortex-test/compat-gen/scripts/upload.py --version 0.62.0 +### `generate --output [--exclude ]` -# Verify the round-trip -AWS_PROFILE=vortex-ci cargo run -p vortex-compat --release --bin compat-validate -- \ - --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com -``` +Three phases: -## Uploading Fixtures for a New Version +1. **Setup** — run each fixture's `setup()` concurrently via + `tokio::spawn_blocking`. Used by TPC-H and ClickBench fixtures to + download external data. +2. **Build** — construct arrays in parallel threads via `std::thread::scope`. + All must succeed before any files are written. +3. **Write** — serialize each fixture's arrays as a `.vortex` file, then + write `fixtures.json` listing all generated files. -Use the upload script, which handles building, manifest merging, and S3 upload: +Output: +``` +/ +├── fixtures.json +├── primitives.vortex +├── strings.vortex +└── ... 
+``` -```bash -# Full upload -python3 vortex-test/compat-gen/scripts/upload.py --version 0.63.0 +`fixtures.json` format: +```json +{ + "fixtures": [ + {"name": "primitives.vortex", "description": "..."}, + {"name": "strings.vortex", "description": "..."} + ] +} +``` + +### `check --dir --mode [--exclude ]` -# Dry run (generate + merge manifest, skip S3) -python3 vortex-test/compat-gen/scripts/upload.py --version 0.63.0 --dry-run +For each `.vortex` file in the directory: -# Skip the cargo build (if you already have fixtures generated) -python3 vortex-test/compat-gen/scripts/upload.py \ - --version 0.63.0 --output /tmp/fixtures/ --skip-build +1. Run `setup()` + `build()` to reconstruct expected arrays from current code +2. Read the file bytes and decode via `adapter::read_file()` +3. Combine chunks into `ChunkedArray` and compare with `assert_arrays_eq!` -# Verify all versions -cargo run -p vortex-compat --release --bin compat-validate -- \ - --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com +JSON result to stdout: +```json +{ + "passed": ["primitives.vortex"], + "failed": [{"name": "foo.vortex", "error": "mismatch at row 42"}], + "skipped": ["old_fixture.vortex"] +} ``` -## Re-uploading Fixtures for an Existing Version +Check modes: -The upload script will overwrite the existing prefix in S3: +| Mode | Extra files in dir | Missing fixtures | +|------|--------------------|------------------| +| `subset` (default) | Skipped | Error | +| `exact` | Error | Error | +| `superset` | Error | Skipped | -```bash -python3 vortex-test/compat-gen/scripts/upload.py --version 0.62.0 -``` +Use `subset` when checking old versions (they may have extra fixtures not in +current code). Use `exact` for the current version. -No need to update `versions.json` — the script handles it idempotently. 
+## Python Orchestrator: `compat.py` -## Local-Only Workflow +### `publish [--git-ref ] [--store ] [--dry-run] [--exclude ]` -You can skip S3 entirely and work against local directories: +1. Detect version from nearest git tag at HEAD (or ``) +2. Generate fixtures (from current tree, or from a worktree at ``) +3. Fetch previous version's manifest, merge `since` values, enforce additive-only +4. Upload `.vortex` files + `manifest.json` to `v{version}/arrays/` +5. Update `versions.json` -```bash -# Generate into a versioned subdirectory -cargo run -p vortex-compat --release --bin compat-gen -- \ - --version 0.62.0 --output /tmp/compat-root/v0.62.0/ +### `check [--versions ] [--store ] [--exclude ]` -# Validate all local versions -cargo run -p vortex-compat --release --bin compat-validate -- \ - --fixtures-dir /tmp/compat-root/ -``` +1. Read `versions.json` from store +2. For each version, download `arrays/manifest.json` + all `.vortex` files +3. Run `vortex-compat check --dir --mode subset` +4. Aggregate results, exit 1 if any failures -If the bucket requires authenticated access, set your AWS profile: +### `generate --output [--git-ref ] [--exclude ]` -```bash -AWS_PROFILE=vortex-ci cargo run -p vortex-compat --release --bin compat-validate -- \ - --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com -``` +Generate fixtures locally without publishing. Writes `.vortex` files and a +`manifest.json`. -## AWS Setup (one-time) +### `list [--store ] [--version ]` -All resources live in the **benchmark account (245040174862)**, region **us-east-1**. +Without `--version`: print all version numbers. +With `--version`: print that version's `manifest.json`. -### 1. Create the S3 bucket +### `validate-manifest [--store ]` -```bash -aws s3api create-bucket \ - --bucket vortex-compat-fixtures \ - --region us-east-1 -``` +Walk all versions in order and verify no fixtures were removed between +consecutive versions (additive-only property). -### 2. 
Enable public read access +### Store abstraction -Disable the "Block Public Access" settings that prevent a public bucket policy: +| Spec | Type | Auth | +|------|------|------| +| `s3://vortex-compat-fixtures` (default) | S3 | Public reads (HTTPS), AWS creds for writes | +| `/tmp/compat` | Local directory | None | -```bash -aws s3api put-public-access-block \ - --bucket vortex-compat-fixtures \ - --public-access-block-configuration \ - BlockPublicAcls=true,IgnorePublicAcls=true,BlockPublicPolicy=false,RestrictPublicBuckets=false -``` +### Git worktree workflow -Then attach a bucket policy that grants unauthenticated read: +`--git-ref` automates publishing from historical releases: ```bash -aws s3api put-bucket-policy \ - --bucket vortex-compat-fixtures \ - --policy '{ - "Version": "2012-10-17", - "Statement": [ - { - "Sid": "PublicRead", - "Effect": "Allow", - "Principal": "*", - "Action": ["s3:GetObject", "s3:ListBucket"], - "Resource": [ - "arn:aws:s3:::vortex-compat-fixtures", - "arn:aws:s3:::vortex-compat-fixtures/*" - ] - } - ] - }' +python compat.py publish --git-ref v0.62.0 ``` -### 3. Grant the benchmark role access to the compat bucket - -The CI workflow reuses the existing `GitHubBenchmarkRole` -(`arn:aws:iam::245040174862:role/GitHubBenchmarkRole`). -Add an inline policy granting it S3 access to the compat fixtures bucket: +This creates a worktree at the tag, builds the binary against that code, +generates fixtures, then cleans up. 
-```bash -aws iam put-role-policy \ - --role-name GitHubBenchmarkRole \ - --policy-name CompatFixturesS3Access \ - --policy-document '{ - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Action": [ - "s3:PutObject", - "s3:GetObject", - "s3:ListBucket" - ], - "Resource": [ - "arn:aws:s3:::vortex-compat-fixtures", - "arn:aws:s3:::vortex-compat-fixtures/*" - ] - } - ] - }' -``` +### Environment variables -## CI Workflows - -### Fixture upload (`.github/workflows/compat-gen-upload.yml`) - -Triggered via **manual dispatch** with a required `version` input (e.g. `0.62.0`). -Will be updated to also trigger on release tag pushes once the workflow is proven. - -1. Checks out the current branch -2. Runs `scripts/upload.py --version ` which: - - Builds and runs `compat-gen` to generate fixtures - - Fetches the previous version's manifest and merges `since` values - - Enforces additive-only (no fixtures removed) - - Uploads fixtures to `s3://vortex-compat-fixtures/v/` - - Updates `versions.json` with ETag-based optimistic locking - -### Weekly compat test (`.github/workflows/compat-test-weekly.yml`) - -Runs **every Monday at 06:00 UTC** and on **manual dispatch**. - -1. Checks out `main` at HEAD -2. Runs `compat-test --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com` -3. 
Validates every version listed in `versions.json` - -## Fixture Suite - -| Fixture | File | Since | Description | -|---------|------|-------|-------------| -| Primitives | `primitives.vortex` | 0.62.0 | All numeric types (u8–u64, i32, i64, f32, f64) with min/mid/max values | -| Strings | `strings.vortex` | 0.62.0 | Variable-length strings including empty, ASCII, Unicode, and emoji | -| Booleans | `booleans.vortex` | 0.62.0 | Boolean array with mixed true/false values | -| Nullable | `nullable.vortex` | 0.62.0 | Nullable int and string columns with interleaved nulls | -| Nested Struct | `struct_nested.vortex` | 0.62.0 | Two-level nested struct (inner struct within outer struct) | -| Chunked | `chunked.vortex` | 0.62.0 | Multi-chunk file: 3 chunks of 1000 rows each | -| TPC-H Lineitem | `tpch_lineitem.vortex` | 0.62.0 | TPC-H lineitem table at scale factor 0.01 | -| TPC-H Orders | `tpch_orders.vortex` | 0.62.0 | TPC-H orders table at scale factor 0.01 | -| ClickBench Hits | `clickbench_hits_1k.vortex` | 0.62.0 | First 1000 rows of the ClickBench hits table | - -### Adding a new fixture - -New encodings, data types, or structural patterns always get a **new fixture file**. -Never modify an existing fixture's `build()` output (see [Fixture Contract](#fixture-contract)). - -1. Create a struct implementing the `Fixture` trait in `src/fixtures/`: - ```rust - pub struct MyFixture; - impl Fixture for MyFixture { - fn name(&self) -> &str { "my_fixture.vortex" } - fn build(&self) -> VortexResult> { /* deterministic array construction */ } - } - ``` -2. Register it in `all_fixtures()` in `src/fixtures/mod.rs`. -3. Run `compat-gen` locally to verify it produces a valid file. -4. Upload fixtures for the current version — the upload script merges the manifest - so the new fixture gets `since` set to the current version while existing - fixtures keep their original `since` values. 
- -## Adapter Epochs - -The adapter module (`src/adapter.rs`) contains the read/write logic for the Vortex file -format. As the format API evolves across major versions, new "epochs" are introduced: - -| Epoch | Vortex Versions | Key API Surface | -|-------|----------------|-----------------| -| A | v0.36.0 | Original `VortexFileWriter` / `VortexOpenOptions` | -| B | v0.45.0 – v0.52.0 | Intermediate session-based API | -| C | v0.58.0 – HEAD | `session.write_options()` / `session.open_options().open_buffer()` | - -Only Epoch C is currently active. Earlier epochs were used during initial development -and can be resurrected by cherry-picking the adapter code onto an older release branch -if retroactive fixture generation is needed. - -### Cherry-picking to older releases - -To generate fixtures for a version in Epoch A or B: - -1. Check out the target tag (e.g. `git checkout v0.45.0`) -2. Cherry-pick the compat-gen crate: `git cherry-pick --no-commit ` -3. Swap `src/adapter.rs` to the appropriate epoch's implementation -4. Resolve any dependency mismatches in `Cargo.toml` -5. Run `compat-gen` and upload the resulting fixtures +| Variable | Description | +|----------|-------------| +| `VORTEX_COMPAT_BIN` | Path to pre-built `vortex-compat` binary. Skips `cargo run`. | diff --git a/vortex-test/compat-gen/scripts/compat.py b/vortex-test/compat-gen/scripts/compat.py new file mode 100644 index 00000000000..9c1d6c4fc10 --- /dev/null +++ b/vortex-test/compat-gen/scripts/compat.py @@ -0,0 +1,979 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright the Vortex contributors + +""" +Vortex backward-compatibility orchestrator. + +Manages fixture versions in S3 (or local directories) by calling the thin +`vortex-compat` Rust binary for generation and checking. The Rust binary +handles only two things: generating .vortex files and comparing them. +Everything else (versioning, S3 upload/download, manifest merging) lives here. 
+ +Quick start: + # Generate + publish for HEAD (version auto-detected from latest tag) + uv run compat.py publish + + # Publish from an older tag + uv run compat.py publish --git-ref v0.62.0 + + # Check all published versions against current code + uv run compat.py check +""" + +from __future__ import annotations + +import argparse +import hashlib +import json +import os +import re +import shutil +import subprocess +import sys +import tempfile +from concurrent.futures import ThreadPoolExecutor, as_completed +from datetime import UTC, datetime +from pathlib import Path +from urllib.error import HTTPError +from urllib.request import urlopen + +import jsonschema + +DEFAULT_STORE = "s3://vortex-compat-fixtures" +CARGO_BIN = "vortex-compat" + +EPILOG = """\ +environment variables: + VORTEX_COMPAT_BIN Path to a pre-built vortex-compat binary. + Skips `cargo run` when set. + +store spec: + Local path --store /tmp/compat-store + S3 bucket --store s3://my-bucket + + Default: s3://vortex-compat-fixtures + S3 reads are public HTTPS; writes need AWS credentials (env or IAM role). + +version detection: + The version is always derived from a git tag. By default, HEAD's nearest + tag is used (via `git describe --tags --abbrev=0`). Use --git-ref to + target a different ref (e.g. v0.62.0). The 'v' prefix is stripped to + produce the version string (v0.63.0 -> 0.63.0). 
+ +examples: + # Publish from HEAD (version from latest tag) + uv run compat.py publish + uv run compat.py publish --dry-run + + # Publish using an older tag for version detection + uv run compat.py publish --git-ref v0.62.0 + + # Add new fixtures to an existing version (hash-verified) + uv run compat.py publish --update + uv run compat.py publish --update --dry-run + + # Generate locally without publishing + uv run compat.py generate --output /tmp/fixtures + uv run compat.py generate --output /tmp/fixtures --git-ref v0.62.0 + + # Check all versions, or specific ones + uv run compat.py check + uv run compat.py check --versions 0.62.0,0.63.0 + + # Inspect store contents + uv run compat.py list + uv run compat.py list 0.62.0 + + # Validate additive-only manifest property + uv run compat.py validate-manifest +""" + + +# --------------------------------------------------------------------------- +# Store abstraction +# --------------------------------------------------------------------------- + + +class Store: + """Abstract base for fixture stores.""" + + def read(self, key: str) -> bytes | None: + raise NotImplementedError + + def write(self, key: str, data: bytes) -> None: + raise NotImplementedError + + def write_file(self, key: str, local_path: Path) -> None: + raise NotImplementedError + + def list_versions(self) -> list[str]: + raise NotImplementedError + + def display_name(self) -> str: + raise NotImplementedError + + +class LocalStore(Store): + """Fixture store backed by a local directory.""" + + def __init__(self, root: Path): + self.root = root + + def read(self, key: str) -> bytes | None: + path = self.root / key + if not path.exists(): + return None + return path.read_bytes() + + def write(self, key: str, data: bytes) -> None: + path = self.root / key + path.parent.mkdir(parents=True, exist_ok=True) + path.write_bytes(data) + + def write_file(self, key: str, local_path: Path) -> None: + dest = self.root / key + dest.parent.mkdir(parents=True, 
exist_ok=True) + shutil.copy2(local_path, dest) + + def list_versions(self) -> list[str]: + versions_data = self.read("versions.json") + if versions_data: + return json.loads(versions_data) + if not self.root.exists(): + return [] + # Fall back to directory listing. + versions = [] + for entry in self.root.iterdir(): + if entry.is_dir() and entry.name.startswith("v"): + manifest = entry / "arrays" / "manifest.json" + if manifest.exists(): + versions.append(entry.name[1:]) # strip 'v' prefix + versions.sort(key=_version_sort_key) + return versions + + def display_name(self) -> str: + return str(self.root) + + +class S3Store(Store): + """Fixture store backed by an S3 bucket (public reads, aws cli writes).""" + + def __init__(self, bucket: str): + self.bucket = bucket + self.https_base = f"https://{bucket}.s3.amazonaws.com" + + def read(self, key: str) -> bytes | None: + url = f"{self.https_base}/{key}" + try: + with urlopen(url) as resp: + return resp.read() + except HTTPError as e: + if e.code in (403, 404): + return None + raise + + def write(self, key: str, data: bytes) -> None: + with tempfile.NamedTemporaryFile(delete=False) as f: + f.write(data) + tmp_path = f.name + try: + self.write_file(key, Path(tmp_path)) + finally: + os.unlink(tmp_path) + + def write_file(self, key: str, local_path: Path) -> None: + dest = f"s3://{self.bucket}/{key}" + _info(f" {local_path.name} -> {dest}") + subprocess.run( + ["aws", "s3", "cp", str(local_path), dest], + check=True, + ) + + def list_versions(self) -> list[str]: + data = self.read("versions.json") + if data: + return json.loads(data) + return [] + + def display_name(self) -> str: + return f"s3://{self.bucket}" + + +def _parse_store(spec: str) -> Store: + """Parse a store specification into a Store instance.""" + if spec.startswith("s3://"): + return S3Store(spec[5:]) + return LocalStore(Path(spec)) + + +# --------------------------------------------------------------------------- +# Version detection +# 
--------------------------------------------------------------------------- + + +def _version_from_ref(git_ref: str | None = None) -> str: + """Derive a version string from a git ref. + + If git_ref is None, uses HEAD. Finds the nearest tag and strips the 'v' prefix. + For example, tag 'v0.63.0' yields version '0.63.0'. + """ + cmd = ["git", "describe", "--tags", "--abbrev=0"] + if git_ref: + cmd.append(git_ref) + result = subprocess.run(cmd, capture_output=True, text=True) + if result.returncode != 0: + ref_msg = f" ref '{git_ref}'" if git_ref else "" + print( + f"error: could not detect version from git{ref_msg}: {result.stderr.strip()}", + file=sys.stderr, + ) + sys.exit(1) + tag = result.stdout.strip() + # Strip 'v' prefix if present. + version = re.sub(r"^v", "", tag) + _info(f"detected version {version} (from tag {tag})") + return version + + +# --------------------------------------------------------------------------- +# Manifest helpers +# --------------------------------------------------------------------------- + + +MANIFEST_SCHEMA = { + "type": "object", + "required": ["version", "generated_at", "fixtures"], + "properties": { + "version": {"type": "string"}, + "generated_at": {"type": "string"}, + "fixtures": { + "type": "array", + "items": { + "type": "object", + "required": ["name"], + "properties": { + "name": {"type": "string"}, + "description": {"type": "string"}, + "sha256": {"type": "string"}, + }, + }, + }, + }, +} + + +def _validate_manifest(manifest: dict, version: str) -> None: + """Validate manifest against the JSON schema.""" + try: + jsonschema.validate(manifest, MANIFEST_SCHEMA) + except jsonschema.ValidationError as e: + raise ValueError( + f"v{version} manifest: {e.message} (at path: {'/'.join(str(p) for p in e.absolute_path)})" + ) from e + + +def _read_manifest(store: Store, version: str) -> dict | None: + # Try new path first, then legacy path. 
+ data = store.read(f"v{version}/arrays/manifest.json") + prefix = f"v{version}/arrays" + if data is None: + data = store.read(f"v{version}/manifest.json") + prefix = f"v{version}" + if data is None: + return None + manifest = json.loads(data) + + # Upgrade legacy format: flat list of filenames -> new object format. + if isinstance(manifest.get("fixtures"), list) and manifest["fixtures"] and isinstance(manifest["fixtures"][0], str): + _info(f" upgrading legacy manifest format for v{version}") + manifest["fixtures"] = [{"name": n, "description": "", "sha256": ""} for n in manifest["fixtures"]] + + _validate_manifest(manifest, version) + # Stash the prefix so callers know where to fetch fixture files. + manifest["_prefix"] = prefix + return manifest + + +def _merge_manifest( + store: Store, + fixtures_json: dict, + version: str, + prev_version: str | None, +) -> dict: + """Build a manifest for `version`, using sha256 from Rust-generated fixtures.json.""" + entries = [] + prev_names: set[str] = set() + + if prev_version: + prev_manifest = _read_manifest(store, prev_version) + if prev_manifest: + prev_names = {e["name"] for e in prev_manifest["fixtures"]} + + for f in fixtures_json["fixtures"]: + entries.append({"name": f["name"], "description": f["description"], "sha256": f["sha256"]}) + + # Additive-only enforcement. 
+ current_names = {e["name"] for e in entries} + missing = [n for n in prev_names if n not in current_names] + if missing: + print( + f"error: fixtures removed since v{prev_version}: {', '.join(missing)}", + file=sys.stderr, + ) + print("Fixtures must never be removed.", file=sys.stderr) + sys.exit(1) + + return { + "version": version, + "generated_at": datetime.now(UTC).isoformat(), + "fixtures": entries, + } + + +# --------------------------------------------------------------------------- +# Commands +# --------------------------------------------------------------------------- + + +def cmd_generate(args: argparse.Namespace) -> None: + """Generate fixtures locally, then write a proper manifest.""" + output = Path(args.output) + version = _version_from_ref(args.git_ref) + + _run_rust_generate(output) + + # Read fixtures.json (with sha256 from Rust) and write a versioned manifest. + fixtures_json = json.loads((output / "fixtures.json").read_text()) + entries = [] + for f in fixtures_json["fixtures"]: + entries.append({"name": f["name"], "description": f["description"], "sha256": f["sha256"]}) + manifest = { + "version": version, + "generated_at": datetime.now(UTC).isoformat(), + "fixtures": entries, + } + (output / "manifest.json").write_text(json.dumps(manifest, indent=2) + "\n") + _info(f"wrote manifest.json for v{version}") + + +def cmd_publish(args: argparse.Namespace) -> None: + """Generate fixtures and publish to a store.""" + store = _parse_store(args.store) + git_ref = args.git_ref + version = _version_from_ref(git_ref) + + versions = store.list_versions() + + if args.update: + _publish_update(store, version, versions, args) + else: + _publish_full(store, version, versions, args) + + +def _publish_full( + store: Store, + version: str, + versions: list[str], + args: argparse.Namespace, +) -> None: + """Full publish: upload all fixtures for a new version.""" + if version in versions and not args.force: + _info(f"error: v{version} already exists in 
{store.display_name()}") + _info("use --force to overwrite an existing version") + sys.exit(1) + + with tempfile.TemporaryDirectory() as tmpdir: + output = Path(tmpdir) / "fixtures" + + _info("generating fixtures...") + _run_rust_generate(output) + + fixtures_json = json.loads((output / "fixtures.json").read_text()) + + prev = _find_prev_version(versions, version) + if prev: + _info(f"previous version: {prev}") + + manifest = _merge_manifest(store, fixtures_json, version, prev) + manifest_json = json.dumps(manifest, indent=2) + "\n" + + if args.dry_run: + _info(f"dry run — would publish to {store.display_name()}") + existing = _read_manifest(store, version) + if existing: + existing.pop("_prefix", None) + existing_names = {e["name"] for e in existing["fixtures"]} + new_names = {e["name"] for e in manifest["fixtures"]} + added = new_names - existing_names + removed = existing_names - new_names + if added: + _info(f" new fixtures: {', '.join(sorted(added))}") + if removed: + _info(f" removed fixtures: {', '.join(sorted(removed))}") + if not added and not removed: + _info(f" same {len(new_names)} fixtures as existing") + _info(" target paths:") + for entry in manifest["fixtures"]: + _info(f" {store.display_name()}/v{version}/arrays/{entry['name']}") + _info(f" {store.display_name()}/v{version}/arrays/manifest.json") + _info(f" {store.display_name()}/versions.json") + if version not in versions: + updated_versions = sorted(versions + [version], key=_version_sort_key) + _info(f" versions.json would update: {versions} -> {updated_versions}") + else: + _info(f" versions.json unchanged: {versions}") + return + + if not args.yes: + _info(f"\nabout to upload {len(manifest['fixtures'])} fixtures for v{version} to {store.display_name()}") + answer = input("proceed? 
[y/N] ").strip().lower() + if answer not in ("y", "yes"): + _info("aborted") + sys.exit(1) + + _info(f"uploading {len(manifest['fixtures'])} fixtures to {store.display_name()}...") + _parallel_upload( + store, + [(f"v{version}/arrays/{e['name']}", output / e["name"]) for e in manifest["fixtures"]], + ) + + store.write(f"v{version}/arrays/manifest.json", manifest_json.encode()) + _info(" uploaded manifest.json") + + if version not in versions: + versions.append(version) + versions.sort(key=_version_sort_key) + store.write("versions.json", (json.dumps(versions, indent=2) + "\n").encode()) + _info(" updated versions.json") + + _info(f"\ndone: {len(manifest['fixtures'])} fixtures for v{version} published to {store.display_name()}") + + +def _publish_update( + store: Store, + version: str, + versions: list[str], + args: argparse.Namespace, +) -> None: + """Incremental update: add new fixtures to an existing version (hash-verified).""" + if version not in versions: + _info(f"error: v{version} not found in {store.display_name()}, use publish without --update for new versions") + sys.exit(1) + + existing_manifest = _read_manifest(store, version) + if existing_manifest is None: + _info(f"error: v{version} has no manifest in {store.display_name()}") + sys.exit(1) + + prefix = existing_manifest.pop("_prefix") + + with tempfile.TemporaryDirectory() as tmpdir: + output = Path(tmpdir) / "fixtures" + + _info("generating fixtures...") + _run_rust_generate(output) + + fixtures_json = json.loads((output / "fixtures.json").read_text()) + + # Compare each generated fixture against the store. 
+ new_fixtures: list[str] = [] + for f in fixtures_json["fixtures"]: + name = f["name"] + local_path = output / name + local_hash = hashlib.sha256(local_path.read_bytes()).hexdigest() + remote_data = store.read(f"{prefix}/{name}") + + if remote_data is not None: + remote_hash = hashlib.sha256(remote_data).hexdigest() + if local_hash != remote_hash: + _info(f"error: hash mismatch for {name}: local={local_hash[:12]} remote={remote_hash[:12]}") + sys.exit(1) + else: + _info(f" {name}: unchanged (sha256 match)") + else: + new_fixtures.append(name) + _info(f" {name}: NEW") + + if not new_fixtures: + _info("no new fixtures to add") + return + + if args.dry_run: + _info(f"dry run — would upload {len(new_fixtures)} new fixture(s):") + for name in new_fixtures: + _info(f" {store.display_name()}/{prefix}/{name}") + _info(f" {store.display_name()}/{prefix}/manifest.json (updated)") + return + + if not args.yes: + _info(f"\nabout to upload {len(new_fixtures)} new fixture(s) for v{version} to {store.display_name()}") + answer = input("proceed? [y/N] ").strip().lower() + if answer not in ("y", "yes"): + _info("aborted") + sys.exit(1) + + # Upload only new fixture files. + new_fixture_names = set(new_fixtures) + _parallel_upload( + store, + [(f"{prefix}/{name}", output / name) for name in new_fixtures], + ) + + # Merge manifest: keep existing entries, add new ones. 
+ new_entries = existing_manifest["fixtures"][:] + for f in fixtures_json["fixtures"]: + if f["name"] in new_fixture_names: + new_entries.append({"name": f["name"], "description": f["description"], "sha256": f["sha256"]}) + + updated_manifest = { + "version": version, + "generated_at": datetime.now(UTC).isoformat(), + "fixtures": new_entries, + } + store.write(f"{prefix}/manifest.json", (json.dumps(updated_manifest, indent=2) + "\n").encode()) + _info(" updated manifest.json") + + _info(f"\ndone: added {len(new_fixtures)} new fixture(s) to v{version} in {store.display_name()}") + + +def cmd_check(args: argparse.Namespace) -> None: + """Download fixtures from store and check with Rust binary.""" + store = _parse_store(args.store) + + if args.versions: + versions = [v.strip() for v in args.versions.split(",")] + else: + versions = store.list_versions() + + if not versions: + _info("no versions found in store") + sys.exit(1) + + _info(f"found versions.json at {store.display_name()}/versions.json: {versions}") + _info(f"checking {len(versions)} version(s): {', '.join(versions)}") + + total_passed = 0 + total_failed = 0 + total_skipped = 0 + all_failures: list[tuple[str, str, str]] = [] + + for version in versions: + _info(f"\n--- v{version} ---") + manifest = _read_manifest(store, version) + if manifest is None: + _info(f" v{version}: no manifest found at v{version}/arrays/manifest.json or v{version}/manifest.json") + all_failures.append((version, "(manifest)", "manifest not found")) + total_failed += 1 + continue + + prefix = manifest.pop("_prefix", f"v{version}/arrays") + _info(f" manifest: {prefix}/manifest.json ({len(manifest['fixtures'])} fixtures)") + + with tempfile.TemporaryDirectory() as tmpdir: + tmppath = Path(tmpdir) + + for entry in manifest["fixtures"]: + name = entry["name"] + data = store.read(f"{prefix}/{name}") + if data is None: + _info(f" v{version}: {name} not found at {prefix}/{name}") + all_failures.append((version, name, "fixture file not found 
in store")) + total_failed += 1 + continue + (tmppath / name).write_bytes(data) + _info(f" downloaded {name} ({len(data)} bytes)") + + result = _run_rust_check(tmppath, mode="subset") + + passed = len(result.get("passed", [])) + failed_list = result.get("failed", []) + skipped = len(result.get("skipped", [])) + total_passed += passed + total_failed += len(failed_list) + total_skipped += skipped + + if failed_list: + _info(f" v{version}: {passed} passed, {len(failed_list)} FAILED, {skipped} skipped") + for f in failed_list: + _info(f" FAIL {f['name']}: {f['error']}") + all_failures.append((version, f["name"], f["error"])) + else: + _info(f" v{version}: {passed} passed, {skipped} skipped") + + _info(f"\nresult: {total_passed} passed, {total_failed} failed, {total_skipped} skipped") + if all_failures: + sys.exit(1) + + +def cmd_list(args: argparse.Namespace) -> None: + """List versions or show a version's manifest.""" + store = _parse_store(args.store) + + if args.version: + manifest = _read_manifest(store, args.version) + if manifest is None: + print(f"no manifest found for v{args.version}", file=sys.stderr) + sys.exit(1) + print(json.dumps(manifest, indent=2)) + else: + versions = store.list_versions() + if not versions: + _info("(no versions)") + for v in versions: + print(v) + + +def cmd_verify(args: argparse.Namespace) -> None: + """Verify fixture file integrity against manifest sha256 hashes.""" + store = _parse_store(args.store) + versions = store.list_versions() + + if not versions: + _info("no versions found") + sys.exit(1) + + _info(f"verifying {len(versions)} version(s) in {store.display_name()}...") + + total_ok = 0 + errors: list[str] = [] + + for version in versions: + _info(f"\n--- v{version} ---") + manifest = _read_manifest(store, version) + if manifest is None: + msg = f"v{version}: manifest not found" + _info(f" FAIL: {msg}") + errors.append(msg) + continue + + prefix = manifest.pop("_prefix", f"v{version}/arrays") + + for entry in 
manifest["fixtures"]: + name = entry["name"] + expected_hash = entry.get("sha256") + data = store.read(f"{prefix}/{name}") + + if data is None: + msg = f"v{version}/{name}: file missing from store" + _info(f" FAIL: {msg}") + errors.append(msg) + continue + + if expected_hash is None: + msg = f"v{version}/{name}: no sha256 in manifest" + _info(f" FAIL: {msg}") + errors.append(msg) + continue + + actual_hash = hashlib.sha256(data).hexdigest() + if actual_hash != expected_hash: + msg = f"v{version}/{name}: sha256 mismatch expected={expected_hash[:12]} actual={actual_hash[:12]}" + _info(f" FAIL: {msg}") + errors.append(msg) + else: + _info(f" {name}: ok ({len(data)} bytes)") + total_ok += 1 + + _info(f"\nresult: {total_ok} ok, {len(errors)} failed") + if errors: + for e in errors: + _info(f" {e}") + sys.exit(1) + else: + _info("all fixtures verified.") + + +def cmd_validate_manifest(args: argparse.Namespace) -> None: + """Check that manifests are additive-only across all versions.""" + store = _parse_store(args.store) + versions = store.list_versions() + + if not versions: + _info("no versions found") + return + + _info(f"validating {len(versions)} version(s)...") + + prev_names: set[str] | None = None + prev_version: str | None = None + errors: list[str] = [] + + for version in versions: + manifest = _read_manifest(store, version) + if manifest is None: + _info(f" v{version}: no manifest, skipping") + continue + names = {e["name"] for e in manifest["fixtures"]} + + if prev_names is not None: + missing = prev_names - names + if missing: + msg = f"v{version} missing from v{prev_version}: {', '.join(sorted(missing))}" + _info(f" FAIL: {msg}") + errors.append(msg) + else: + new = len(names) - len(prev_names) + extra = f" (+{new} new)" if new > 0 else "" + _info(f" v{prev_version} -> v{version}: ok ({len(names)} fixtures{extra})") + else: + _info(f" v{version}: {len(names)} fixtures (first)") + + prev_names = names + prev_version = version + + if errors: + 
_info(f"\n{len(errors)} error(s)") + sys.exit(1) + else: + _info("\nall manifests are additive-only.") + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _parallel_upload(store: Store, items: list[tuple[str, Path]], max_workers: int = 8) -> None: + """Upload files to the store in parallel.""" + with ThreadPoolExecutor(max_workers=max_workers) as pool: + futures = {pool.submit(store.write_file, key, local): key for key, local in items} + for future in as_completed(futures): + future.result() + + +def _run_rust_generate(output: Path) -> None: + """Run `vortex-compat generate --output `.""" + cmd = _cargo_run_cmd() + ["generate", "--output", str(output)] + _run_cmd(cmd, check=True) + + +def _run_rust_check(dir: Path, mode: str = "subset") -> dict: + """Run `vortex-compat check --dir --mode ` and parse JSON stdout.""" + cmd = _cargo_run_cmd() + ["check", "--dir", str(dir), "--mode", mode] + result = subprocess.run(cmd, capture_output=True, text=True) + if result.stderr: + print(result.stderr, end="", file=sys.stderr) + + if result.stdout.strip(): + return json.loads(result.stdout) + + if result.returncode != 0: + return { + "passed": [], + "failed": [{"name": "(all)", "error": "check process failed"}], + "skipped": [], + } + return {"passed": [], "failed": [], "skipped": []} + + +def _cargo_run_cmd() -> list[str]: + """Build the command to invoke vortex-compat (pre-built binary or cargo run).""" + bin_path = os.environ.get("VORTEX_COMPAT_BIN") + if bin_path: + return [bin_path] + return ["cargo", "run", "-p", CARGO_BIN, "--release", "--"] + + +def _run_cmd(cmd: list[str], check: bool = False, cwd: Path | None = None) -> subprocess.CompletedProcess: + _info(f" $ {' '.join(cmd)}") + return subprocess.run(cmd, check=check, cwd=cwd) + + +def _find_prev_version(versions: list[str], current: str) -> str | None: + """Find the highest version 
strictly less than `current`.""" + current_key = _version_sort_key(current) + prev = None + for v in versions: + if _version_sort_key(v) < current_key: + prev = v + return prev + + +def _version_sort_key(v: str) -> list[int]: + parts = [] + for p in v.split("."): + try: + parts.append(int(p)) + except ValueError: + parts.append(0) + return parts + + +def _info(msg: str) -> None: + print(msg, file=sys.stderr) + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + + +def main() -> None: + parser = argparse.ArgumentParser( + prog="compat.py", + description="Vortex backward-compatibility fixture orchestrator", + epilog=EPILOG, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + sub = parser.add_subparsers(dest="command", metavar="COMMAND") + + # -- generate -- + p = sub.add_parser( + "generate", + help="Generate fixtures locally", + description=( + "Build all fixture .vortex files using the current binary and write\n" + "them to a directory. Version is auto-detected from the nearest git\n" + "tag at HEAD (or at --git-ref if specified)." + ), + epilog=( + "examples:\n" + " uv run compat.py generate --output ./out\n" + " uv run compat.py generate --output ./out --git-ref v0.62.0" + ), + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + p.add_argument("--output", required=True, help="Output directory") + p.add_argument( + "--git-ref", + help="Git ref for version detection (e.g. v0.62.0). " + "Version is derived from the nearest tag at this ref. " + "Fixtures are always built with the current binary.", + ) + + # -- publish -- + p = sub.add_parser( + "publish", + help="Generate and publish fixtures to a store", + description=( + "Generate fixture files, merge the manifest with the previous version,\n" + "and upload everything to the store. Version is auto-detected from the\n" + "nearest git tag at HEAD (or at --git-ref)." 
+ ), + epilog=( + "examples:\n" + " uv run compat.py publish\n" + " uv run compat.py publish --dry-run\n" + " uv run compat.py publish --git-ref v0.62.0\n" + " uv run compat.py publish --store /tmp/store" + ), + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + p.add_argument("--store", default=DEFAULT_STORE, help="Store spec (default: %(default)s)") + p.add_argument( + "--git-ref", + help="Git ref for version detection (e.g. v0.62.0). " + "Version is derived from the nearest tag at this ref. " + "Fixtures are always built with the current binary.", + ) + p.add_argument( + "--dry-run", + action="store_true", + help="Generate and show manifest, but don't upload", + ) + p.add_argument( + "--force", + action="store_true", + help="Overwrite an existing version in the store", + ) + p.add_argument( + "--update", + action="store_true", + help="Incremental update: add new fixtures to an existing version " + "(hash-verified, skips unchanged files, errors on mismatches)", + ) + p.add_argument( + "--yes", + "-y", + action="store_true", + help="Skip confirmation prompt", + ) + + # -- check -- + p = sub.add_parser( + "check", + help="Validate fixtures from a store against current code", + description=( + "Download fixtures for each version from the store, then use the\n" + "current vortex-compat binary to verify they can still be read and\n" + "match expectations." 
+ ), + epilog=( + "examples:\n" + " uv run compat.py check\n" + " uv run compat.py check --versions 0.62.0,0.63.0\n" + " uv run compat.py check --store /tmp/store" + ), + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + p.add_argument("--store", default=DEFAULT_STORE, help="Store spec (default: %(default)s)") + p.add_argument( + "--versions", + help="Comma-separated versions to check (default: all)", + ) + + # -- list -- + p = sub.add_parser( + "list", + help="List versions or show a version's manifest", + description="Inspect the contents of a fixture store.", + epilog=( + "examples:\n" + " uv run compat.py list\n" + " uv run compat.py list 0.62.0\n" + " uv run compat.py list --store /tmp/store" + ), + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + p.add_argument("--store", default=DEFAULT_STORE, help="Store spec (default: %(default)s)") + p.add_argument("version", nargs="?", help="Show manifest for this version") + + # -- verify -- + p = sub.add_parser( + "verify", + help="Verify fixture file integrity against manifest sha256 hashes", + description=( + "Download every fixture file for every version and verify its\n" + "SHA-256 hash matches the manifest. Also checks that all files\n" + "listed in manifests are present in the store." + ), + epilog=("examples:\n uv run compat.py verify\n uv run compat.py verify --store /tmp/store"), + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + p.add_argument("--store", default=DEFAULT_STORE, help="Store spec (default: %(default)s)") + + # -- validate-manifest -- + p = sub.add_parser( + "validate-manifest", + help="Check additive-only property across all versions", + description=( + "Verify that no fixtures were removed between consecutive versions.\n" + "New fixtures are allowed; removals are errors." 
+ ), + epilog=( + "examples:\n uv run compat.py validate-manifest\n uv run compat.py validate-manifest --store /tmp/store" + ), + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + p.add_argument("--store", default=DEFAULT_STORE, help="Store spec (default: %(default)s)") + + args = parser.parse_args() + + if args.command is None: + parser.print_help() + sys.exit(1) + + commands = { + "generate": cmd_generate, + "publish": cmd_publish, + "check": cmd_check, + "list": cmd_list, + "verify": cmd_verify, + "validate-manifest": cmd_validate_manifest, + } + commands[args.command](args) + + +if __name__ == "__main__": + main() diff --git a/vortex-test/compat-gen/scripts/upload.py b/vortex-test/compat-gen/scripts/upload.py deleted file mode 100755 index 63b496158ee..00000000000 --- a/vortex-test/compat-gen/scripts/upload.py +++ /dev/null @@ -1,351 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright the Vortex contributors - -"""Upload Vortex backward-compat fixtures to S3. - -Wraps the full upload lifecycle: - 1. Build + run compat-gen to produce fixture files and a naive manifest - 2. Fetch the previous version's manifest from S3 (via public HTTP) - 3. Merge `since` values: keep old `since` for existing fixtures, current - version for new ones - 4. Enforce additive-only: every fixture in the previous manifest must exist - in the generated output - 5. Upload the output directory to S3 - 6. Update versions.json with ETag-based optimistic locking - -Requires only Python 3 stdlib + `aws` CLI on PATH. 
-""" - -import argparse -import json -import os -import subprocess -import sys -import tempfile -import time -import urllib.error -import urllib.request - -S3_BUCKET = "vortex-compat-fixtures" -FIXTURES_URL = "https://vortex-compat-fixtures.s3.amazonaws.com" - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - - -def log(msg: str) -> None: - print(msg, file=sys.stderr) - - -def run(cmd: list[str], *, check: bool = True, **kwargs) -> subprocess.CompletedProcess: - log(f" $ {' '.join(cmd)}") - return subprocess.run(cmd, check=check, **kwargs) - - -def http_get(url: str) -> bytes | None: - """Fetch *url* over HTTPS. Returns None on 404, raises on other errors.""" - try: - with urllib.request.urlopen(url) as resp: - return resp.read() - except urllib.error.HTTPError as exc: - if exc.code == 404 or exc.code == 403: - return None - raise - - -def version_sort_key(v: str) -> list[int]: - return list(map(int, v.split("."))) - - -# --------------------------------------------------------------------------- -# S3 helpers (reuse head_etag / put_object pattern from scripts/s3-upload.py) -# --------------------------------------------------------------------------- - - -def head_etag(bucket: str, key: str) -> str | None: - """Fetch the current ETag for an S3 object, or None if missing.""" - result = subprocess.run( - [ - "aws", - "s3api", - "head-object", - "--bucket", - bucket, - "--key", - key, - "--query", - "ETag", - "--output", - "text", - ], - capture_output=True, - text=True, - ) - if result.returncode != 0: - return None - etag = result.stdout.strip() - if not etag or etag == "null": - return None - return etag - - -def put_object(bucket: str, key: str, body: str, if_match: str | None) -> bool: - """Upload a single object with optional ETag precondition.""" - cmd = [ - "aws", - "s3api", - "put-object", - "--bucket", - bucket, - "--key", - key, - 
"--body", - body, - ] - if if_match: - cmd.extend(["--if-match", if_match]) - result = subprocess.run(cmd, capture_output=True) - return result.returncode == 0 - - -def upload_versions_json(local_path: str, max_retries: int = 5) -> None: - """Upload versions.json with ETag-based optimistic locking + retry.""" - key = "versions.json" - for attempt in range(1, max_retries + 1): - etag = head_etag(S3_BUCKET, key) - if put_object(S3_BUCKET, key, local_path, etag): - log(" versions.json uploaded.") - return - - if attempt == max_retries: - break - - delay = min(2**attempt, 30) - log(f" versions.json upload failed (attempt {attempt}/{max_retries}), retrying in {delay}s...") - time.sleep(delay) - - log(f"ERROR: versions.json upload failed after {max_retries} attempts") - sys.exit(1) - - -# --------------------------------------------------------------------------- -# Core logic -# --------------------------------------------------------------------------- - - -def fetch_versions() -> list[str]: - """Fetch the current versions.json from S3 (public HTTP).""" - data = http_get(f"{FIXTURES_URL}/versions.json") - if data is None: - return [] - return json.loads(data) - - -def fetch_previous_manifest(versions: list[str], current_version: str) -> dict | None: - """Fetch the manifest.json for the latest version before *current_version*.""" - candidates = [v for v in versions if v != current_version] - if not candidates: - return None - candidates.sort(key=version_sort_key) - latest = candidates[-1] - log(f" previous version: {latest}") - data = http_get(f"{FIXTURES_URL}/v{latest}/manifest.json") - if data is None: - return None - return json.loads(data) - - -def normalize_manifest_fixtures(manifest: dict) -> list[dict]: - """Handle old manifest format where fixtures was a list of strings.""" - entries = manifest.get("fixtures", []) - normalized = [] - for entry in entries: - if isinstance(entry, str): - # Old format: just a filename string — no `since` info - 
normalized.append({"name": entry, "since": "unknown"}) - else: - normalized.append(entry) - return normalized - - -def merge_manifest( - generated_manifest_path: str, - previous_manifest: dict | None, - current_version: str, -) -> None: - """Merge `since` values from the previous manifest into the generated one. - - Also enforces the additive-only rule: every fixture in the previous manifest - must exist in the generated output. - """ - with open(generated_manifest_path) as f: - generated = json.load(f) - - if previous_manifest is None: - # First upload — nothing to merge. - return - - prev_fixtures = normalize_manifest_fixtures(previous_manifest) - prev_by_name = {e["name"]: e for e in prev_fixtures} - gen_by_name = {e["name"]: e for e in generated["fixtures"]} - - # Additive-only check: every previous fixture must still exist. - missing = sorted(set(prev_by_name) - set(gen_by_name)) - if missing: - log(f"ERROR: fixtures removed since previous version: {missing}") - log("Fixtures must never be removed — only added.") - sys.exit(1) - - # Merge: keep old `since` for existing fixtures, current version for new. 
- for entry in generated["fixtures"]: - name = entry["name"] - if name in prev_by_name: - entry["since"] = prev_by_name[name]["since"] - else: - entry["since"] = current_version - - with open(generated_manifest_path, "w") as f: - json.dump(generated, f, indent=2) - f.write("\n") - - log(f" merged manifest: {len(prev_by_name)} existing, {len(gen_by_name) - len(prev_by_name)} new fixtures") - - -def build_fixtures(version: str, output_dir: str) -> None: - """Run cargo to build and execute compat-gen.""" - run( - [ - "cargo", - "run", - "-p", - "vortex-compat", - "--release", - "--bin", - "compat-gen", - "--", - "--version", - version, - "--output", - output_dir, - ] - ) - - -def upload_fixtures(version: str, output_dir: str) -> None: - """Upload the output directory to S3.""" - run( - [ - "aws", - "s3", - "cp", - output_dir, - f"s3://{S3_BUCKET}/v{version}/", - "--recursive", - ] - ) - - -def update_versions(version: str, tmp_dir: str) -> None: - """Append version to versions.json and upload with optimistic locking.""" - versions = fetch_versions() - - if version not in versions: - versions.append(version) - versions.sort(key=version_sort_key) - - local_path = os.path.join(tmp_dir, "versions.json") - with open(local_path, "w") as f: - json.dump(versions, f, indent=2) - f.write("\n") - - upload_versions_json(local_path) - - -# --------------------------------------------------------------------------- -# CLI -# --------------------------------------------------------------------------- - - -def main() -> None: - parser = argparse.ArgumentParser( - description="Build, generate, and upload Vortex backward-compat fixtures.", - ) - parser.add_argument( - "--version", - required=True, - help='Version tag for this fixture set (e.g. 
"0.62.0").', - ) - parser.add_argument( - "--output", - help="Output directory for generated fixtures (default: temp dir).", - ) - parser.add_argument( - "--skip-build", - action="store_true", - help="Skip cargo build + compat-gen run (assumes --output already populated).", - ) - parser.add_argument( - "--dry-run", - action="store_true", - help="Generate and merge manifest but skip S3 upload.", - ) - args = parser.parse_args() - - # Resolve output directory. - if args.output: - output_dir = args.output - os.makedirs(output_dir, exist_ok=True) - owns_tmp = False - else: - tmp = tempfile.mkdtemp(prefix="compat-gen-") - output_dir = os.path.join(tmp, "fixtures") - os.makedirs(output_dir) - owns_tmp = True - - try: - # Step 1: Build + generate fixtures. - if not args.skip_build: - log(f"[1/4] Generating fixtures for v{args.version}...") - build_fixtures(args.version, output_dir) - else: - log(f"[1/4] Skipping build (--skip-build), using {output_dir}") - - # Step 2: Fetch previous manifest and merge `since` values. - log("[2/4] Fetching previous manifest...") - versions = fetch_versions() - prev_manifest = fetch_previous_manifest(versions, args.version) - manifest_path = os.path.join(output_dir, "manifest.json") - merge_manifest(manifest_path, prev_manifest, args.version) - - if args.dry_run: - log("[3/4] Dry run — skipping S3 upload.") - log("[4/4] Dry run — skipping versions.json update.") - log(f"\nGenerated fixtures in: {output_dir}") - with open(manifest_path) as f: - log(f"Manifest:\n{f.read()}") - return - - # Step 3: Upload fixtures to S3. - log(f"[3/4] Uploading fixtures to s3://{S3_BUCKET}/v{args.version}/...") - upload_fixtures(args.version, output_dir) - - # Step 4: Update versions.json. - log("[4/4] Updating versions.json...") - # Use the parent of output_dir for the temp versions.json file. 
- tmp_dir = os.path.dirname(output_dir) if owns_tmp else tempfile.mkdtemp() - update_versions(args.version, tmp_dir) - - log(f"\nDone: fixtures for v{args.version} uploaded.") - finally: - # Clean up temp dir if we created one. - if owns_tmp and not args.dry_run: - import shutil - - shutil.rmtree(os.path.dirname(output_dir), ignore_errors=True) - - -if __name__ == "__main__": - main() diff --git a/vortex-test/compat-gen/src/adapter.rs b/vortex-test/compat-gen/src/adapter.rs index b7b947dc079..bfdcedfd57a 100644 --- a/vortex-test/compat-gen/src/adapter.rs +++ b/vortex-test/compat-gen/src/adapter.rs @@ -18,15 +18,32 @@ use vortex::io::session::RuntimeSessionExt; use vortex::layout::LayoutStrategy; use vortex::layout::layouts::flat::writer::FlatLayoutStrategy; use vortex_array::ArrayRef; +use vortex_array::ArrayVisitorExt; use vortex_array::DynArray; +use vortex_array::expr::stats::Stat; use vortex_array::stream::ArrayStreamAdapter; use vortex_array::stream::ArrayStreamExt; use vortex_buffer::ByteBuffer; use vortex_error::VortexResult; +use vortex_error::vortex_err; use vortex_session::VortexSession; fn runtime() -> VortexResult { - Runtime::new().map_err(|e| vortex_error::vortex_err!("failed to create tokio runtime: {e}")) + Runtime::new().map_err(|e| vortex_err!("failed to create tokio runtime: {e}")) +} + +/// Compute all statistics on every node in the array tree. +/// +/// The flat layout writer does not compute stats itself — it only serializes stats already +/// cached on each array node. This function walks the entire tree and forces computation of +/// all stats so they are present in the serialized output. 
+pub fn compute_all_stats(array: &ArrayRef) -> VortexResult<()> { + let all_stats: Vec = Stat::all().collect(); + for node in array.depth_first_traversal() { + let computed = node.statistics().compute_all(&all_stats)?; + node.statistics().set_iter(computed.into_iter()); + } + Ok(()) } /// Write a sequence of array chunks as a `.vortex` file with no compression. @@ -34,40 +51,12 @@ fn runtime() -> VortexResult { /// Uses `FlatLayoutStrategy` directly — no repartitioning, no zone maps, no dictionary /// encoding, no compression. Each chunk is serialized as a single flat segment. pub fn write_file(path: &Path, chunk: ArrayRef) -> VortexResult<()> { - let stream = ArrayStreamAdapter::new(chunk.dtype().clone(), stream::iter([Ok(chunk)])); - - let strategy: Arc = Arc::new(FlatLayoutStrategy::default()); - - runtime()?.block_on(async { - let session = VortexSession::default().with_tokio(); - let mut file = tokio::fs::File::create(path) - .await - .map_err(|e| vortex_error::vortex_err!("failed to create {}: {e}", path.display()))?; - let _summary = session - .write_options() - .with_strategy(strategy) - .write(&mut file, stream) - .await?; - Ok(()) - }) + write_compressed(path, chunk, Arc::new(FlatLayoutStrategy::default())) } /// Write a sequence of array chunks to an in-memory `.vortex` byte buffer with no compression. pub fn write_file_to_bytes(chunk: ArrayRef) -> VortexResult { - let stream = ArrayStreamAdapter::new(chunk.dtype().clone(), stream::iter([Ok(chunk)])); - - let strategy: Arc = Arc::new(FlatLayoutStrategy::default()); - - runtime()?.block_on(async { - let session = VortexSession::default().with_tokio(); - let mut bytes = Vec::new(); - let _summary = session - .write_options() - .with_strategy(strategy) - .write(&mut bytes, stream) - .await?; - Ok(ByteBuffer::from(bytes)) - }) + write_compressed_to_bytes(chunk, Arc::new(FlatLayoutStrategy::default())) } /// Write a `.vortex` file using a caller-provided layout strategy (compressor pipeline). 
@@ -82,7 +71,7 @@ pub fn write_compressed( let session = VortexSession::default().with_tokio(); let mut file = tokio::fs::File::create(path) .await - .map_err(|e| vortex_error::vortex_err!("failed to create {}: {e}", path.display()))?; + .map_err(|e| vortex_err!("failed to create {}: {e}", path.display()))?; let _summary = session .write_options() .with_strategy(strategy) diff --git a/vortex-test/compat-gen/src/check.rs b/vortex-test/compat-gen/src/check.rs new file mode 100644 index 00000000000..f8dd4e8f52d --- /dev/null +++ b/vortex-test/compat-gen/src/check.rs @@ -0,0 +1,208 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::path::Path; + +use clap::ValueEnum; +use serde::Serialize; +use vortex_array::assert_arrays_eq; +use vortex_buffer::ByteBuffer; +use vortex_error::VortexResult; +use vortex_error::vortex_bail; +use vortex_error::vortex_err; + +use crate::adapter; +use crate::fixtures::all_fixtures; + +/// How to handle mismatches between directory and known fixtures. +#[derive(Clone, ValueEnum)] +pub enum Mode { + /// Directory must match fixtures exactly. + Exact, + /// Directory may have extra files (skip), but all known must be present. + Subset, + /// Directory may be missing files (skip), but no unknown files allowed. + Superset, +} + +#[derive(Serialize)] +struct CheckResult { + passed: Vec, + failed: Vec, + skipped: Vec, +} + +#[derive(Serialize)] +struct FailedFixture { + name: String, + error: String, +} + +/// Check `.vortex` files in `dir` against in-memory fixtures. +/// +/// For each known fixture, generates fresh files in a temp directory via +/// `fixture.write(tmp_dir)`, then reads both the stored file and fresh file, +/// decodes them, and compares the arrays. +/// +/// Prints JSON result to stdout, human-readable progress to stderr. +/// Returns error if any fixture failed or if mode constraints are violated. 
+pub fn check(dir: &Path, mode: Mode, exclude: &[String]) -> VortexResult<()> { + let fixtures = all_fixtures(); + let fixtures: Vec<_> = fixtures + .into_iter() + .filter(|f| { + let name = f.name(); + !exclude.iter().any(|pat| name.contains(pat.as_str())) + }) + .collect(); + + if !exclude.is_empty() { + eprintln!("excluding: {}", exclude.join(", ")); + } + + // Generate fresh fixtures into a temp directory. + let tmp_dir = tempfile::tempdir().map_err(|e| vortex_err!("failed to create temp dir: {e}"))?; + + eprintln!("generating fresh fixtures for comparison..."); + for fixture in &fixtures { + fixture.write(tmp_dir.path())?; + } + + // Collect .vortex files in the check directory. + let dir_files: Vec = std::fs::read_dir(dir) + .map_err(|e| vortex_err!("failed to read dir {}: {e}", dir.display()))? + .filter_map(|entry| { + let entry = entry.ok()?; + let name = entry.file_name().to_string_lossy().to_string(); + name.ends_with(".vortex").then_some(name) + }) + .collect(); + + // Collect all fixture names (each fixture may produce multiple files). + let fresh_files: Vec = std::fs::read_dir(tmp_dir.path()) + .map_err(|e| vortex_err!("failed to read tmp dir: {e}"))? + .filter_map(|entry| { + let entry = entry.ok()?; + let name = entry.file_name().to_string_lossy().to_string(); + name.ends_with(".vortex").then_some(name) + }) + .collect(); + + let mut result = CheckResult { + passed: Vec::new(), + failed: Vec::new(), + skipped: Vec::new(), + }; + + // Check for unknown files in the directory. + for file_name in &dir_files { + if !fresh_files.contains(file_name) { + match mode { + Mode::Exact | Mode::Superset => { + result.failed.push(FailedFixture { + name: file_name.clone(), + error: "unknown fixture (not in current fixture set)".to_string(), + }); + } + Mode::Subset => { + eprintln!(" skip {file_name} (unknown)"); + result.skipped.push(file_name.clone()); + } + } + } + } + + // Check each known fixture file. 
+ for fresh_name in &fresh_files { + let stored_path = dir.join(fresh_name); + if !stored_path.exists() { + match mode { + Mode::Exact | Mode::Subset => { + result.failed.push(FailedFixture { + name: fresh_name.clone(), + error: "file missing from directory".to_string(), + }); + } + Mode::Superset => { + eprintln!(" skip {fresh_name} (missing)"); + result.skipped.push(fresh_name.clone()); + } + } + continue; + } + + eprintln!(" checking {fresh_name}..."); + + // Read the stored file. + let stored_bytes = match std::fs::read(&stored_path) { + Ok(b) => b, + Err(e) => { + result.failed.push(FailedFixture { + name: fresh_name.clone(), + error: format!("failed to read stored file: {e}"), + }); + continue; + } + }; + let stored_array = match adapter::read_file(ByteBuffer::from(stored_bytes)) { + Ok(a) => a, + Err(e) => { + result.failed.push(FailedFixture { + name: fresh_name.clone(), + error: format!("failed to decode stored vortex file: {e}"), + }); + continue; + } + }; + + // Read the fresh file. + let fresh_path = tmp_dir.path().join(fresh_name); + let fresh_bytes = match std::fs::read(&fresh_path) { + Ok(b) => b, + Err(e) => { + result.failed.push(FailedFixture { + name: fresh_name.clone(), + error: format!("failed to read fresh file: {e}"), + }); + continue; + } + }; + let fresh_array = match adapter::read_file(ByteBuffer::from(fresh_bytes)) { + Ok(a) => a, + Err(e) => { + result.failed.push(FailedFixture { + name: fresh_name.clone(), + error: format!("failed to decode fresh vortex file: {e}"), + }); + continue; + } + }; + + // Compare arrays. + assert_arrays_eq!(stored_array, fresh_array); + eprintln!(" pass {fresh_name}"); + result.passed.push(fresh_name.clone()); + } + + // Print JSON result to stdout. + let json = serde_json::to_string_pretty(&result) + .map_err(|e| vortex_err!("failed to serialize result: {e}"))?; + println!("{json}"); + + // Summary to stderr. 
+ eprintln!( + "\nresult: {} passed, {} failed, {} skipped", + result.passed.len(), + result.failed.len(), + result.skipped.len() + ); + + if !result.failed.is_empty() { + for f in &result.failed { + eprintln!(" FAIL {}: {}", f.name, f.error); + } + vortex_bail!("{} fixture(s) failed", result.failed.len()); + } + + Ok(()) +} diff --git a/vortex-test/compat-gen/src/fixtures/arrays/datasets/clickbench.rs b/vortex-test/compat-gen/src/fixtures/arrays/datasets/clickbench.rs index d97896df58c..7959f475c6b 100644 --- a/vortex-test/compat-gen/src/fixtures/arrays/datasets/clickbench.rs +++ b/vortex-test/compat-gen/src/fixtures/arrays/datasets/clickbench.rs @@ -90,7 +90,7 @@ fn download_with_retries(url: &str) -> VortexResult { )) } -#[allow(clippy::cast_possible_truncation)] +#[expect(clippy::cast_possible_truncation)] fn sample_and_write(source_bytes: &[u8], dest: &std::path::Path) -> VortexResult<()> { let source_bytes = Bytes::copy_from_slice(source_bytes); let builder = ParquetRecordBatchReaderBuilder::try_new(source_bytes.clone()) diff --git a/vortex-test/compat-gen/src/fixtures/mod.rs b/vortex-test/compat-gen/src/fixtures/mod.rs index 9ead6f40bb6..181ac5ad516 100644 --- a/vortex-test/compat-gen/src/fixtures/mod.rs +++ b/vortex-test/compat-gen/src/fixtures/mod.rs @@ -14,6 +14,7 @@ use vortex_error::VortexResult; use vortex_error::vortex_bail; use crate::adapter; +use crate::adapter::compute_all_stats; use crate::manifest::FixtureEntry; /// Top-level trait that the runner (compat-gen / compat-validate) interacts with. 
@@ -84,6 +85,7 @@ impl Fixture for FlatLayoutAdapter { fn write(&self, dir: &Path) -> VortexResult> { let array = self.0.build()?; check_expected_encodings(&array, self.0.as_ref())?; + compute_all_stats(&array)?; let path = dir.join(self.name()); adapter::write_file(&path, array)?; Ok(vec![FixtureEntry { diff --git a/vortex-test/compat-gen/src/generate.rs b/vortex-test/compat-gen/src/generate.rs new file mode 100644 index 00000000000..1393866c757 --- /dev/null +++ b/vortex-test/compat-gen/src/generate.rs @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::path::Path; + +use serde::Serialize; +use sha2::Digest; +use sha2::Sha256; +use vortex_error::VortexResult; +use vortex_error::vortex_err; + +use crate::fixtures::all_fixtures; + +#[derive(Serialize)] +struct FixturesJson { + fixtures: Vec, +} + +#[derive(Serialize)] +pub struct FixtureInfo { + pub name: String, + pub description: String, + pub sha256: String, +} + +/// Write all fixture files into `output_dir`, returning name, description, and sha256 for each. 
+pub fn write_fixtures(output_dir: &Path, exclude: &[String]) -> VortexResult> { + let fixtures = all_fixtures(); + let fixtures: Vec<_> = fixtures + .into_iter() + .filter(|f| { + let name = f.name(); + !exclude.iter().any(|pat| name.contains(pat.as_str())) + }) + .collect(); + + if !exclude.is_empty() { + eprintln!("excluding: {}", exclude.join(", ")); + } + + std::fs::create_dir_all(output_dir) + .map_err(|e| vortex_err!("failed to create output dir: {e}"))?; + + eprintln!("generating {} fixtures...", fixtures.len()); + + let mut infos = Vec::new(); + for fixture in &fixtures { + let entries = fixture.write(output_dir)?; + for entry in entries { + let path = output_dir.join(&entry.name); + let file_bytes = std::fs::read(&path) + .map_err(|e| vortex_err!("failed to read back {}: {e}", path.display()))?; + let sha256 = format!("{:x}", Sha256::digest(&file_bytes)); + eprintln!(" wrote {}", entry.name); + infos.push(FixtureInfo { + name: entry.name, + description: entry.description, + sha256, + }); + } + } + + Ok(infos) +} + +/// Write the `fixtures.json` manifest from previously collected fixture info. +pub fn write_manifest(output_dir: &Path, infos: Vec) -> VortexResult<()> { + let fixtures_json = FixturesJson { fixtures: infos }; + let json = serde_json::to_string_pretty(&fixtures_json) + .map_err(|e| vortex_err!("failed to serialize fixtures.json: {e}"))?; + std::fs::write(output_dir.join("fixtures.json"), format!("{json}\n")) + .map_err(|e| vortex_err!("failed to write fixtures.json: {e}"))?; + eprintln!(" wrote fixtures.json"); + + eprintln!( + "\ndone: {} fixtures in {}", + fixtures_json.fixtures.len(), + output_dir.display() + ); + Ok(()) +} + +/// Generate all fixtures into `output_dir` and write the manifest. 
+pub fn generate(output_dir: &Path, exclude: &[String]) -> VortexResult<()> { + let infos = write_fixtures(output_dir, exclude)?; + write_manifest(output_dir, infos) +} diff --git a/vortex-test/compat-gen/src/lib.rs b/vortex-test/compat-gen/src/lib.rs index 6b758691198..d5010905605 100644 --- a/vortex-test/compat-gen/src/lib.rs +++ b/vortex-test/compat-gen/src/lib.rs @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors pub mod adapter; +pub mod check; pub mod fixtures; +pub mod generate; pub mod manifest; -pub mod validate; diff --git a/vortex-test/compat-gen/src/main.rs b/vortex-test/compat-gen/src/main.rs index 9a2649fdddc..c80d62e8d63 100644 --- a/vortex-test/compat-gen/src/main.rs +++ b/vortex-test/compat-gen/src/main.rs @@ -1,78 +1,101 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::fs; use std::path::PathBuf; -use chrono::Utc; use clap::Parser; -use vortex_compat::fixtures::all_fixtures; -use vortex_compat::manifest::Manifest; +use clap::Subcommand; +use vortex_compat::check; +use vortex_compat::generate; use vortex_error::VortexResult; #[derive(Parser)] #[command( - name = "compat-gen", - about = "Generate Vortex backward-compat fixture files" + name = "vortex-compat", + about = "Generate and check Vortex backward-compatibility fixtures", + long_about = "\ +Thin Rust binary for backward-compatibility testing.\n\ +\n\ +This tool generates .vortex fixture files from in-memory test data and \ +checks that existing .vortex files can still be read and match expectations. 
\ +It is designed to be called by the compat.py orchestrator, which handles \ +versioning, S3 storage, and manifest management.\n\ +\n\ +Output protocol:\n\ + - Progress / diagnostics go to stderr\n\ + - Structured JSON results go to stdout (check command only)", + after_help = "\ +EXAMPLES:\n\ + Generate fixtures into a directory:\n\ + vortex-compat generate --output /tmp/fixtures\n\ +\n\ + Check fixtures (allow extra files from older versions):\n\ + vortex-compat check --dir /tmp/v0.62.0 --mode subset\n\ +\n\ + Check fixtures (strict, must match exactly):\n\ + vortex-compat check --dir /tmp/v0.63.0 --mode exact\n\ +\n\ + Build and run:\n\ + cargo run -p vortex-compat --release -- generate --output ./out" )] struct Cli { - /// Version tag for this fixture set (e.g. "0.62.0"). - #[arg(long)] - version: String, - - /// Output directory for generated fixture files. - #[arg(long)] - output: PathBuf, + #[command(subcommand)] + command: Commands, } -fn main() -> VortexResult<()> { - let cli = Cli::parse(); +#[derive(Subcommand)] +enum Commands { + /// Generate all fixture files into a directory. + /// + /// Writes one .vortex file per fixture plus a fixtures.json manifest + /// listing all generated files. The output directory is created if needed. + /// + /// Progress is printed to stderr. On success, the output directory + /// contains everything needed for `check` to validate. + Generate { + /// Output directory for .vortex files and fixtures.json. + #[arg(long, value_name = "DIR")] + output: PathBuf, - if cli.output.exists() { - let is_empty = cli - .output - .read_dir() - .map_err(|e| vortex_error::vortex_err!("failed to read output dir: {e}"))? 
- .next() - .is_none(); - if !is_empty { - vortex_error::vortex_bail!( - "output directory '{}' is not empty; use a fresh directory", - cli.output.display() - ); - } - } else { - fs::create_dir_all(&cli.output) - .map_err(|e| vortex_error::vortex_err!("failed to create output dir: {e}"))?; - } + /// Fixture name substrings to exclude (comma-separated, e.g. "clickbench,tpch"). + #[arg(long, value_delimiter = ',', value_name = "PATTERNS")] + exclude: Vec, + }, - let fixtures = all_fixtures(); - let mut entries = Vec::new(); + /// Check .vortex files in a directory against in-memory fixtures. + /// + /// For each .vortex file, rebuilds the expected array from current code + /// and compares it to the file contents. Results are printed as JSON to + /// stdout (for machine consumption) and as human-readable summaries to + /// stderr. + /// + /// The --mode flag controls how mismatches between directory contents + /// and the current fixture set are handled. + Check { + /// Directory containing .vortex files to check. + #[arg(long, value_name = "DIR")] + dir: PathBuf, - for fixture in &fixtures { - let new_entries = fixture.write(&cli.output)?; - for entry in &new_entries { - eprintln!(" generated file: {}", entry.name); - } - entries.extend(new_entries); - } + /// How to handle mismatches between directory contents and known fixtures. + /// + /// subset — directory may have extra files (skipped), all known must be present. + /// Best for checking old versions that may have since-removed fixtures. + /// exact — directory must match current fixtures 1:1. No extras, no missing. + /// superset — directory may be missing files (skipped), no unknown files allowed. 
+ #[arg(long, default_value = "subset", value_name = "MODE")] + mode: check::Mode, - let manifest = Manifest { - version: cli.version.clone(), - generated_at: Utc::now(), - fixtures: entries, - }; - let manifest_path = cli.output.join("manifest.json"); - let manifest_json = serde_json::to_string_pretty(&manifest) - .map_err(|e| vortex_error::vortex_err!("failed to serialize manifest: {e}"))?; - fs::write(&manifest_path, manifest_json) - .map_err(|e| vortex_error::vortex_err!("failed to write manifest: {e}"))?; - eprintln!(" wrote manifest.json"); + /// Fixture name substrings to exclude from checking (comma-separated). + #[arg(long, value_delimiter = ',', value_name = "PATTERNS")] + exclude: Vec, + }, +} - eprintln!( - "done: {} fixtures for v{}", - manifest.fixtures.len(), - cli.version - ); - Ok(()) +fn main() -> VortexResult<()> { + let cli = Cli::parse(); + + match cli.command { + Commands::Generate { output, exclude } => generate::generate(&output, &exclude), + Commands::Check { dir, mode, exclude } => check::check(&dir, mode, &exclude), + } } diff --git a/vortex-test/compat-gen/src/manifest.rs b/vortex-test/compat-gen/src/manifest.rs index 726b98c7417..060760c88d2 100644 --- a/vortex-test/compat-gen/src/manifest.rs +++ b/vortex-test/compat-gen/src/manifest.rs @@ -1,20 +1,10 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use chrono::DateTime; -use chrono::Utc; use serde::Deserialize; use serde::Serialize; -/// Manifest listing all fixtures generated for a given version. -#[derive(Debug, Serialize, Deserialize)] -pub struct Manifest { - pub version: String, - pub generated_at: DateTime, - pub fixtures: Vec, -} - -/// One entry in the manifest's fixture list. +/// One entry in the fixture manifest. #[derive(Debug, Serialize, Deserialize)] pub struct FixtureEntry { /// Filename, e.g. "primitives.vortex". 
diff --git a/vortex-test/compat-gen/src/validate.rs b/vortex-test/compat-gen/src/validate.rs deleted file mode 100644 index 5d84ce48717..00000000000 --- a/vortex-test/compat-gen/src/validate.rs +++ /dev/null @@ -1,186 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use std::fs; -use std::path::Path; -use std::path::PathBuf; - -use vortex_array::assert_arrays_eq; -use vortex_buffer::ByteBuffer; -use vortex_error::VortexResult; -use vortex_error::vortex_bail; -use vortex_error::vortex_err; -use vortex_utils::aliases::hash_set::HashSet; - -use crate::adapter; -use crate::fixtures::all_fixtures; -use crate::manifest::Manifest; - -/// Result of validating one version's fixtures. -pub struct VersionResult { - pub version: String, - pub passed: usize, - pub skipped: usize, - pub failed: Vec<(String, String)>, -} - -/// Validate all versions' fixtures against the current reader. -pub fn validate_all( - source: &FixtureSource, - versions: &[String], -) -> VortexResult> { - let fixtures = all_fixtures(); - - // Generate fresh fixtures into a temp dir. 
- let tmp_dir = tempfile::tempdir().map_err(|e| vortex_err!("failed to create temp dir: {e}"))?; - let mut fresh_names: Vec = Vec::new(); - for fixture in &fixtures { - let entries = fixture.write(tmp_dir.path())?; - for entry in entries { - fresh_names.push(entry.name); - } - } - - let fresh_set: HashSet<&str> = fresh_names.iter().map(|n| n.as_str()).collect(); - - let mut results = Vec::new(); - for version in versions { - let result = validate_version(source, version, tmp_dir.path(), &fresh_set)?; - results.push(result); - } - Ok(results) -} - -fn validate_version( - source: &FixtureSource, - version: &str, - fresh_dir: &Path, - fresh_set: &HashSet<&str>, -) -> VortexResult { - let manifest = source.fetch_manifest(version)?; - let mut passed = 0; - let mut skipped = 0; - let mut failed = Vec::new(); - - for entry in &manifest.fixtures { - if !fresh_set.contains(entry.name.as_str()) { - eprintln!( - " warn: unknown fixture {} in v{version}, skipping", - entry.name - ); - skipped += 1; - continue; - } - - eprintln!(" checking {} from v{version}...", entry.name); - let stored_bytes = source.fetch_fixture(version, &entry.name)?; - let fresh_path = fresh_dir.join(&entry.name); - let fresh_bytes = fs::read(&fresh_path).map_err(|e| { - vortex_err!("failed to read fresh fixture {}: {e}", fresh_path.display()) - })?; - - match validate(stored_bytes, ByteBuffer::from(fresh_bytes)) { - Ok(()) => passed += 1, - Err(e) => { - eprintln!(" FAIL: {} from v{version}: {e}", entry.name); - failed.push((entry.name.clone(), e.to_string())); - } - } - } - - Ok(VersionResult { - version: version.to_string(), - passed, - skipped, - failed, - }) -} - -fn validate(stored_bytes: ByteBuffer, fresh_bytes: ByteBuffer) -> VortexResult<()> { - let stored_array = adapter::read_file(stored_bytes)?; - let fresh_array = adapter::read_file(fresh_bytes)?; - - assert_arrays_eq!(stored_array, fresh_array); - Ok(()) -} - -/// Source for fetching fixture files -- either HTTPS or local directory. 
-pub enum FixtureSource { - Url(String), - Dir(PathBuf), -} - -impl FixtureSource { - fn fetch_manifest(&self, version: &str) -> VortexResult { - let json = match self { - FixtureSource::Url(base) => { - let url = format!("{base}/v{version}/manifest.json"); - http_get_bytes(&url)? - } - FixtureSource::Dir(dir) => { - let path = dir.join(format!("v{version}")).join("manifest.json"); - fs::read(&path) - .map_err(|e| vortex_err!("failed to read {}: {e}", path.display()))? - } - }; - serde_json::from_slice(&json) - .map_err(|e| vortex_err!("failed to parse manifest for v{version}: {e}")) - } - - fn fetch_fixture(&self, version: &str, name: &str) -> VortexResult { - let bytes = match self { - FixtureSource::Url(base) => { - let url = format!("{base}/v{version}/{name}"); - http_get_bytes(&url)? - } - FixtureSource::Dir(dir) => { - let path = dir.join(format!("v{version}")).join(name); - fs::read(&path) - .map_err(|e| vortex_err!("failed to read {}: {e}", path.display()))? - } - }; - Ok(ByteBuffer::from(bytes)) - } -} - -/// Discover versions from a versions.json file, or from local directory listing. -pub fn discover_versions(source: &FixtureSource) -> VortexResult> { - match source { - FixtureSource::Url(base) => { - let url = format!("{base}/versions.json"); - let bytes = http_get_bytes(&url)?; - let versions: Vec = serde_json::from_slice(&bytes) - .map_err(|e| vortex_err!("failed to parse versions.json: {e}"))?; - Ok(versions) - } - FixtureSource::Dir(dir) => { - let mut versions = Vec::new(); - for entry in fs::read_dir(dir) - .map_err(|e| vortex_err!("failed to read dir {}: {e}", dir.display()))? 
- { - let entry = entry.map_err(|e| vortex_err!("failed to read dir entry: {e}"))?; - let name = entry.file_name(); - let name = name.to_string_lossy(); - if let Some(version) = name.strip_prefix('v') - && entry.path().join("manifest.json").exists() - { - versions.push(version.to_string()); - } - } - versions.sort(); - Ok(versions) - } - } -} - -fn http_get_bytes(url: &str) -> VortexResult> { - let response = reqwest::blocking::get(url) - .map_err(|e| vortex_err!("HTTP request failed for {url}: {e}"))?; - if !response.status().is_success() { - vortex_bail!("HTTP {} fetching {url}", response.status()); - } - response - .bytes() - .map(|b| b.to_vec()) - .map_err(|e| vortex_err!("failed to read response body from {url}: {e}")) -} diff --git a/vortex-test/compat-gen/src/validate_main.rs b/vortex-test/compat-gen/src/validate_main.rs deleted file mode 100644 index dbaf35ba822..00000000000 --- a/vortex-test/compat-gen/src/validate_main.rs +++ /dev/null @@ -1,94 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use std::path::PathBuf; - -use clap::Parser; -use vortex_compat::validate::FixtureSource; -use vortex_compat::validate::discover_versions; -use vortex_compat::validate::validate_all; -use vortex_error::VortexResult; - -#[derive(Parser)] -#[command( - name = "compat-validate", - about = "Validate Vortex backward-compat fixtures" -)] -struct Cli { - /// HTTPS base URL for the fixture bucket. - /// e.g. - #[arg(long)] - fixtures_url: Option, - - /// Local directory containing fixture versions (for development). - #[arg(long)] - fixtures_dir: Option, - - /// Explicit list of versions to test (comma-separated). - /// If omitted, discovers versions from versions.json or directory listing. 
- #[arg(long, value_delimiter = ',')] - versions: Option>, -} - -fn main() -> VortexResult<()> { - let cli = Cli::parse(); - - let source = match (&cli.fixtures_url, &cli.fixtures_dir) { - (Some(url), None) => FixtureSource::Url(url.clone()), - (None, Some(dir)) => FixtureSource::Dir(dir.clone()), - _ => { - vortex_error::vortex_bail!("specify exactly one of --fixtures-url or --fixtures-dir"); - } - }; - - let versions = match cli.versions { - Some(v) => v, - None => { - eprintln!("discovering versions..."); - discover_versions(&source)? - } - }; - - eprintln!( - "testing {} version(s): {}", - versions.len(), - versions.join(", ") - ); - - let results = validate_all(&source, &versions)?; - - let mut total_passed = 0; - let mut total_failed = 0; - let mut total_skipped = 0; - - for r in &results { - total_passed += r.passed; - total_failed += r.failed.len(); - total_skipped += r.skipped; - if r.failed.is_empty() { - eprintln!( - " v{}: {} passed, {} skipped", - r.version, r.passed, r.skipped - ); - } else { - eprintln!( - " v{}: {} passed, {} FAILED, {} skipped", - r.version, - r.passed, - r.failed.len(), - r.skipped - ); - for (name, err) in &r.failed { - eprintln!(" FAIL {name}: {err}"); - } - } - } - - eprintln!("\nresult: {total_passed} passed, {total_failed} failed, {total_skipped} skipped"); - - if total_failed > 0 { - vortex_error::vortex_bail!("{total_failed} fixture(s) failed validation"); - } - - Ok(()) -}