Skip to content

Commit 4fd1814

Browse files
fix[compat]: Update the action to use the correct check command (#7016)
This adds a check to CI that ensures that files generated by the last release are still readable in this PR. --------- Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk>
1 parent 13417e9 commit 4fd1814

5 files changed

Lines changed: 200 additions & 58 deletions

File tree

.github/workflows/ci.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -846,6 +846,12 @@ jobs:
846846
enable-sccache: "false"
847847
- run: cargo build --package vortex-jni
848848

849+
compat-check:
850+
name: "Compat check"
851+
uses: ./.github/workflows/compat-validation.yml
852+
with:
853+
mode: last
854+
849855
rust-publish-dry-run:
850856
name: "Rust publish dry-run"
851857
timeout-minutes: 40

.github/workflows/compat-validation.yml

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,32 +3,44 @@ name: Compat Validation
33
on:
44
schedule:
55
- cron: "0 6 * * 1" # Monday 6am UTC
6+
workflow_call:
7+
inputs:
8+
mode:
9+
description: "Validation mode"
10+
required: true
11+
default: "all"
12+
type: string
613
workflow_dispatch:
714
inputs:
815
mode:
916
description: "Validation mode"
1017
required: true
11-
default: "last"
18+
default: "all"
1219
type: choice
1320
options:
14-
- last
1521
- all
16-
17-
env:
18-
FIXTURES_URL: https://vortex-compat-fixtures.s3.amazonaws.com
22+
- last
1923

2024
jobs:
2125
compat-test:
22-
runs-on: ubuntu-latest
26+
runs-on: >-
27+
${{ github.repository == 'vortex-data/vortex'
28+
&& format('runs-on={0}/runner=amd64-medium/image=ubuntu24-full-x64-pre/tag=compat-validation', github.run_id)
29+
|| 'ubuntu-latest' }}
30+
timeout-minutes: 120
2331
steps:
24-
- uses: actions/checkout@v4
25-
26-
- uses: dtolnay/rust-toolchain@stable
27-
28-
- uses: Swatinem/rust-cache@v2
29-
32+
- uses: runs-on/action@v2
33+
if: github.repository == 'vortex-data/vortex'
34+
with:
35+
sccache: s3
36+
- uses: actions/checkout@v6
37+
- uses: ./.github/actions/setup-prebuild
38+
- name: Install uv
39+
uses: spiraldb/actions/.github/actions/setup-uv@0.18.5
40+
with:
41+
sync: false
3042
- name: Run compat tests
3143
run: |
32-
MODE="${{ inputs.mode || 'last' }}"
33-
python3 vortex-test/compat-gen/scripts/compat.py check \
44+
MODE="${{ inputs.mode || 'all' }}"
45+
uv run vortex-test/compat-gen/scripts/compat.py check \
3446
--mode "$MODE"

vortex-test/compat-gen/scripts/compat.py

Lines changed: 108 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
#!/usr/bin/env python3
22
# SPDX-License-Identifier: Apache-2.0
33
# SPDX-FileCopyrightText: Copyright the Vortex contributors
4+
# /// script
5+
# dependencies = ["jsonschema"]
6+
# ///
47

58
"""
69
Vortex backward-compatibility orchestrator.
@@ -79,6 +82,7 @@
7982
8083
# Check all versions, or specific ones
8184
uv run compat.py check
85+
uv run compat.py check --mode last
8286
uv run compat.py check --versions 0.62.0,0.63.0
8387
8488
# Inspect store contents
@@ -341,7 +345,7 @@ def cmd_generate(args: argparse.Namespace) -> None:
341345
output = Path(args.output)
342346
version = _version_from_ref(args.git_ref)
343347

344-
_run_rust_generate(output)
348+
_run_rust_generate(output, profile=args.profile)
345349

346350
# Read fixtures.json (with sha256 from Rust) and write a versioned manifest.
347351
fixtures_json = json.loads((output / "fixtures.json").read_text())
@@ -387,7 +391,7 @@ def _publish_full(
387391
output = Path(tmpdir) / "fixtures"
388392

389393
_info("generating fixtures...")
390-
_run_rust_generate(output)
394+
_run_rust_generate(output, profile=args.profile)
391395

392396
fixtures_json = json.loads((output / "fixtures.json").read_text())
393397

@@ -472,7 +476,7 @@ def _publish_update(
472476
output = Path(tmpdir) / "fixtures"
473477

474478
_info("generating fixtures...")
475-
_run_rust_generate(output)
479+
_run_rust_generate(output, profile=args.profile)
476480

477481
fixtures_json = json.loads((output / "fixtures.json").read_text())
478482

@@ -541,10 +545,16 @@ def cmd_check(args: argparse.Namespace) -> None:
541545
"""Download fixtures from store and check with Rust binary."""
542546
store = _parse_store(args.store)
543547

548+
if args.versions and args.mode != "all":
549+
print("error: --versions and --mode are mutually exclusive", file=sys.stderr)
550+
sys.exit(1)
551+
544552
if args.versions:
545553
versions = [v.strip() for v in args.versions.split(",")]
546554
else:
547555
versions = store.list_versions()
556+
if args.mode == "last" and versions:
557+
versions = versions[-1:]
548558

549559
if not versions:
550560
_info("no versions found in store")
@@ -573,18 +583,15 @@ def cmd_check(args: argparse.Namespace) -> None:
573583
with tempfile.TemporaryDirectory() as tmpdir:
574584
tmppath = Path(tmpdir)
575585

576-
for entry in manifest["fixtures"]:
577-
name = entry["name"]
578-
data = store.read(f"{prefix}/{name}")
579-
if data is None:
580-
_info(f" v{version}: {name} not found at {prefix}/{name}")
581-
all_failures.append((version, name, "fixture file not found in store"))
582-
total_failed += 1
583-
continue
584-
(tmppath / name).write_bytes(data)
585-
_info(f" downloaded {name} ({len(data)} bytes)")
586-
587-
result = _run_rust_check(tmppath, mode="subset")
586+
_info(f" downloading {len(manifest['fixtures'])} fixtures...")
587+
download_failures = _parallel_download(store, manifest["fixtures"], prefix, tmppath)
588+
for name, error in download_failures:
589+
_info(f" v{version}: {name} {error}")
590+
all_failures.append((version, name, error))
591+
total_failed += 1
592+
593+
_info(f" checking v{version}...")
594+
result = _run_rust_check(tmppath, mode="subset", profile=args.profile)
588595

589596
passed = len(result.get("passed", []))
590597
failed_list = result.get("failed", [])
@@ -742,18 +749,78 @@ def _parallel_upload(store: Store, items: list[tuple[str, Path]], max_workers: i
742749
future.result()
743750

744751

745-
def _run_rust_generate(output: Path) -> None:
752+
def _parallel_download(
753+
store: Store,
754+
fixtures: list[dict],
755+
prefix: str,
756+
dest: Path,
757+
max_workers: int = 8,
758+
) -> list[tuple[str, str]]:
759+
"""Download fixture files from the store in parallel.
760+
761+
Returns a list of (name, error) for any failures.
762+
"""
763+
failures: list[tuple[str, str]] = []
764+
total_bytes = 0
765+
766+
def _download_one(entry: dict) -> tuple[str, bytes | None]:
767+
name = entry["name"]
768+
data = store.read(f"{prefix}/{name}")
769+
return name, data
770+
771+
with ThreadPoolExecutor(max_workers=max_workers) as pool:
772+
futures = {pool.submit(_download_one, entry): entry["name"] for entry in fixtures}
773+
for future in as_completed(futures):
774+
name, data = future.result()
775+
if data is None:
776+
failures.append((name, f"not found at {prefix}/{name}"))
777+
else:
778+
(dest / name).write_bytes(data)
779+
total_bytes += len(data)
780+
781+
_info(f" downloaded {len(fixtures) - len(failures)} fixtures ({total_bytes} bytes)")
782+
return failures
783+
784+
785+
def _build_compat_bin(profile: str = "release") -> str:
786+
"""Build vortex-compat and return the path to the binary.
787+
788+
If VORTEX_COMPAT_BIN is set, skips the build and returns that path.
789+
Otherwise runs `cargo build` with visible output, then locates the binary.
790+
"""
791+
bin_path = os.environ.get("VORTEX_COMPAT_BIN")
792+
if bin_path:
793+
return bin_path
794+
795+
_info(f"building vortex-compat ({profile})...")
796+
_run_cmd(["cargo", "build", "-p", CARGO_BIN, "--profile", profile], check=True)
797+
798+
# Ask cargo where the binary is.
799+
result = subprocess.run(
800+
["cargo", "metadata", "--format-version=1", "--no-deps"],
801+
capture_output=True,
802+
text=True,
803+
check=True,
804+
)
805+
target_dir = json.loads(result.stdout)["target_directory"]
806+
# Cargo puts "dev" profile binaries in "debug/", all others in "<profile>/".
807+
dir_name = "debug" if profile == "dev" else profile
808+
bin_path = str(Path(target_dir) / dir_name / CARGO_BIN)
809+
return bin_path
810+
811+
812+
def _run_rust_generate(output: Path, profile: str = "release") -> None:
746813
"""Run `vortex-compat generate --output <dir>`."""
747-
cmd = _cargo_run_cmd() + ["generate", "--output", str(output)]
748-
_run_cmd(cmd, check=True)
814+
bin_path = _build_compat_bin(profile)
815+
_run_cmd([bin_path, "generate", "--output", str(output)], check=True)
749816

750817

751-
def _run_rust_check(dir: Path, mode: str = "subset") -> dict:
818+
def _run_rust_check(dir: Path, mode: str = "subset", profile: str = "release") -> dict:
752819
"""Run `vortex-compat check --dir <dir> --mode <mode>` and parse JSON stdout."""
753-
cmd = _cargo_run_cmd() + ["check", "--dir", str(dir), "--mode", mode]
754-
result = subprocess.run(cmd, capture_output=True, text=True)
755-
if result.stderr:
756-
print(result.stderr, end="", file=sys.stderr)
820+
bin_path = _build_compat_bin(profile)
821+
cmd = [bin_path, "check", "--dir", str(dir), "--mode", mode]
822+
_info(f" $ {' '.join(cmd)}")
823+
result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=None, text=True) # noqa: UP022
757824

758825
if result.stdout.strip():
759826
return json.loads(result.stdout)
@@ -767,17 +834,12 @@ def _run_rust_check(dir: Path, mode: str = "subset") -> dict:
767834
return {"passed": [], "failed": [], "skipped": []}
768835

769836

770-
def _cargo_run_cmd() -> list[str]:
771-
"""Build the command to invoke vortex-compat (pre-built binary or cargo run)."""
772-
bin_path = os.environ.get("VORTEX_COMPAT_BIN")
773-
if bin_path:
774-
return [bin_path]
775-
return ["cargo", "run", "-p", CARGO_BIN, "--release", "--"]
776-
777-
778837
def _run_cmd(cmd: list[str], check: bool = False, cwd: Path | None = None) -> subprocess.CompletedProcess:
779838
_info(f" $ {' '.join(cmd)}")
780-
return subprocess.run(cmd, check=check, cwd=cwd)
839+
result = subprocess.run(cmd, check=False, cwd=cwd)
840+
if check and result.returncode != 0:
841+
raise subprocess.CalledProcessError(result.returncode, cmd)
842+
return result
781843

782844

783845
def _find_prev_version(versions: list[str], current: str) -> str | None:
@@ -816,6 +878,11 @@ def main() -> None:
816878
epilog=EPILOG,
817879
formatter_class=argparse.RawDescriptionHelpFormatter,
818880
)
881+
parser.add_argument(
882+
"--profile",
883+
default="release",
884+
help="Cargo build profile (default: release). Use 'dev' for faster builds.",
885+
)
819886
sub = parser.add_subparsers(dest="command", metavar="COMMAND")
820887

821888
# -- generate --
@@ -902,6 +969,7 @@ def main() -> None:
902969
epilog=(
903970
"examples:\n"
904971
" uv run compat.py check\n"
972+
" uv run compat.py check --mode last\n"
905973
" uv run compat.py check --versions 0.62.0,0.63.0\n"
906974
" uv run compat.py check --store /tmp/store"
907975
),
@@ -910,7 +978,14 @@ def main() -> None:
910978
p.add_argument("--store", default=DEFAULT_STORE, help="Store spec (default: %(default)s)")
911979
p.add_argument(
912980
"--versions",
913-
help="Comma-separated versions to check (default: all)",
981+
help="Comma-separated versions to check (mutually exclusive with --mode)",
982+
)
983+
p.add_argument(
984+
"--mode",
985+
choices=["all", "last"],
986+
default="all",
987+
help="Which versions to check: 'all' (default) or 'last' (most recent only). "
988+
"Mutually exclusive with --versions.",
914989
)
915990

916991
# -- list --

vortex-test/compat-gen/src/adapter.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,13 @@ use vortex::file::OpenOptionsSessionExt;
1616
use vortex::file::WriteOptionsSessionExt;
1717
use vortex::io::session::RuntimeSessionExt;
1818
use vortex::layout::LayoutStrategy;
19+
use vortex::layout::layouts::flat::Flat;
1920
use vortex::layout::layouts::flat::writer::FlatLayoutStrategy;
2021
use vortex_array::ArrayRef;
2122
use vortex_array::ArrayVisitorExt;
2223
use vortex_array::DynArray;
24+
use vortex_array::MaskFuture;
25+
use vortex_array::expr::root;
2326
use vortex_array::expr::stats::Stat;
2427
use vortex_array::stream::ArrayStreamAdapter;
2528
use vortex_array::stream::ArrayStreamExt;
@@ -108,3 +111,38 @@ pub fn read_file(bytes: ByteBuffer) -> VortexResult<ArrayRef> {
108111
file.scan()?.into_array_stream()?.read_all().await
109112
})
110113
}
114+
115+
/// Open a `.vortex` file and fully decode every array in the layout tree, including
116+
/// auxiliary data like zone maps and dictionaries.
117+
///
118+
/// Walks the entire layout tree and for each leaf `FlatLayout`, reads the segment
119+
/// and calls `ArrayParts::decode()` to fully deserialize the array. This exercises
120+
/// every segment in the file — not just the data path that a plain `scan()` touches.
121+
/// If any segment is corrupt or any array fails to decode, this will error.
122+
pub fn read_layout_tree(bytes: ByteBuffer) -> VortexResult<()> {
123+
runtime()?.block_on(async {
124+
let session = VortexSession::default().with_tokio();
125+
let file = session.open_options().open_buffer(bytes)?;
126+
let root_layout = file.footer().layout().clone();
127+
let segment_source = file.segment_source();
128+
129+
for layout_result in root_layout.depth_first_traversal() {
130+
let layout = layout_result?;
131+
if layout.as_opt::<Flat>().is_none() {
132+
continue;
133+
}
134+
let row_count = layout.row_count();
135+
if row_count == 0 {
136+
continue;
137+
}
138+
let reader = layout.new_reader("".into(), segment_source.clone(), &session)?;
139+
let len =
140+
usize::try_from(row_count).map_err(|e| vortex_err!("row count overflow: {e}"))?;
141+
reader
142+
.projection_evaluation(&(0..row_count), &root(), MaskFuture::new_true(len))?
143+
.await?;
144+
}
145+
146+
Ok(())
147+
})
148+
}

0 commit comments

Comments
 (0)