Skip to content

Commit 9baa257

Browse files
committed
wip
Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk>
1 parent 937d2d3 commit 9baa257

12 files changed

Lines changed: 116 additions & 252 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ dev = [
3636
"urllib3>=2.6.3",
3737
"filelock>=3.20.3",
3838
"protobuf>=6.33.5",
39+
"jsonschema>=4.0",
3940
]
4041

4142
[tool.uv]

uv.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-test/compat-gen/DESIGN.md

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -201,13 +201,13 @@ Uses the Epoch C API (`session.write_options()` /
201201
1. Detect version from nearest git tag at HEAD (or `<REF>`)
202202
2. Generate fixtures (from current tree, or from a worktree at `<REF>`)
203203
3. Fetch previous version's manifest, merge `since` values, enforce additive-only
204-
4. Upload `.vortex` files + `manifest.json` to `v{version}/`
204+
4. Upload `.vortex` files + `manifest.json` to `v{version}/arrays/`
205205
5. Update `versions.json`
206206

207207
#### `check [--versions <CSV>] [--store <SPEC>] [--exclude <CSV>]`
208208

209209
1. Read `versions.json` from store
210-
2. For each version, download `manifest.json` + all `.vortex` files
210+
2. For each version, download `arrays/manifest.json` + all `.vortex` files
211211
3. Run `vortex-compat check --dir <tmpdir> --mode subset`
212212
4. Aggregate results, exit 1 if any failures
213213

@@ -266,16 +266,19 @@ A JSON array of version strings:
266266
store/
267267
├── versions.json
268268
├── v0.62.0/
269-
│ ├── manifest.json
270-
│ ├── primitives.vortex
271-
│ └── ...
269+
│ └── arrays/
270+
│ ├── manifest.json
271+
│ ├── primitives.vortex
272+
│ └── ...
272273
└── v0.63.0/
273-
├── manifest.json
274-
└── ...
274+
└── arrays/
275+
├── manifest.json
276+
└── ...
275277
```
276278

277-
Each version gets a directory named `v{version}`. Re-publishing overwrites
278-
the existing directory.
279+
Each version gets a directory named `v{version}` with an `arrays/`
280+
subdirectory for fixture files. Re-publishing overwrites the existing
281+
directory.
279282

280283
### Per-version manifest
281284

vortex-test/compat-gen/scripts/compat.py

Lines changed: 69 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,13 @@
1313
1414
Quick start:
1515
# Generate + publish for HEAD (version auto-detected from latest tag)
16-
python compat.py publish
16+
uv run compat.py publish
1717
1818
# Publish from an older tag
19-
python compat.py publish --git-ref v0.62.0
19+
uv run compat.py publish --git-ref v0.62.0
2020
2121
# Check all published versions against current code
22-
python compat.py check
22+
uv run compat.py check
2323
"""
2424

2525
from __future__ import annotations
@@ -37,6 +37,8 @@
3737
from urllib.error import HTTPError
3838
from urllib.request import urlopen
3939

40+
import jsonschema
41+
4042
DEFAULT_STORE = "s3://vortex-compat-fixtures"
4143
CARGO_BIN = "vortex-compat"
4244

@@ -60,26 +62,26 @@
6062
6163
examples:
6264
# Publish from HEAD (version from latest tag)
63-
python compat.py publish
64-
python compat.py publish --dry-run
65+
uv run compat.py publish
66+
uv run compat.py publish --dry-run
6567
6668
# Publish from an older tag via worktree
67-
python compat.py publish --git-ref v0.62.0
69+
uv run compat.py publish --git-ref v0.62.0
6870
6971
# Generate locally without publishing
70-
python compat.py generate --output /tmp/fixtures
71-
python compat.py generate --output /tmp/fixtures --git-ref v0.62.0
72+
uv run compat.py generate --output /tmp/fixtures
73+
uv run compat.py generate --output /tmp/fixtures --git-ref v0.62.0
7274
7375
# Check all versions, or specific ones
74-
python compat.py check
75-
python compat.py check --versions 0.62.0,0.63.0
76+
uv run compat.py check
77+
uv run compat.py check --versions 0.62.0,0.63.0
7678
7779
# Inspect store contents
78-
python compat.py list
79-
python compat.py list --version 0.62.0
80+
uv run compat.py list
81+
uv run compat.py list --version 0.62.0
8082
8183
# Validate additive-only manifest property
82-
python compat.py validate-manifest
84+
uv run compat.py validate-manifest
8385
"""
8486

8587

@@ -139,7 +141,7 @@ def list_versions(self) -> list[str]:
139141
versions = []
140142
for entry in self.root.iterdir():
141143
if entry.is_dir() and entry.name.startswith("v"):
142-
manifest = entry / "manifest.json"
144+
manifest = entry / "arrays" / "manifest.json"
143145
if manifest.exists():
144146
versions.append(entry.name[1:]) # strip 'v' prefix
145147
versions.sort(key=_version_sort_key)
@@ -233,11 +235,43 @@ def _version_from_ref(git_ref: str | None = None) -> str:
233235
# ---------------------------------------------------------------------------
234236

235237

238+
# JSON Schema for a per-version manifest; passed to jsonschema.validate() in
# _validate_manifest().
#
# Top level: an object that must carry "version", "generated_at" and
# "fixtures". "fixtures" is an array of objects, each of which must carry
# "name" and "since"; "description" is optional but must be a string when
# present.
#
# No "additionalProperties" constraint is declared at either level, so keys
# not listed here (e.g. an "expected_encodings" entry) do not fail validation.
MANIFEST_SCHEMA = {
239+
"type": "object",
240+
"required": ["version", "generated_at", "fixtures"],
241+
"properties": {
242+
"version": {"type": "string"},
243+
"generated_at": {"type": "string"},
244+
"fixtures": {
245+
"type": "array",
246+
"items": {
247+
"type": "object",
248+
"required": ["name", "since"],
249+
"properties": {
250+
"name": {"type": "string"},
251+
"description": {"type": "string"},
252+
"since": {"type": "string"},
253+
},
254+
},
255+
},
256+
},
257+
}
258+
259+
260+
def _validate_manifest(manifest: dict, version: str) -> None:
    """Validate a parsed manifest against ``MANIFEST_SCHEMA``.

    Args:
        manifest: The decoded manifest JSON.
        version: Version the manifest belongs to (without the ``v`` prefix);
            used only to label the error message.

    Raises:
        ValueError: If the manifest violates the schema. The message names
            the version, the validator's message, and the path of the
            offending element inside the manifest.
    """
    try:
        jsonschema.validate(manifest, MANIFEST_SCHEMA)
    except jsonschema.ValidationError as e:
        # absolute_path is empty when the violation is on the top-level
        # object itself (e.g. a missing required key); show "<root>" instead
        # of leaving a blank path in the message.
        location = "/".join(str(p) for p in e.absolute_path) or "<root>"
        raise ValueError(f"v{version} manifest: {e.message} (at path: {location})") from e
266+
267+
236268
def _read_manifest(store: Store, version: str) -> dict | None:
237-
data = store.read(f"v{version}/manifest.json")
269+
data = store.read(f"v{version}/arrays/manifest.json")
238270
if data is None:
239271
return None
240-
return json.loads(data)
272+
manifest = json.loads(data)
273+
_validate_manifest(manifest, version)
274+
return manifest
241275

242276

243277
def _merge_manifest(
@@ -255,10 +289,7 @@ def _merge_manifest(
255289
for f in fixtures_json["fixtures"]:
256290
name = f["name"]
257291
since = prev_since.get(name, version)
258-
entry = {"name": name, "description": f["description"], "since": since}
259-
if "expected_encodings" in f:
260-
entry["expected_encodings"] = f["expected_encodings"]
261-
entries.append(entry)
292+
entries.append({"name": name, "description": f["description"], "since": since})
262293

263294
# Additive-only enforcement.
264295
current_names = {e["name"] for e in entries}
@@ -347,10 +378,7 @@ def cmd_generate(args: argparse.Namespace) -> None:
347378
fixtures_json = json.loads((output / "fixtures.json").read_text())
348379
entries = []
349380
for f in fixtures_json["fixtures"]:
350-
entry = {"name": f["name"], "description": f["description"], "since": version}
351-
if "expected_encodings" in f:
352-
entry["expected_encodings"] = f["expected_encodings"]
353-
entries.append(entry)
381+
entries.append({"name": f["name"], "description": f["description"], "since": version})
354382
manifest = {
355383
"version": version,
356384
"generated_at": datetime.now(timezone.utc).isoformat(),
@@ -397,11 +425,11 @@ def cmd_publish(args: argparse.Namespace) -> None:
397425
for entry in manifest["fixtures"]:
398426
name = entry["name"]
399427
local = output / name
400-
key = f"v{version}/{name}"
428+
key = f"v{version}/arrays/{name}"
401429
store.write_file(key, local)
402430
_info(f" uploaded {name}")
403431

404-
store.write(f"v{version}/manifest.json", manifest_json.encode())
432+
store.write(f"v{version}/arrays/manifest.json", manifest_json.encode())
405433
_info(" uploaded manifest.json")
406434

407435
if version not in versions:
@@ -448,7 +476,7 @@ def cmd_check(args: argparse.Namespace) -> None:
448476

449477
for entry in manifest["fixtures"]:
450478
name = entry["name"]
451-
data = store.read(f"v{version}/{name}")
479+
data = store.read(f"v{version}/arrays/{name}")
452480
if data is None:
453481
_info(f" v{version}: {name} not found in store")
454482
continue
@@ -652,8 +680,8 @@ def main() -> None:
652680
),
653681
epilog=(
654682
"examples:\n"
655-
" python compat.py generate --output ./out\n"
656-
" python compat.py generate --output ./out --git-ref v0.62.0"
683+
" uv run compat.py generate --output ./out\n"
684+
" uv run compat.py generate --output ./out --git-ref v0.62.0"
657685
),
658686
formatter_class=argparse.RawDescriptionHelpFormatter,
659687
)
@@ -679,10 +707,10 @@ def main() -> None:
679707
),
680708
epilog=(
681709
"examples:\n"
682-
" python compat.py publish\n"
683-
" python compat.py publish --dry-run\n"
684-
" python compat.py publish --git-ref v0.62.0\n"
685-
" python compat.py publish --store /tmp/store"
710+
" uv run compat.py publish\n"
711+
" uv run compat.py publish --dry-run\n"
712+
" uv run compat.py publish --git-ref v0.62.0\n"
713+
" uv run compat.py publish --store /tmp/store"
686714
),
687715
formatter_class=argparse.RawDescriptionHelpFormatter,
688716
)
@@ -715,9 +743,9 @@ def main() -> None:
715743
),
716744
epilog=(
717745
"examples:\n"
718-
" python compat.py check\n"
719-
" python compat.py check --versions 0.62.0,0.63.0\n"
720-
" python compat.py check --store /tmp/store"
746+
" uv run compat.py check\n"
747+
" uv run compat.py check --versions 0.62.0,0.63.0\n"
748+
" uv run compat.py check --store /tmp/store"
721749
),
722750
formatter_class=argparse.RawDescriptionHelpFormatter,
723751
)
@@ -740,9 +768,9 @@ def main() -> None:
740768
description="Inspect the contents of a fixture store.",
741769
epilog=(
742770
"examples:\n"
743-
" python compat.py list\n"
744-
" python compat.py list --version 0.62.0\n"
745-
" python compat.py list --store /tmp/store"
771+
" uv run compat.py list\n"
772+
" uv run compat.py list --version 0.62.0\n"
773+
" uv run compat.py list --store /tmp/store"
746774
),
747775
formatter_class=argparse.RawDescriptionHelpFormatter,
748776
)
@@ -761,8 +789,8 @@ def main() -> None:
761789
),
762790
epilog=(
763791
"examples:\n"
764-
" python compat.py validate-manifest\n"
765-
" python compat.py validate-manifest --store /tmp/store"
792+
" uv run compat.py validate-manifest\n"
793+
" uv run compat.py validate-manifest --store /tmp/store"
766794
),
767795
formatter_class=argparse.RawDescriptionHelpFormatter,
768796
)

vortex-test/compat-gen/src/adapter.rs

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,6 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4-
// Epoch C adapter — for Vortex v0.58.0 through HEAD
5-
//
6-
// Write: session.write_options(), returns WriteSummary, takes &mut sink
7-
// Read: session.open_options().open_buffer(buf) (sync), into_array_stream() (async)
8-
94
use std::path::Path;
105
use std::sync::Arc;
116

@@ -22,10 +17,11 @@ use vortex_array::stream::ArrayStreamAdapter;
2217
use vortex_array::stream::ArrayStreamExt;
2318
use vortex_buffer::ByteBuffer;
2419
use vortex_error::VortexResult;
20+
use vortex_error::vortex_err;
2521
use vortex_session::VortexSession;
2622

2723
fn runtime() -> VortexResult<Runtime> {
28-
Runtime::new().map_err(|e| vortex_error::vortex_err!("failed to create tokio runtime: {e}"))
24+
Runtime::new().map_err(|e| vortex_err!("failed to create tokio runtime: {e}"))
2925
}
3026

3127
/// Write a sequence of array chunks as a `.vortex` file with no compression.
@@ -41,12 +37,8 @@ pub fn write_file(path: &Path, chunk: ArrayRef) -> VortexResult<()> {
4137
let session = VortexSession::default().with_tokio();
4238
let mut file = tokio::fs::File::create(path)
4339
.await
44-
.map_err(|e| vortex_error::vortex_err!("failed to create {}: {e}", path.display()))?;
45-
let _summary = session
46-
.write_options()
47-
.with_strategy(strategy)
48-
.write(&mut file, stream)
49-
.await?;
40+
.map_err(|e| vortex_err!("failed to create {}: {e}", path.display()))?;
41+
let _summary = session.write_options().write(&mut file, stream).await?;
5042
Ok(())
5143
})
5244
}

vortex-test/compat-gen/src/check.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
use std::path::Path;
55

6+
use clap::ValueEnum;
67
use serde::Serialize;
78
use vortex_array::IntoArray;
89
use vortex_array::arrays::ChunkedArray;
@@ -16,6 +17,7 @@ use crate::adapter;
1617
use crate::fixtures::all_fixtures;
1718

1819
/// How to handle mismatches between directory and known fixtures.
20+
#[derive(Clone, ValueEnum)]
1921
pub enum Mode {
2022
/// Directory must match fixtures exactly.
2123
Exact,

vortex-test/compat-gen/src/fixtures/clickbench.rs

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,16 @@ use std::path::Path;
55

66
use arrow_array::RecordBatch;
77
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
8-
use vortex::layout::LayoutId;
98
use vortex_array::ArrayRef;
109
use vortex_array::IntoArray;
1110
use vortex_array::arrays::ChunkedArray;
1211
use vortex_array::arrays::Primitive;
1312
use vortex_array::arrays::Struct;
1413
use vortex_array::arrays::VarBin;
1514
use vortex_array::arrow::FromArrowArray;
16-
use vortex_array::vtable::ArrayId;
1715
use vortex_error::VortexResult;
1816
use vortex_error::vortex_err;
1917

20-
use super::ExpectedEncoding;
2118
use super::Fixture;
2219

2320
/// First partition of ClickBench hits, limited to 1000 rows.
@@ -37,16 +34,6 @@ impl ArrayFixture for ClickBenchHits1kFixture {
3734
"First 1000 rows of ClickBench hits_0 partition (wide real-world schema)"
3835
}
3936

40-
fn expected_encodings(&self) -> Vec<ExpectedEncoding> {
41-
vec![
42-
ExpectedEncoding::Array(ArrayId::new_ref("vortex.primitive")),
43-
ExpectedEncoding::Array(ArrayId::new_ref("vortex.varbin")),
44-
ExpectedEncoding::Array(ArrayId::new_ref("vortex.struct")),
45-
ExpectedEncoding::Layout(LayoutId::new_ref("vortex.flat")),
46-
ExpectedEncoding::Layout(LayoutId::new_ref("vortex.struct")),
47-
]
48-
}
49-
5037
fn setup(&self, tmp_dir: &Path) -> VortexResult<()> {
5138
let parquet_path = tmp_dir.join(PARQUET_FILENAME);
5239
if parquet_path.exists() {

0 commit comments

Comments
 (0)