Skip to content

Commit 4b707d9

Browse files
committed
fix
Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk>
1 parent 5415281 commit 4b707d9

12 files changed

Lines changed: 136 additions & 201 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-test/compat-gen/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,7 @@ tpchgen = { workspace = true }
3838
tpchgen-arrow = { workspace = true }
3939

4040
# ClickBench parquet reading
41+
bytes = { workspace = true }
4142
parquet = { workspace = true }
4243

4344
# Async runtime

vortex-test/compat-gen/src/fixtures/arrays/datasets/clickbench.rs

Lines changed: 10 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
use arrow_array::RecordBatch;
5+
use bytes::Bytes;
56
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
67
use vortex_array::ArrayRef;
78
use vortex_array::IntoArray;
@@ -12,31 +13,27 @@ use vortex_error::vortex_err;
1213

1314
use crate::fixtures::DatasetFixture;
1415

15-
/// First partition of ClickBench hits, limited to 1000 rows.
16-
const CLICKBENCH_URL: &str =
17-
"https://pub-3ba949c0f0354ac18db1f0f14f0a2c52.r2.dev/clickbench/parquet_many/hits_0.parquet";
16+
/// 5×1000 rows sampled from deterministic random offsets in ClickBench hits partition 0.
17+
/// Offsets (seed=42): [26225, 116739, 288389, 670487, 777572].
18+
const CLICKBENCH_PARQUET: &[u8] = include_bytes!("../../../../data/clickbench_hits_5k.parquet");
1819

19-
struct ClickBenchHits1kFixture;
20+
struct ClickBenchHits5kFixture;
2021

21-
impl DatasetFixture for ClickBenchHits1kFixture {
22+
impl DatasetFixture for ClickBenchHits5kFixture {
2223
fn name(&self) -> &str {
23-
"clickbench_hits_1k"
24+
"clickbench_hits_5k"
2425
}
2526

2627
fn description(&self) -> &str {
27-
"First 1000 rows of ClickBench hits dataset with wide schema of primitives and strings"
28+
"5000 rows (5x1000 from random offsets) of ClickBench hits dataset with wide schema of primitives and strings"
2829
}
2930

3031
fn build(&self) -> VortexResult<ArrayRef> {
31-
let bytes = reqwest::blocking::get(CLICKBENCH_URL)
32-
.map_err(|e| vortex_err!("failed to download ClickBench parquet: {e}"))?
33-
.bytes()
34-
.map_err(|e| vortex_err!("failed to read ClickBench response body: {e}"))?;
32+
let bytes = Bytes::from_static(CLICKBENCH_PARQUET);
3533

3634
let reader = ParquetRecordBatchReaderBuilder::try_new(bytes)
3735
.map_err(|e| vortex_err!("failed to open parquet: {e}"))?
3836
.with_batch_size(1000)
39-
.with_limit(1000)
4037
.build()
4138
.map_err(|e| vortex_err!("failed to build parquet reader: {e}"))?;
4239

@@ -55,5 +52,5 @@ impl DatasetFixture for ClickBenchHits1kFixture {
5552
}
5653

5754
pub fn fixtures() -> Vec<Box<dyn DatasetFixture>> {
58-
vec![Box::new(ClickBenchHits1kFixture)]
55+
vec![Box::new(ClickBenchHits5kFixture)]
5956
}

vortex-test/compat-gen/src/fixtures/arrays/datasets/mod.rs

Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,3 +14,42 @@ pub fn fixtures() -> Vec<Box<dyn DatasetFixture>> {
1414
fixtures.extend(clickbench::fixtures());
1515
fixtures
1616
}
17+
18+
#[cfg(test)]
19+
mod tests {
20+
use vortex::file::WriteStrategyBuilder;
21+
22+
use super::fixtures;
23+
use crate::adapter;
24+
25+
fn is_clickbench_fixture(name: &str) -> bool {
26+
name.contains("clickbench")
27+
}
28+
29+
#[test]
30+
fn roundtrip_non_clickbench_fixtures_to_bytes() {
31+
for dataset in fixtures()
32+
.into_iter()
33+
.filter(|fixture| !is_clickbench_fixture(fixture.name()))
34+
{
35+
eprintln!("--- writing {} regular to bytes ---", dataset.name());
36+
let array = dataset.build().unwrap();
37+
let regular_bytes = adapter::write_compressed_to_bytes(
38+
array.clone(),
39+
WriteStrategyBuilder::default().build(),
40+
)
41+
.unwrap();
42+
let _regular = adapter::read_file(regular_bytes).unwrap();
43+
44+
eprintln!("--- writing {} compact to bytes ---", dataset.name());
45+
let compact_bytes = adapter::write_compressed_to_bytes(
46+
array,
47+
WriteStrategyBuilder::default()
48+
.with_compact_encodings()
49+
.build(),
50+
)
51+
.unwrap();
52+
let _compact = adapter::read_file(compact_bytes).unwrap();
53+
}
54+
}
55+
}

vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/bytebool.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ impl FlatLayoutFixture for ByteBoolFixture {
4444
let sparse_true: Vec<bool> = (0..N).map(|i| i % 127 == 0).collect();
4545
let edge_null_vals: Vec<bool> = (0..N).map(|i| i % 4 == 0).collect();
4646
let edge_null_validity = Validity::from(
47-
BoolArray::from_iter((0..N).map(|i| i >= 8 && i < N - 8)).to_bit_buffer(),
47+
BoolArray::from_iter((0..N).map(|i| (8..N - 8).contains(&i))).to_bit_buffer(),
4848
);
4949

5050
let arr = StructArray::try_new(

vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/fsst.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -90,7 +90,7 @@ impl FlatLayoutFixture for FsstFixture {
9090
let all_null_clustered = VarBinArray::from(
9191
(0..N)
9292
.map(|i| {
93-
if i < 16 || i >= N - 16 {
93+
if !(16..N - 16).contains(&i) {
9494
None
9595
} else {
9696
Some("clustered-null-middle")

vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/pco.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,7 @@ impl FlatLayoutFixture for PcoFixture {
5252
if i % 257 == 0 {
5353
1_000_000.0 + i as f64
5454
} else {
55-
3.14159
55+
std::f64::consts::PI
5656
}
5757
})
5858
.collect();

vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/sparse.rs

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -58,13 +58,9 @@ impl FlatLayoutFixture for SparseFixture {
5858
(0..N as i32).map(|i| if i % 75 == 0 { Some(99) } else { Some(10) }),
5959
);
6060
let all_default = ConstantArray::new(10i32, N).into_array();
61-
let clustered_edges = PrimitiveArray::from_option_iter((0..N as i64).map(|i| {
62-
if i < 8 || i >= N as i64 - 8 {
63-
Some(i * 9)
64-
} else {
65-
None
66-
}
67-
}));
61+
let clustered_edges = PrimitiveArray::from_option_iter(
62+
(0..N as i64).map(|i| (i < 8 || i >= N as i64 - 8).then(|| i * 9)),
63+
);
6864
let almost_dense = PrimitiveArray::from_option_iter(
6965
(0..N as i32).map(|i| if i % 32 == 0 { None } else { Some((i % 5) + 1) }),
7066
);

vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/zstd.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@ impl FlatLayoutFixture for ZstdFixture {
3939
if i % 257 == 0 {
4040
100_000.0 + i as f64
4141
} else {
42-
3.14159
42+
std::f64::consts::PI
4343
}
4444
})
4545
.collect();

vortex-test/compat-gen/src/fixtures/arrays/synthetic/mod.rs

Lines changed: 78 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,3 +14,81 @@ pub fn fixtures() -> Vec<Box<dyn FlatLayoutFixture>> {
1414
fixtures.extend(encodings::fixtures());
1515
fixtures
1616
}
17+
18+
#[cfg(test)]
19+
mod tests {
20+
use vortex_array::IntoArray;
21+
use vortex_array::arrays::BoolArray;
22+
use vortex_array::arrays::PrimitiveArray;
23+
use vortex_array::arrays::StructArray;
24+
use vortex_array::arrays::VarBinViewArray;
25+
use vortex_array::assert_arrays_eq;
26+
use vortex_array::dtype::FieldNames;
27+
use vortex_array::validity::Validity;
28+
29+
use super::fixtures;
30+
use crate::adapter;
31+
use crate::fixtures::check_expected_encodings;
32+
33+
fn boundary_length_array(len: usize) -> vortex_error::VortexResult<vortex_array::ArrayRef> {
34+
let ints = PrimitiveArray::from_iter((0..i32::try_from(len)?).map(|i| i - 17));
35+
let nullable_ints = PrimitiveArray::from_option_iter(
36+
(0..len as i64).map(|i| if i % 5 == 0 { None } else { Some(i * 3 - 7) }),
37+
);
38+
let bools = BoolArray::from_iter((0..len).map(|i| i % 3 == 0));
39+
let strings = VarBinViewArray::from_iter_nullable_str((0..len).map(|i| match i % 5 {
40+
0 => None,
41+
1 => Some(""),
42+
2 => Some("edge"),
43+
3 => Some("boundary-length-string"),
44+
_ => Some("zz"),
45+
}));
46+
47+
Ok(StructArray::try_new(
48+
FieldNames::from(["ints", "nullable_ints", "bools", "strings"]),
49+
vec![
50+
ints.into_array(),
51+
nullable_ints.into_array(),
52+
bools.into_array(),
53+
strings.into_array(),
54+
],
55+
len,
56+
Validity::NonNullable,
57+
)?
58+
.into_array())
59+
}
60+
61+
#[test]
62+
fn roundtrip_fixtures_to_bytes() {
63+
for fixture in fixtures() {
64+
eprintln!("--- writing {} to bytes ---", fixture.name());
65+
let array = fixture.build().unwrap();
66+
check_expected_encodings(&array, fixture.as_ref()).unwrap();
67+
let bytes = adapter::write_file_to_bytes(array.clone()).unwrap();
68+
let roundtripped = adapter::read_file(bytes).unwrap();
69+
assert_arrays_eq!(array, roundtripped);
70+
eprintln!(" OK: {}", fixture.name());
71+
}
72+
}
73+
74+
#[test]
75+
fn roundtrip_boundary_lengths_to_bytes() {
76+
const BOUNDARY_LENGTHS: [usize; 15] = [
77+
0, 1, 2, 31, 32, 63, 64, 127, 128, 255, 256, 511, 512, 1023, 1025,
78+
];
79+
80+
for len in BOUNDARY_LENGTHS {
81+
eprintln!(
82+
"--- writing shared boundary fixture length {} to bytes ---",
83+
len
84+
);
85+
let boundary_array = boundary_length_array(len).unwrap();
86+
if len == 0 {
87+
assert!(adapter::write_file_to_bytes(boundary_array).is_err());
88+
continue;
89+
}
90+
let bytes = adapter::write_file_to_bytes(boundary_array).unwrap();
91+
let _array = adapter::read_file(bytes).unwrap();
92+
}
93+
}
94+
}

0 commit comments

Comments
 (0)