Skip to content

Commit a6d2f31

Browse files
committed
Add unit tests for sparse bitmap keys and the Java key range limit
1 parent eb81422 commit a6d2f31

1 file changed

Lines changed: 18 additions & 0 deletions

File tree

tests/table/test_puffin.py

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -120,6 +120,18 @@ def test_puffin_round_trip(tmp_path: Path) -> None:
120120
assert read_vectors[file_path].to_pylist() == sorted(set(deletions))
121121

122122

123+
def test_puffin_round_trip_with_sparse_bitmap_keys(tmp_path: Path) -> None:
    # The two positions land under high-bit keys 0 and 2, leaving key 1 unused.
    # The writer must emit its keys in sorted order, and the reader is expected
    # to fill the absent key with an empty bitmap.
    data_file = "file.parquet"
    low_position = 3
    high_position = (2 << 32) + 4
    expected = [low_position, high_position]

    puffin_writer, output_path = _new_writer(tmp_path)
    puffin_writer.set_blob(positions=expected, referenced_data_file=data_file)
    puffin_writer.finish()

    # Read the written file back and check the positions survive unchanged.
    decoded = PuffinFile(output_path.read_bytes()).to_vector()
    assert decoded[data_file].to_pylist() == expected
133+
134+
123135
def test_write_and_read_puffin_file(tmp_path: Path) -> None:
124136
writer, puffin_path = _new_writer(tmp_path)
125137
writer.set_blob(positions=[1, 2, 3], referenced_data_file="file1.parquet")
@@ -203,3 +215,9 @@ def test_set_blob_rejects_empty_positions(tmp_path: Path) -> None:
203215
writer, _ = _new_writer(tmp_path)
204216
with pytest.raises(ValueError, match="Deletion vector must contain at least one position"):
205217
writer.set_blob(positions=[], referenced_data_file="file.parquet")
218+
219+
220+
def test_set_blob_rejects_position_exceeding_java_key_range(tmp_path: Path) -> None:
    # Shifting 2**31 into the upper 32 bits gives a position whose high 32 bits
    # equal 2147483648, one past the 2147483647 maximum named in the error.
    oversized_position = (2**31) << 32
    expected_error = "Key 2147483648 is too large, max 2147483647"

    puffin_writer, _ = _new_writer(tmp_path)
    with pytest.raises(ValueError, match=expected_error):
        puffin_writer.set_blob(positions=[oversized_position], referenced_data_file="file.parquet")

0 commit comments

Comments
 (0)