@@ -120,6 +120,18 @@ def test_puffin_round_trip(tmp_path: Path) -> None:
120120 assert read_vectors [file_path ].to_pylist () == sorted (set (deletions ))
121121
122122
def test_puffin_round_trip_with_sparse_bitmap_keys(tmp_path: Path) -> None:
    """Round-trip two positions whose high-bit keys leave a gap between them."""
    # The first position has high bits 0 and the second has high bits 2, so key 1
    # is absent. The writer must emit sorted keys, and the reader pads the missing
    # key with an empty bitmap.
    low_key_position = 3
    high_key_position = (2 << 32) + 4
    positions = [low_key_position, high_key_position]

    writer, puffin_path = _new_writer(tmp_path)
    writer.set_blob(positions=positions, referenced_data_file="file.parquet")
    writer.finish()

    # Re-read the serialized bytes and check the positions come back unchanged.
    puffin = PuffinFile(puffin_path.read_bytes())
    round_tripped = puffin.to_vector()["file.parquet"].to_pylist()
    assert round_tripped == positions
133+
134+
123135def test_write_and_read_puffin_file (tmp_path : Path ) -> None :
124136 writer , puffin_path = _new_writer (tmp_path )
125137 writer .set_blob (positions = [1 , 2 , 3 ], referenced_data_file = "file1.parquet" )
@@ -203,3 +215,9 @@ def test_set_blob_rejects_empty_positions(tmp_path: Path) -> None:
203215 writer , _ = _new_writer (tmp_path )
204216 with pytest .raises (ValueError , match = "Deletion vector must contain at least one position" ):
205217 writer .set_blob (positions = [], referenced_data_file = "file.parquet" )
218+
219+
def test_set_blob_rejects_position_exceeding_java_key_range(tmp_path: Path) -> None:
    """Check that ``set_blob`` raises ``ValueError`` for a position whose key is too large."""
    writer, _ = _new_writer(tmp_path)

    # Shifting 2**31 left by 32 bits yields a position whose high bits equal
    # 2147483648, one more than the maximum named in the expected error message.
    oversized_position = (2**31) << 32
    expected_message = "Key 2147483648 is too large, max 2147483647"

    with pytest.raises(ValueError, match=expected_message):
        writer.set_blob(positions=[oversized_position], referenced_data_file="file.parquet")
0 commit comments