Skip to content

Commit 6d9ddbd

Browse files
authored
fix: don't raise an error if reading a fragment with a null reader and deleted rows (#3741)
The null reader is applied before deletions are processed, so it must generate rows based on the fragment's physical row count rather than its logical (post-deletion) row count.
1 parent a083db9 commit 6d9ddbd

2 files changed

Lines changed: 16 additions & 2 deletions

File tree

python/python/tests/test_dataset.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1578,6 +1578,19 @@ def test_flat_vector_search_with_delete(tmp_path: Path):
15781578
)
15791579

15801580

1581+
def test_null_reader_with_deletes(tmp_path: Path):
1582+
full_schema = pa.schema(
1583+
[
1584+
pa.field("id", pa.int64()),
1585+
pa.field("other", pa.int64()),
1586+
]
1587+
)
1588+
ds = lance.write_dataset([], tmp_path, schema=full_schema, mode="create")
1589+
ds.insert(pa.table({"id": [1, 2, 3, 4, 5]}))
1590+
ds.delete("id in (1, 2)")
1591+
ds.to_table()
1592+
1593+
15811594
def test_merge_insert_conditional_upsert_example(tmp_path: Path):
15821595
table = pa.Table.from_pydict(
15831596
{

rust/lance/src/dataset/fragment.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -939,8 +939,9 @@ impl FileFragment {
939939
}
940940
}
941941

942-
// This should return immediately on modern datasets.
943-
let num_rows = self.count_rows(None).await?;
942+
// This should return immediately on modern datasets. Need to use physical_rows because
943+
// deletions will be applied later
944+
let num_rows = self.physical_rows().await?;
944945

945946
// Check if there are any fields that are not in any data files
946947
let field_ids_in_files = opened_files

0 commit comments

Comments
 (0)