Skip to content

Commit 167a362

Browse files
[python] Fix index_manifest not inherited from previous snapshot with append commit (#7662)
1 parent cda71db commit 167a362

3 files changed

Lines changed: 73 additions & 1 deletion

File tree

paimon-python/pypaimon/tests/e2e/java_py_read_write_test.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,7 @@ def test_read_btree_index_table(self):
393393
self._test_read_btree_index_generic("test_btree_index_bigint", 2000, pa.int64())
394394
self._test_read_btree_index_large()
395395
self._test_read_btree_index_null()
396+
self._test_index_manifest_inherited_after_write()
396397

397398
def _test_read_btree_index_generic(self, table_name: str, k, k_type):
398399
table = self.catalog.get_table('default.' + table_name)
@@ -491,6 +492,29 @@ def _test_read_btree_index_null(self):
491492
})
492493
self.assertEqual(expected, actual)
493494

495+
def _test_index_manifest_inherited_after_write(self):
496+
table = self.catalog.get_table('default.test_btree_index_string')
497+
498+
snapshot_before = table.snapshot_manager().get_latest_snapshot()
499+
self.assertIsNotNone(snapshot_before.index_manifest,
500+
"Index manifest should exist before Python write")
501+
502+
write_builder = table.new_batch_write_builder()
503+
write = write_builder.new_write()
504+
commit = write_builder.new_commit()
505+
data = pa.table({'k': ['k4'], 'v': ['v4']})
506+
write.write_arrow(data)
507+
commit.commit(write.prepare_commit())
508+
write.close()
509+
commit.close()
510+
511+
snapshot_after = table.snapshot_manager().get_latest_snapshot()
512+
self.assertGreater(snapshot_after.id, snapshot_before.id)
513+
self.assertIsNotNone(
514+
snapshot_after.index_manifest,
515+
"index_manifest lost after Python data write - indexes become invisible"
516+
)
517+
494518
@parameterized.expand([('json',), ('csv',)])
495519
def test_read_compressed_text_append_table(self, file_format):
496520
table = self.catalog.get_table(

paimon-python/pypaimon/tests/file_store_commit_test.py

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
import unittest
2020
from datetime import datetime
21-
from unittest.mock import Mock, patch
21+
from unittest.mock import MagicMock, Mock, patch
2222

2323
from pypaimon.manifest.schema.data_file_meta import DataFileMeta
2424
from pypaimon.manifest.schema.manifest_entry import ManifestEntry
@@ -404,6 +404,49 @@ def test_generate_partition_statistics_empty_commit_messages(
404404
# Verify results
405405
self.assertEqual(len(statistics), 0)
406406

407+
def test_append_commit_inherits_index_manifest(
408+
self, mock_manifest_list_manager, mock_manifest_file_manager, mock_snapshot_manager):
409+
file_store_commit = self._create_file_store_commit()
410+
411+
self.mock_table.identifier = 'default.test_table'
412+
self.mock_table.table_schema = Mock()
413+
self.mock_table.table_schema.id = 7
414+
self.mock_table.options.row_tracking_enabled.return_value = False
415+
416+
snapshot_commit = MagicMock()
417+
snapshot_commit.__enter__.return_value = snapshot_commit
418+
snapshot_commit.__exit__.return_value = False
419+
snapshot_commit.commit.return_value = True
420+
file_store_commit.snapshot_commit = snapshot_commit
421+
422+
file_store_commit._write_manifest_file = Mock(return_value=Mock())
423+
file_store_commit._generate_partition_statistics = Mock(return_value=[])
424+
file_store_commit.manifest_list_manager.read_all.return_value = []
425+
426+
latest_snapshot = Mock()
427+
latest_snapshot.id = 3
428+
latest_snapshot.total_record_count = 10
429+
latest_snapshot.index_manifest = "index-manifest-existing"
430+
431+
commit_entry = Mock()
432+
commit_entry.kind = 0
433+
commit_entry.file = Mock()
434+
commit_entry.file.row_count = 2
435+
436+
result = file_store_commit._try_commit_once(
437+
retry_result=None,
438+
commit_kind="APPEND",
439+
commit_entries=[commit_entry],
440+
commit_identifier=11,
441+
latest_snapshot=latest_snapshot
442+
)
443+
444+
self.assertTrue(result.is_success())
445+
self.assertEqual(
446+
"index-manifest-existing",
447+
snapshot_commit.commit.call_args[0][0].index_manifest
448+
)
449+
407450
def test_null_partition_value(
408451
self, mock_manifest_list_manager, mock_manifest_file_manager, mock_snapshot_manager):
409452
from pypaimon.data.timestamp import Timestamp

paimon-python/pypaimon/write/file_store_commit.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,10 @@ def _try_commit_once(self, retry_result: Optional[RetryResult], commit_kind: str
381381
delta_record_count -= entry.file.row_count
382382

383383
total_record_count += delta_record_count
384+
index_manifest = None
385+
if latest_snapshot and commit_kind == "APPEND":
386+
index_manifest = latest_snapshot.index_manifest
387+
384388
snapshot_data = Snapshot(
385389
version=3,
386390
id=new_snapshot_id,
@@ -394,6 +398,7 @@ def _try_commit_once(self, retry_result: Optional[RetryResult], commit_kind: str
394398
commit_kind=commit_kind,
395399
time_millis=int(time.time() * 1000),
396400
next_row_id=next_row_id,
401+
index_manifest=index_manifest,
397402
)
398403
# Generate partition statistics for the commit
399404
statistics = self._generate_partition_statistics(commit_entries)

0 commit comments

Comments
 (0)