From 9fff040d58c89fd2c63a4349e00459028045e177 Mon Sep 17 00:00:00 2001 From: Mrityunjay Raj Date: Fri, 26 Jun 2026 01:46:32 +0530 Subject: [PATCH 1/5] comments: drop N=1/N>1 pack-files scaffolding, describe the general case These comments were phrased around development milestones (N=1, N>1, pack_id == chunk_id); with multi-object packs in place that framing is stale. Reword for the general case. Comment-only, no behavior change. --- src/borg/archive.py | 10 ++++---- src/borg/archiver/debug_cmd.py | 3 ++- src/borg/cache.py | 2 +- src/borg/manifest.py | 2 +- src/borg/repository.py | 24 +++++++++---------- src/borg/testsuite/archiver/__init__.py | 4 ++-- src/borg/testsuite/archiver/check_cmd_test.py | 6 ++--- .../testsuite/archiver/compact_cmd_test.py | 2 +- src/borg/testsuite/cache_test.py | 2 +- src/borg/testsuite/hashindex_test.py | 2 +- src/borg/testsuite/repository_test.py | 18 +++++++------- 11 files changed, 38 insertions(+), 37 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index 07a1932941..2677589be9 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -1842,9 +1842,9 @@ def verify_data(self): if defect_chunks: if self.repair: # We would remove the defect chunks here, but single-object delete is not implemented - # yet (Repository.delete is a no-op: dropping a chunk means dropping its whole pack, - # which at N>1 takes good chunks with it). So we recheck each chunk and only report - # the ones that keep failing; real removal will come via compact once delete works at N>1. + # yet (Repository.delete is a no-op: a pack holds multiple objects, so dropping the + # whole pack to drop one chunk would take the good ones with it). So we recheck each + # chunk and only report the ones that keep failing; real removal will come via compact. logger.warning("Found defect chunks. They can not be removed yet and are only reported.") for defect_chunk in defect_chunks: # remote repo (ssh): retry might help for strange network / NIC / RAM errors @@ -1858,10 +1858,10 @@ def verify_data(self): self.repo_objs.parse(defect_chunk, encrypted_data, decompress=True, ro_type=ROBJ_DONTCARE) except IntegrityErrorBase: # failed twice -> we would like to get rid of this defect chunk. We must not - # drop its whole pack here: at N>1 the pack holds other, good chunks too. + # drop its whole pack here: the pack holds other, good chunks too. # Repository.delete is a no-op for now (it just logs); real single-object # removal will happen via compact. - # TODO: actually remove the defect chunk once delete works at N>1. + # TODO: actually remove the defect chunk once single-object delete exists. self.repository.delete(defect_chunk) else: logger.warning("chunk %s not deleted, did not consistently fail.", bin_to_hex(defect_chunk)) diff --git a/src/borg/archiver/debug_cmd.py b/src/borg/archiver/debug_cmd.py index 3ff831b88c..f9eb4fb176 100644 --- a/src/borg/archiver/debug_cmd.py +++ b/src/borg/archiver/debug_cmd.py @@ -297,7 +297,8 @@ def do_debug_delete_obj(self, args, repository): if entry is None: print("object %s not found." % hex_id) else: - # N=1: one chunk per pack, so dropping the pack removes just this object; N>1 needs compaction. + # Drops the whole pack holding this object, so any other objects in it go too. + # A debug tool; for a surgical single-object removal, use compact. try: repository.store_delete("packs/" + bin_to_hex(entry.pack_id)) except StoreObjectNotFound: diff --git a/src/borg/cache.py b/src/borg/cache.py index 5b983f96a9..0a51c0e9e9 100644 --- a/src/borg/cache.py +++ b/src/borg/cache.py @@ -656,7 +656,7 @@ def build_chunkindex_from_repo( # Read each pack's object headers with borgstore range requests, fetching only the fixed-size # headers and skipping the (much larger) encrypted payloads. Don't call Repository.list() here: # it iterates this same index we are building, so it would recurse. The headers also give each - # object's real (chunk_id, offset, size), so this is not limited to one object per pack. + # object's real (chunk_id, offset, size), so every object in a pack is indexed individually. for info in repository.store_list("packs"): # PackReader uses the store directly, so refresh the lock here; a full rebuild can be slow. repository._lock_refresh() diff --git a/src/borg/manifest.py b/src/borg/manifest.py index 5c6df5b802..7a84a343a6 100644 --- a/src/borg/manifest.py +++ b/src/borg/manifest.py @@ -329,7 +329,7 @@ def create(self, name, id, ts, *, overwrite=False): if isinstance(ts, datetime): ts = ts.isoformat(timespec="microseconds") assert isinstance(ts, str) - # flush buffered packs first: the pointer must not reference objects still buffered at N>1. + # flush buffered packs first: the pointer must not reference objects still sitting in the pack writer. self.repository.flush() # we only create a directory entry, its name points to the archive item: self.repository.store_store(f"archives/{bin_to_hex(id)}", b"") diff --git a/src/borg/repository.py b/src/borg/repository.py index babfd8495e..1cecd51075 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -736,10 +736,10 @@ def get(self, id, read_data=True, raise_missing=True): raise self.ObjectNotFound(id, str(self._location)) return None if entry.pack_id == UNKNOWN_BYTES32: - # chunk is buffered in PackWriter, not yet flushed to a pack. at N=1 put() flushes - # immediately, so reaching here points at a flush / index-update ordering bug, not a - # genuinely missing object. this is a code bug, so we crash loudly regardless of - # raise_missing instead of pretending the object is absent. + # chunk is buffered in PackWriter, not yet flushed to a pack. Everything must be flushed + # before it can be read back, so reaching here points at a flush / index-update ordering + # bug, not a genuinely missing object. this is a code bug, so we crash loudly regardless + # of raise_missing instead of pretending the object is absent. raise self.PackLocationUnknown(id, str(self._location)) pack_id, obj_offset, obj_size = entry.pack_id, entry.obj_offset, entry.obj_size id_hex = bin_to_hex(id) @@ -753,10 +753,10 @@ def get(self, id, read_data=True, raise_missing=True): hdr_size = RepoObj.obj_header.size extra_size = 1024 - hdr_size # load a bit more, 1024b, reduces round trips load_size = hdr_size + extra_size - # keep the read inside this object: at N>1 a pack holds neighbouring objects, so - # don't pull bytes past obj_size into the next one. (an overshoot would be harmless - # -- parse_meta uses the header's length and ignores trailing bytes -- this is just - # tidy.) obj_size comes from the same index we already route with. + # keep the read inside this object: a pack holds neighbouring objects, so don't pull + # bytes past obj_size into the next one. (an overshoot would be harmless -- parse_meta + # uses the header's length and ignores trailing bytes -- this is just tidy.) obj_size + # comes from the same index we already route with. load_size = min(load_size, obj_size) obj = self.store.load(key, offset=obj_offset, size=load_size) hdr = obj[0:hdr_size] @@ -787,9 +787,9 @@ def get_many(self, ids, read_data=True, raise_missing=True): def put(self, id, data): """put a repo object - Returns a list of (chunk_id, pack_id, obj_offset, obj_size) tuples for - every chunk written to disk this call. At max_count=1 this is always - one entry. + Buffers the chunk in the pack writer. When the chunk fills the pack and + triggers a flush, returns a list of (chunk_id, pack_id, obj_offset, obj_size) + tuples, one per chunk written to disk by that flush; otherwise returns None. """ self._lock_refresh() data_size = len(data) @@ -804,7 +804,7 @@ def delete(self, id): # We can not remove one object by dropping its whole pack without losing the pack's other # objects; real removal is store_delete at the pack level (compact). For now just check the # object exists (ObjectNotFound contract), log, and do nothing. - # TODO: delete a single object once a pack can hold more than one (N>1). + # TODO: implement single-object delete (today removal only happens at the pack level, via compact). entry = self.chunks.get(id) if entry is None: raise self.ObjectNotFound(id, str(self._location)) diff --git a/src/borg/testsuite/archiver/__init__.py b/src/borg/testsuite/archiver/__init__.py index b9d3d93e77..8993fefe94 100644 --- a/src/borg/testsuite/archiver/__init__.py +++ b/src/borg/testsuite/archiver/__init__.py @@ -184,8 +184,8 @@ def delete_chunk(repository, id): """Drop the pack holding chunk `id` (test damage helper). Repository.delete is a no-op now, so tests that need a chunk to really vanish drop its whole - pack at the store level. Works at N=1 (one chunk per pack). The pack is resolved through the - chunk index, since the pack file name is the pack_id, which need not equal the chunk_id. + pack at the store level (any other chunks sharing that pack go too). The pack is resolved + through the chunk index, which maps the chunk to its pack file. """ entry = repository.chunks.get(id) if entry is not None: diff --git a/src/borg/testsuite/archiver/check_cmd_test.py b/src/borg/testsuite/archiver/check_cmd_test.py index 029d55aeac..bdf41f1dfc 100644 --- a/src/borg/testsuite/archiver/check_cmd_test.py +++ b/src/borg/testsuite/archiver/check_cmd_test.py @@ -308,7 +308,7 @@ def test_manifest_rebuild_corrupted_chunk(archivers, request): # framing rather than the authenticated payload, which made the repair flaky on Windows. corrupted_chunk = corrupt(chunk, len(chunk) // 2) repository.put(archive.id, corrupted_chunk) - repository.flush() # N>1: make the put durable before close()/the check below + repository.flush() # make the put durable before close()/the check below cmd(archiver, "check", exit_code=1) output = cmd(archiver, "check", "-v", "--repair", exit_code=0) assert "archive2" in output @@ -367,7 +367,7 @@ def test_spoofed_archive(archivers, request): ro_type=ROBJ_FILE_STREAM, # a real archive is stored with ROBJ_ARCHIVE_META ), ) - repository.flush() # N>1: make the put durable before close()/the check below + repository.flush() # make the put durable before close()/the check below cmd(archiver, "check", exit_code=1) cmd(archiver, "check", "--repair", "--debug", exit_code=0) output = cmd(archiver, "repo-list") @@ -386,7 +386,7 @@ def test_extra_chunks(archivers, request): key = b"01234567890123456789012345678901" chunk = fchunk(b"xxxx", chunk_id=key) repository.put(key, chunk) - repository.flush() # N>1: make the put durable before close()/the check below + repository.flush() # make the put durable before close()/the check below cmd(archiver, "check", "-v", exit_code=0) # check does not deal with orphans anymore diff --git a/src/borg/testsuite/archiver/compact_cmd_test.py b/src/borg/testsuite/archiver/compact_cmd_test.py index c0d3f94829..b36fbb7ceb 100644 --- a/src/borg/testsuite/archiver/compact_cmd_test.py +++ b/src/borg/testsuite/archiver/compact_cmd_test.py @@ -150,7 +150,7 @@ def test_compact_packs_respects_threshold(tmp_path): # Two multi-object packs in one repo, then pack-level compaction at a 40% threshold. The pack that # wastes 2/3 of its bytes is rewritten down to its single survivor (and its old file deleted); the # pack that wastes only 1/3 stays untouched, since copying its survivors to reclaim that little is - # not worth it. This covers the rewrite, leave-alone and keep/drop split that N=1 can't reach. + # not worth it. This covers the rewrite, leave-alone and keep/drop split unique to multi-object packs. from ...archiver.compact_cmd import ArchiveGarbageCollector location = os.fspath(tmp_path / "repo") diff --git a/src/borg/testsuite/cache_test.py b/src/borg/testsuite/cache_test.py index 152c022b93..626cc062b3 100644 --- a/src/borg/testsuite/cache_test.py +++ b/src/borg/testsuite/cache_test.py @@ -20,7 +20,7 @@ def repository(self, tmpdir): self.repository_location = os.path.join(str(tmpdir), "repository") with Repository(self.repository_location, exclusive=True, create=True) as repository: repository.put(H(1), b"1234") - repository.flush() # N>1: the lone put would stay buffered; make it durable + repository.flush() # the lone put would stay buffered; make it durable yield repository repository.flush() # flush anything a test buffered via the cache before close() diff --git a/src/borg/testsuite/hashindex_test.py b/src/borg/testsuite/hashindex_test.py index b503a7c615..af4ab64d61 100644 --- a/src/borg/testsuite/hashindex_test.py +++ b/src/borg/testsuite/hashindex_test.py @@ -45,7 +45,7 @@ def test_chunkindex_update_pack_info(): assert chunks[x2].obj_offset == UNKNOWN_INT32 pack_id = H2(3) - # Both chunks land in the same pack (N>1 scenario): batch update in one call. + # Both chunks land in the same pack: batch update in one call. chunks.update_pack_info([(x1, pack_id, 0, 50), (x2, pack_id, 50, 60)]) # Location fields updated; flags and size must be unchanged. assert chunks[x1] == ChunkIndexEntry(flags=ChunkIndex.F_USED, size=10, pack_id=pack_id, obj_offset=0, obj_size=50) diff --git a/src/borg/testsuite/repository_test.py b/src/borg/testsuite/repository_test.py index 12b0de1fbb..baf18ba54c 100644 --- a/src/borg/testsuite/repository_test.py +++ b/src/borg/testsuite/repository_test.py @@ -95,14 +95,14 @@ def test_basic_operations(repo_fixtures, request): def test_chunk_index_persisted_on_close(tmp_path): # close() must serialize the live chunk index into the repo cache, so a freshly opened # repo can resolve pack locations without any manual hand-off. This proves the round-trip - # by reading the persisted index back directly (not via a repo rescan, which at N=1 would - # reconstruct the same entries and so could mask a broken persist step). + # by reading the persisted index back directly (not via a repo rescan, which would reconstruct + # the same entries from the pack headers and so could mask a broken persist step). from ..cache import list_chunkindex_hashes, read_chunkindex_from_repo location = os.fspath(tmp_path / "repo") with Repository(location, exclusive=True, create=True) as repository: for x in range(10): - repository.put(H(x), fchunk(b"DATA")) # N=1: each put() flushes immediately + repository.put(H(x), fchunk(b"DATA")) # reopen and read the cached fragments straight from disk with Repository(location, exclusive=True) as repository: persisted = ChunkIndex() @@ -125,7 +125,7 @@ def test_read_data(repo_fixtures, request): chunk_complete = hdr + meta + data chunk_short = hdr + meta repository.put(H(0), chunk_complete) - repository.flush() # N>1: make the buffered pack durable before get() + repository.flush() # make the buffered pack durable before get() assert repository.get(H(0)) == chunk_complete assert repository.get(H(0), read_data=True) == chunk_complete assert repository.get(H(0), read_data=False) == chunk_short @@ -134,7 +134,7 @@ def test_read_data(repo_fixtures, request): def test_consistency(repo_fixtures, request): with get_repository_from_fixture(repo_fixtures, request) as repository: repository.put(H(0), fchunk(b"foo")) - repository.flush() # N>1: flush before reading the just-put chunk back + repository.flush() # flush before reading the just-put chunk back assert pdchunk(repository.get(H(0))) == b"foo" repository.put(H(0), fchunk(b"foo2")) repository.flush() @@ -260,7 +260,7 @@ def test_max_data_size(repo_fixtures, request): with get_repository_from_fixture(repo_fixtures, request) as repository: max_data = b"x" * (MAX_DATA_SIZE - RepoObj.obj_header.size) repository.put(H(0), fchunk(max_data)) - repository.flush() # N>1: make the buffered pack durable before get() + repository.flush() # make the buffered pack durable before get() assert pdchunk(repository.get(H(0))) == max_data with pytest.raises(IntegrityError): repository.put(H(1), fchunk(max_data + b"x")) @@ -434,7 +434,7 @@ def test_get_uses_chunk_index_location(tmp_path): def test_put_marks_id_in_chunk_index(tmp_path): - # At N>1, put() marks the id pending (pack_id=UNKNOWN_BYTES32); flush() then fills in the + # put() marks the id pending (pack_id=UNKNOWN_BYTES32); flush() then fills in the # real pack location for the current session. with Repository(str(tmp_path / "repo"), exclusive=True, create=True) as repository: id1 = H(1) @@ -481,8 +481,8 @@ def test_check_detects_index_corruption(tmp_path): def test_check_intact_multi_object_pack_passes(tmp_path): - # An intact pack with several objects (the N>1 case) passes: it is hashed as a whole, so the - # object count does not matter. + # An intact pack with several objects passes: it is hashed as a whole, so the object count + # does not matter. pack = fchunk(b"A", chunk_id=H(1)) + fchunk(b"BB", chunk_id=H(2)) + fchunk(b"CCC", chunk_id=H(3)) pack_name = "packs/" + bin_to_hex(sha256(pack).digest()) with Repository(str(tmp_path / "repo"), exclusive=True, create=True) as repository: From 82c589f3dad38e3e537a3bc8ae9fb511b1cdf61f Mon Sep 17 00:00:00 2001 From: Mrityunjay Raj Date: Fri, 26 Jun 2026 03:40:51 +0530 Subject: [PATCH 2/5] debug delete-obj: make single-object delete a no-op instead of dropping the whole pack Add explicit flush() in two repository tests so they no longer rely on the put count filling whole packs before close()/get(). --- src/borg/archiver/debug_cmd.py | 16 +++++----------- src/borg/testsuite/archiver/debug_cmds_test.py | 8 +++++--- src/borg/testsuite/repository_test.py | 2 ++ 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/src/borg/archiver/debug_cmd.py b/src/borg/archiver/debug_cmd.py index f9eb4fb176..15dc049a55 100644 --- a/src/borg/archiver/debug_cmd.py +++ b/src/borg/archiver/debug_cmd.py @@ -13,7 +13,7 @@ from ..helpers.argparsing import ArgumentParser from ..manifest import Manifest from ..platform import get_process_id -from ..repository import Repository, LIST_SCAN_LIMIT, StoreObjectNotFound, repo_lister +from ..repository import Repository, LIST_SCAN_LIMIT, repo_lister from ..repoobj import RepoObj from ._common import with_repository, Highlander @@ -297,16 +297,10 @@ def do_debug_delete_obj(self, args, repository): if entry is None: print("object %s not found." % hex_id) else: - # Drops the whole pack holding this object, so any other objects in it go too. - # A debug tool; for a surgical single-object removal, use compact. - try: - repository.store_delete("packs/" + bin_to_hex(entry.pack_id)) - except StoreObjectNotFound: - # index points at an already-gone pack (stale entry) - print("object %s not found." % hex_id) - else: - del repository.chunks[id] - print("object %s deleted." % hex_id) + # We can not drop a single object by removing its whole pack without losing the + # pack's other objects, so do nothing for now, same as Repository.delete(). + # TODO: implement single-object delete (today removal only happens at the pack level, via compact). + print("ignoring deletion of object %s (single-object delete not implemented yet)." % hex_id) print("Done.") def do_debug_convert_profile(self, args): diff --git a/src/borg/testsuite/archiver/debug_cmds_test.py b/src/borg/testsuite/archiver/debug_cmds_test.py index a573811c9d..18101d6e0b 100644 --- a/src/borg/testsuite/archiver/debug_cmds_test.py +++ b/src/borg/testsuite/archiver/debug_cmds_test.py @@ -68,11 +68,13 @@ def test_debug_put_get_delete_obj(archivers, request): data_read = f.read() assert data == data_read + # delete-obj is a no-op for now: a single object can't be dropped without dropping its whole pack. output = cmd(archiver, "debug", "delete-obj", id_hash) - assert "deleted" in output + assert "ignoring deletion" in output - output = cmd(archiver, "debug", "delete-obj", id_hash) - assert "not found" in output + # the object is still there: the no-op did not remove it + output = cmd(archiver, "debug", "get-obj", id_hash, "output/file") + assert id_hash in output output = cmd(archiver, "debug", "delete-obj", "invalid") assert "is invalid" in output diff --git a/src/borg/testsuite/repository_test.py b/src/borg/testsuite/repository_test.py index baf18ba54c..df11d172a1 100644 --- a/src/borg/testsuite/repository_test.py +++ b/src/borg/testsuite/repository_test.py @@ -83,6 +83,7 @@ def test_basic_operations(repo_fixtures, request): with get_repository_from_fixture(repo_fixtures, request) as repository: for x in range(100): repository.put(H(x), fchunk(b"SOMEDATA", chunk_id=H(x))) # put() updates _chunks via PackWriter + repository.flush() # don't rely on the put count filling whole packs; flush before get()/close() key50 = H(50) assert pdchunk(repository.get(key50)) == b"SOMEDATA" # no manual hand-off of the index across reopen: close() persisted it to the repo cache, @@ -103,6 +104,7 @@ def test_chunk_index_persisted_on_close(tmp_path): with Repository(location, exclusive=True, create=True) as repository: for x in range(10): repository.put(H(x), fchunk(b"DATA")) + repository.flush() # don't rely on the put count filling whole packs; flush before close() # reopen and read the cached fragments straight from disk with Repository(location, exclusive=True) as repository: persisted = ChunkIndex() From 5747a19cf8cf69962c9fc6d0475bef017611e616 Mon Sep 17 00:00:00 2001 From: Mrityunjay Raj Date: Fri, 26 Jun 2026 04:05:32 +0530 Subject: [PATCH 3/5] tests: delete_chunk drops a single object via compact_pack, not the whole pack Rewrite the damage helper to rewrite the pack without the target chunk (neighbours survive) and persist the index, then revive three check tests that were skipped because the old whole-pack drop left an inconsistent index. --- src/borg/testsuite/archiver/__init__.py | 22 ++++++++++++------- src/borg/testsuite/archiver/check_cmd_test.py | 16 -------------- 2 files changed, 14 insertions(+), 24 deletions(-) diff --git a/src/borg/testsuite/archiver/__init__.py b/src/borg/testsuite/archiver/__init__.py index 8993fefe94..e063b849c6 100644 --- a/src/borg/testsuite/archiver/__init__.py +++ b/src/borg/testsuite/archiver/__init__.py @@ -17,10 +17,10 @@ from ... import xattr, platform from ...archive import Archive from ...archiver import Archiver, PURE_PYTHON_MSGPACK_WARNING +from ...cache import delete_chunkindex_from_repo, write_chunkindex_to_repo from ...constants import * # NOQA from ...helpers import Location, umount from ...helpers import EXIT_SUCCESS -from ...helpers import bin_to_hex from ...helpers import init_ec_warnings from ...logger import flush_logging from ...manifest import Manifest @@ -181,17 +181,23 @@ def open_archive(repo_path, name): def delete_chunk(repository, id): - """Drop the pack holding chunk `id` (test damage helper). + """Remove a single chunk from the repo, leaving the rest of its pack intact (test damage helper). - Repository.delete is a no-op now, so tests that need a chunk to really vanish drop its whole - pack at the store level (any other chunks sharing that pack go too). The pack is resolved - through the chunk index, which maps the chunk to its pack file. + A pack holds several objects, so dropping the whole pack would take innocent neighbours with it. + Route through Repository.compact_pack, which rewrites the pack without the target and re-points the + survivors, then persist the index the way `borg compact` does (invalidate the cached index before + the store change, write it back after) so a following borg process, e.g. `borg check`, sees it. """ entry = repository.chunks.get(id) - if entry is not None: - repository.store_delete("packs/" + bin_to_hex(entry.pack_id)) - else: + if entry is None: raise Repository.ObjectNotFound(id, repository) + pack_id = entry.pack_id + delete_chunkindex_from_repo(repository) # invalidate the cached index before changing the store + # the index now rebuilds from the packs; keep every object in this pack except the target + keep_ids = {cid for cid, e in repository.chunks.iteritems() if e.pack_id == pack_id} + keep_ids.discard(id) + repository.compact_pack(pack_id, keep_ids=keep_ids, drop_ids={id}) + write_chunkindex_to_repo(repository, repository.chunks, incremental=False, force_write=True, delete_other=True) def open_repository(archiver): diff --git a/src/borg/testsuite/archiver/check_cmd_test.py b/src/borg/testsuite/archiver/check_cmd_test.py index bdf41f1dfc..5658c49da6 100644 --- a/src/borg/testsuite/archiver/check_cmd_test.py +++ b/src/borg/testsuite/archiver/check_cmd_test.py @@ -151,12 +151,6 @@ def test_date_matching(archivers, request): assert archive not in output -@pytest.mark.skip( - reason="TODO: a non-repair check verifies index and packs by sha256, then runs the archive checks " - "(--archives-only) against that verified index instead of rebuilding it from the packs. A real missing " - "chunk would be a corrupted pack (caught by the sha256 pack check) or a borg index bug; detecting this " - "artificial one needs the index rebuild that --repair does. Rework with the index/repair redesign, refs #8572." -) def test_missing_file_chunk(archivers, request): archiver = request.getfixturevalue(archivers) check_cmd_setup(archiver) @@ -199,11 +193,6 @@ def test_missing_file_chunk(archivers, request): assert "Missing file chunk detected" not in output -@pytest.mark.skip( - reason="TODO: a non-repair check verifies index and packs by sha256 and uses that verified index (it " - "does not rebuild it); the index still lists chunks whose pack was removed here, so reading them raises " - "ObjectNotFound instead of being reported as missing. Needs the index/repair redesign, refs #8572." -) def test_missing_archive_item_chunk(archivers, request): archiver = request.getfixturevalue(archivers) check_cmd_setup(archiver) @@ -215,11 +204,6 @@ def test_missing_archive_item_chunk(archivers, request): cmd(archiver, "check", exit_code=0) -@pytest.mark.skip( - reason="TODO: a non-repair check verifies index and packs by sha256 and uses that verified index (it " - "does not rebuild it); the index still lists chunks whose pack was removed here, so reading them raises " - "ObjectNotFound instead of being reported as missing. Needs the index/repair redesign, refs #8572." -) def test_missing_archive_metadata(archivers, request): archiver = request.getfixturevalue(archivers) check_cmd_setup(archiver) From e74d7ef9a0d5843b24207d4df6bb37a66fdac7a7 Mon Sep 17 00:00:00 2001 From: Mrityunjay Raj Date: Fri, 26 Jun 2026 09:51:57 +0530 Subject: [PATCH 4/5] repository: make Repository.delete a real single-object delete Rewrite the object's pack via compact_pack and persist a full chunk index. Wire debug delete-obj and the delete_chunk test helper to it, and drop the store.info HEAD coverage check from compact_pack. --- src/borg/archiver/debug_cmd.py | 10 ++--- src/borg/repository.py | 39 ++++++++++++++----- src/borg/testsuite/archiver/__init__.py | 20 +--------- .../testsuite/archiver/debug_cmds_test.py | 9 ++--- src/borg/testsuite/repository_test.py | 5 ++- 5 files changed, 42 insertions(+), 41 deletions(-) diff --git a/src/borg/archiver/debug_cmd.py b/src/borg/archiver/debug_cmd.py index 15dc049a55..3e0fb26548 100644 --- a/src/borg/archiver/debug_cmd.py +++ b/src/borg/archiver/debug_cmd.py @@ -293,14 +293,12 @@ def do_debug_delete_obj(self, args, repository): except ValueError: print("object id %s is invalid." % hex_id) else: - entry = repository.chunks.get(id) - if entry is None: + try: + repository.delete(id) + except Repository.ObjectNotFound: print("object %s not found." % hex_id) else: - # We can not drop a single object by removing its whole pack without losing the - # pack's other objects, so do nothing for now, same as Repository.delete(). - # TODO: implement single-object delete (today removal only happens at the pack level, via compact). - print("ignoring deletion of object %s (single-object delete not implemented yet)." % hex_id) + print("object %s deleted." % hex_id) print("Done.") def do_debug_convert_profile(self, args): diff --git a/src/borg/repository.py b/src/borg/repository.py index 1cecd51075..7303273745 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -799,22 +799,39 @@ def put(self, id, data): return self._pack_writer.add(id, data) def delete(self, id): - """delete a repo object""" + """Delete a single repo object by rewriting its pack without it. + + A pack holds several objects, so we can not just drop the whole pack: that would take the + target's innocent neighbours with it. Route through compact_pack, which copies the survivors + into a new pack and repoints them in the chunk index. Rewriting a whole pack to remove one + object is slow, but delete is only used by special-purpose callers (tests, the debug command, + check --repair), never on a hot path, so the cost does not matter. + """ self._lock_refresh() - # We can not remove one object by dropping its whole pack without losing the pack's other - # objects; real removal is store_delete at the pack level (compact). For now just check the - # object exists (ObjectNotFound contract), log, and do nothing. - # TODO: implement single-object delete (today removal only happens at the pack level, via compact). + from .cache import write_chunkindex_to_repo + entry = self.chunks.get(id) if entry is None: raise self.ObjectNotFound(id, str(self._location)) - logger.warning("ignoring deletion of %s in %s", bin_to_hex(id), bin_to_hex(entry.pack_id)) + pack_id = entry.pack_id + # keep every other object in the same pack; compact_pack rewrites the pack without the target. + # work off the live index (each entry already carries its offset/size after flush): do not + # rebuild from the packs here, that could not tell the target apart from stale duplicate copies + # of the same id left in other packs by re-puts (build keeps an arbitrary one of them). + keep_ids = {cid for cid, e in self.chunks.iteritems() if e.pack_id == pack_id} + keep_ids.discard(id) + self.compact_pack(pack_id, keep_ids=keep_ids, drop_ids={id}) + # persist a full index so a following borg process sees the deletion; close() only writes new + # entries incrementally and would not record the removal. + write_chunkindex_to_repo(self, self.chunks, incremental=False, force_write=True, delete_other=True) def compact_pack(self, pack_id, *, keep_ids: set, drop_ids: set): """Rewrite pack , keeping and dropping , then delete the old pack. - keep_ids and drop_ids are sets of chunk ids that must together cover the whole pack (asserted: - their ranges tile it with no gap or overlap, and their intersection is empty). Kept objects are + keep_ids and drop_ids are sets of chunk ids that must together be every object of the pack, as + recorded in the chunk index. The caller guarantees that completeness (both callers build the two + sets by iterating the index for this pack_id); here we only assert their ranges tile contiguously + from offset 0 with no gap or overlap, and that their intersection is empty. Kept objects are copied into a new pack via store.defrag and repointed in the chunk index; dropped objects' index entries are removed. @@ -838,7 +855,10 @@ def compact_pack(self, pack_id, *, keep_ids: set, drop_ids: set): located.append((entry.obj_offset, obj_id, entry.obj_size, keep)) located.sort() - # keep + drop must tile the whole pack; collect the objects to keep in the same pass. + # keep + drop tile the pack contiguously from offset 0; collect the objects to keep in the same + # pass. we do not cross-check against the pack's on-disk size: that needs a store.info() HEAD, + # which the stdio rest backend mishandles (it blocks reading a body the HEAD response never + # carries), and the caller already guarantees the two sets are the pack's complete object set. kept = [] # (obj_offset, obj_id, obj_size), offset-ordered covered = 0 for offset, obj_id, size, keep in located: @@ -846,7 +866,6 @@ def compact_pack(self, pack_id, *, keep_ids: set, drop_ids: set): covered += size if keep: kept.append((offset, obj_id, size)) - assert covered == self.store.info(pack_key).size, f"pack {bin_to_hex(pack_id)} not fully covered" for drop_id in drop_ids: # remove dropped objects from the index; their bytes are not copied forward del self.chunks[drop_id] diff --git a/src/borg/testsuite/archiver/__init__.py b/src/borg/testsuite/archiver/__init__.py index e063b849c6..3631edd877 100644 --- a/src/borg/testsuite/archiver/__init__.py +++ b/src/borg/testsuite/archiver/__init__.py @@ -17,7 +17,6 @@ from ... import xattr, platform from ...archive import Archive from ...archiver import Archiver, PURE_PYTHON_MSGPACK_WARNING -from ...cache import delete_chunkindex_from_repo, write_chunkindex_to_repo from ...constants import * # NOQA from ...helpers import Location, umount from ...helpers import EXIT_SUCCESS @@ -181,23 +180,8 @@ def open_archive(repo_path, name): def delete_chunk(repository, id): - """Remove a single chunk from the repo, leaving the rest of its pack intact (test damage helper). - - A pack holds several objects, so dropping the whole pack would take innocent neighbours with it. - Route through Repository.compact_pack, which rewrites the pack without the target and re-points the - survivors, then persist the index the way `borg compact` does (invalidate the cached index before - the store change, write it back after) so a following borg process, e.g. `borg check`, sees it. - """ - entry = repository.chunks.get(id) - if entry is None: - raise Repository.ObjectNotFound(id, repository) - pack_id = entry.pack_id - delete_chunkindex_from_repo(repository) # invalidate the cached index before changing the store - # the index now rebuilds from the packs; keep every object in this pack except the target - keep_ids = {cid for cid, e in repository.chunks.iteritems() if e.pack_id == pack_id} - keep_ids.discard(id) - repository.compact_pack(pack_id, keep_ids=keep_ids, drop_ids={id}) - write_chunkindex_to_repo(repository, repository.chunks, incremental=False, force_write=True, delete_other=True) + """Remove a single chunk from the repo, leaving the rest of its pack intact (test damage helper).""" + repository.delete(id) def open_repository(archiver): diff --git a/src/borg/testsuite/archiver/debug_cmds_test.py b/src/borg/testsuite/archiver/debug_cmds_test.py index 18101d6e0b..22973575c2 100644 --- a/src/borg/testsuite/archiver/debug_cmds_test.py +++ b/src/borg/testsuite/archiver/debug_cmds_test.py @@ -68,13 +68,12 @@ def test_debug_put_get_delete_obj(archivers, request): data_read = f.read() assert data == data_read - # delete-obj is a no-op for now: a single object can't be dropped without dropping its whole pack. output = cmd(archiver, "debug", "delete-obj", id_hash) - assert "ignoring deletion" in output + assert "deleted" in output - # the object is still there: the no-op did not remove it - output = cmd(archiver, "debug", "get-obj", id_hash, "output/file") - assert id_hash in output + # the object is gone now: deleting it again reports it is not there + output = cmd(archiver, "debug", "delete-obj", id_hash) + assert "not found" in output output = cmd(archiver, "debug", "delete-obj", "invalid") assert "is invalid" in output diff --git a/src/borg/testsuite/repository_test.py b/src/borg/testsuite/repository_test.py index df11d172a1..71cae8e922 100644 --- a/src/borg/testsuite/repository_test.py +++ b/src/borg/testsuite/repository_test.py @@ -144,9 +144,10 @@ def test_consistency(repo_fixtures, request): repository.put(H(0), fchunk(b"bar")) repository.flush() assert pdchunk(repository.get(H(0))) == b"bar" - # delete is a no-op for now (see Repository.delete): the latest put still wins. + # delete removes the object the index points at; the stale earlier copies are not resurrected. repository.delete(H(0)) - assert pdchunk(repository.get(H(0))) == b"bar" + with pytest.raises(Repository.ObjectNotFound): + repository.get(H(0)) def test_multi_object_pack_roundtrip(repo_fixtures, request): From 106dfbadaee3f95c35390cc0686cf3bf05c36ade Mon Sep 17 00:00:00 2001 From: Mrityunjay Raj Date: Fri, 26 Jun 2026 23:24:57 +0530 Subject: [PATCH 5/5] tests: drop the delete_chunk wrapper, call Repository.delete directly Inline repository.delete at the check/extract/mount call sites. Drop the borgstore HEAD note from compact_pack now that it is fixed upstream. --- src/borg/repository.py | 5 ++--- src/borg/testsuite/archiver/__init__.py | 5 ----- src/borg/testsuite/archiver/check_cmd_test.py | 8 ++++---- src/borg/testsuite/archiver/extract_cmd_test.py | 3 +-- src/borg/testsuite/archiver/mount_cmds_test.py | 3 +-- 5 files changed, 8 insertions(+), 16 deletions(-) diff --git a/src/borg/repository.py b/src/borg/repository.py index 7303273745..591e5e3831 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -856,9 +856,8 @@ def compact_pack(self, pack_id, *, keep_ids: set, drop_ids: set): located.sort() # keep + drop tile the pack contiguously from offset 0; collect the objects to keep in the same - # pass. we do not cross-check against the pack's on-disk size: that needs a store.info() HEAD, - # which the stdio rest backend mishandles (it blocks reading a body the HEAD response never - # carries), and the caller already guarantees the two sets are the pack's complete object set. + # pass. we do not cross-check against the pack's on-disk size: the caller already guarantees the + # two sets are the pack's complete object set. kept = [] # (obj_offset, obj_id, obj_size), offset-ordered covered = 0 for offset, obj_id, size, keep in located: diff --git a/src/borg/testsuite/archiver/__init__.py b/src/borg/testsuite/archiver/__init__.py index 3631edd877..64422cb769 100644 --- a/src/borg/testsuite/archiver/__init__.py +++ b/src/borg/testsuite/archiver/__init__.py @@ -179,11 +179,6 @@ def open_archive(repo_path, name): return archive, repository -def delete_chunk(repository, id): - """Remove a single chunk from the repo, leaving the rest of its pack intact (test damage helper).""" - repository.delete(id) - - def open_repository(archiver): if archiver.get_kind() == "remote": return Repository(Location(archiver.repository_location), exclusive=True) diff --git a/src/borg/testsuite/archiver/check_cmd_test.py b/src/borg/testsuite/archiver/check_cmd_test.py index 5658c49da6..13f5d2af94 100644 --- a/src/borg/testsuite/archiver/check_cmd_test.py +++ b/src/borg/testsuite/archiver/check_cmd_test.py @@ -11,7 +11,7 @@ from ...manifest import Manifest from ...repository import Repository from ..repository_test import fchunk -from . import cmd, src_file, create_src_archive, open_archive, delete_chunk, generate_archiver_tests, RK_ENCRYPTION +from . import cmd, src_file, create_src_archive, open_archive, generate_archiver_tests, RK_ENCRYPTION pytest_generate_tests = lambda metafunc: generate_archiver_tests(metafunc, kinds="local,remote,binary") # NOQA @@ -162,7 +162,7 @@ def test_missing_file_chunk(archivers, request): if item.path.endswith(src_file): valid_chunks = item.chunks killed_chunk = valid_chunks[-1] - delete_chunk(repository, killed_chunk.id) + repository.delete(killed_chunk.id) break else: pytest.fail("should not happen") # convert 'fail' @@ -198,7 +198,7 @@ def test_missing_archive_item_chunk(archivers, request): check_cmd_setup(archiver) archive, repository = open_archive(archiver.repository_path, "archive1") with repository: - delete_chunk(repository, archive.metadata.items[0]) + repository.delete(archive.metadata.items[0]) cmd(archiver, "check", exit_code=1) cmd(archiver, "check", "--repair", exit_code=0) cmd(archiver, "check", exit_code=0) @@ -209,7 +209,7 @@ def test_missing_archive_metadata(archivers, request): check_cmd_setup(archiver) archive, repository = open_archive(archiver.repository_path, "archive1") with repository: - delete_chunk(repository, archive.id) + repository.delete(archive.id) cmd(archiver, "check", exit_code=1) cmd(archiver, "check", "--repair", exit_code=0) cmd(archiver, "check", exit_code=0) diff --git a/src/borg/testsuite/archiver/extract_cmd_test.py b/src/borg/testsuite/archiver/extract_cmd_test.py index d1b1683a11..7a19d46b5a 100644 --- a/src/borg/testsuite/archiver/extract_cmd_test.py +++ b/src/borg/testsuite/archiver/extract_cmd_test.py @@ -29,7 +29,6 @@ generate_archiver_tests, create_src_archive, open_archive, - delete_chunk, src_file, ) @@ -801,7 +800,7 @@ def test_extract_file_with_missing_chunk(archivers, request): for item in archive.iter_items(): if item.path.endswith(src_file): chunk = item.chunks[-1] - delete_chunk(repository, chunk.id) + repository.delete(chunk.id) break else: assert False # missed the file diff --git a/src/borg/testsuite/archiver/mount_cmds_test.py b/src/borg/testsuite/archiver/mount_cmds_test.py index f55b1e4bd9..c979ba4e3d 100644 --- a/src/borg/testsuite/archiver/mount_cmds_test.py +++ b/src/borg/testsuite/archiver/mount_cmds_test.py @@ -20,7 +20,6 @@ from .. import are_symlinks_supported, are_hardlinks_supported, are_fifos_supported from ..platform.platform_test import fakeroot_detected from . import RK_ENCRYPTION, cmd, assert_dirs_equal, create_regular_file, create_src_archive, open_archive, src_file -from . import delete_chunk from . import requires_hardlinks, _extract_hardlinks_setup, fuse_mount, create_test_files, generate_archiver_tests pytest_generate_tests = lambda metafunc: generate_archiver_tests(metafunc, kinds="local,remote,binary") # NOQA @@ -235,7 +234,7 @@ def test_fuse_allow_damaged_files(archivers, request): with repository: for item in archive.iter_items(): if item.path.endswith(src_file): - delete_chunk(repository, item.chunks[-1].id) + repository.delete(item.chunks[-1].id) path = item.path # store full path for later break else: