Skip to content

Commit f0fb796

Browse files
committed
Tests: Add coverage for in-place patching and metadata clearing
Signed-off-by: alighazi288 <51366992+alighazi288@users.noreply.github.com>
1 parent 41f3562 commit f0fb796

2 files changed

Lines changed: 236 additions & 2 deletions

File tree

src/borg/testsuite/archive_test.py

Lines changed: 210 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import json
22
import os
3+
import stat
34
from collections import OrderedDict
45
from datetime import datetime, timezone
56
from io import StringIO
@@ -10,9 +11,9 @@
1011
from . import rejected_dotdot_paths
1112
from ..crypto.key import PlaintextKey
1213
from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, valid_msgpacked_dict, ITEM_KEYS, Statistics
13-
from ..archive import BackupOSError, backup_io, backup_io_iter, get_item_uid_gid
14+
from ..archive import BackupError, BackupOSError, backup_io, backup_io_iter, get_item_uid_gid
1415
from ..helpers import msgpack
15-
from ..item import Item, ArchiveItem
16+
from ..item import Item, ArchiveItem, ChunkListEntry
1617
from ..manifest import Archives, Manifest
1718
from ..platform import uid2user, gid2group, is_win32
1819

@@ -435,3 +436,210 @@ def test_archives_get_by_id_missing_returns_none():
435436
manifest = Mock()
436437
archives = Archives(repo, manifest)
437438
assert archives.get_by_id(b"\x01" * 32) is None
439+
440+
441+
# ---- borg extract: in-place chunk comparison / selective extraction (#5638) ----
442+
443+
# Length of each piece that make_item() cuts its test data into (the last piece may be shorter).
CHUNK_SIZE = 4
444+
445+
446+
class FetchManyPipeline:
    """Test double for a download pipeline.

    It serves chunk data out of an in-memory mapping and keeps a log of the chunk ids
    that were requested through fetch_many(), in request order.
    """

    def __init__(self, objects):
        # mapping: chunk id -> chunk data
        self.objects = objects
        # ids are appended here as the fetch_many() generator is consumed
        self.fetched = []

    def fetch_many(self, chunks, ro_type=None):
        """Lazily yield the stored data for each entry of *chunks*, logging its id first.

        *ro_type* must be given (not None); this is checked when iteration starts.
        """
        assert ro_type is not None
        for entry in chunks:
            wanted = entry.id
            self.fetched.append(wanted)
            yield self.objects[wanted]
458+
459+
460+
@pytest.fixture
def extractor(tmpdir):
    """Return a newly created Archive named "test" that uses *tmpdir* as its cwd.

    The archive sits on top of a mocked repository and a PlaintextKey; the same key is
    also attached to the archive as its ``key`` attribute.
    """
    repo = Mock()
    plain_key = PlaintextKey(repo)
    target = Archive(manifest=Manifest(plain_key, repo), name="test", create=True)
    target.key = plain_key
    target.cwd = str(tmpdir)
    return target
469+
470+
471+
def make_item(key, objects, data):
    """Build a regular-file Item (path "test") whose chunk list covers *data*.

    *data* is cut into consecutive pieces of at most CHUNK_SIZE. Each piece is stored in
    *objects* under key.id_hash(piece) and referenced from the item by a ChunkListEntry.
    """
    total = len(data)
    entries = []
    offset = 0
    while offset < total:
        block = data[offset : offset + CHUNK_SIZE]
        block_id = key.id_hash(block)
        objects[block_id] = block
        entries.append(ChunkListEntry(id=block_id, size=len(block)))
        offset += CHUNK_SIZE
    result = Item(path="test", mode=stat.S_IFREG | 0o644, size=total)
    result.chunks = entries
    return result
482+
483+
484+
@pytest.mark.parametrize(
    "name, item_data, fs_data, expected_fetched",
    [
        # (case name, archived content, content already on disk, number of chunks to fetch)
        ("no_change", b"11112222", b"11112222", 0),
        ("first_chunk", b"11112222", b"33332222", 1),
        ("second_chunk", b"11112222", b"11113333", 1),
        ("both_chunks", b"11112222", b"33334444", 2),
        # the modified bytes straddle the boundary between the two chunks
        ("cross_boundary", b"11112222", b"11333322", 2),
        ("partial_last_chunk", b"1111222233", b"1111222244", 1),
        ("fs_shorter", b"11112222", b"111122", 1),
        # surplus bytes on disk need no fetch, yet the final content must still match
        ("fs_longer", b"11112222", b"1111222233", 0),
        ("empty_item", b"", b"11112222", 0),
        ("empty_fs", b"11112222", b"", 2),
    ],
)
def test_compare_and_extract_chunks(extractor, tmpdir, name, item_data, fs_data, expected_fetched):
    # a file holding fs_data is patched towards item_data; afterwards we check how many
    # chunks were requested from the pipeline and what the file contains.
    store = {}
    archived = make_item(extractor.key, store, item_data)
    recorder = FetchManyPipeline(store)
    extractor.pipeline = recorder
    # attribute handling is tested elsewhere, so both hooks are stubbed out here.
    extractor.clear_attrs = Mock()
    extractor.restore_attrs = Mock()

    target = str(tmpdir.join("test"))
    with open(target, "wb") as fd:
        fd.write(fs_data)
    st = os.stat(target)

    patched = extractor.compare_and_extract_chunks(archived, target, st=st)
    assert patched
    assert len(recorder.fetched) == expected_fetched
    with open(target, "rb") as fd:
        on_disk = fd.read()
    assert on_disk == item_data
517+
518+
519+
def test_compare_and_extract_chunks_fetches_only_differing(extractor, tmpdir):
    # checks *which* chunk id is requested, not merely how many chunks are requested.
    store = {}
    archived = make_item(extractor.key, store, b"11112222")
    recorder = FetchManyPipeline(store)
    extractor.pipeline = recorder
    extractor.clear_attrs = Mock()
    extractor.restore_attrs = Mock()

    target = str(tmpdir.join("test"))
    with open(target, "wb") as fd:
        # first chunk equals the archived one, second chunk does not
        fd.write(b"1111XXXX")

    st = os.stat(target)
    extractor.compare_and_extract_chunks(archived, target, st=st)
    # the pipeline must have been asked for the second chunk only.
    second_id = archived.chunks[1].id
    assert recorder.fetched == [second_id]
534+
535+
536+
@pytest.mark.parametrize("st_is_none", [True, False])
def test_compare_and_extract_chunks_skips_non_regular(extractor, tmpdir, st_is_none):
    # without a stat result, or with the stat result of a directory, the call must return False.
    store = {}
    archived = make_item(extractor.key, store, b"11112222")
    extractor.pipeline = FetchManyPipeline(store)
    # tmpdir itself is a directory, i.e. not a regular file
    st = None if st_is_none else os.stat(str(tmpdir))
    outcome = extractor.compare_and_extract_chunks(archived, str(tmpdir.join("test")), st=st)
    assert outcome is False
546+
547+
548+
def test_compare_and_extract_chunks_size_inconsistency(extractor, tmpdir):
    # an item whose recorded size disagrees with the total size of its chunks has to make
    # the call raise BackupError, so that no wrong file is produced silently
    # (same as the normal extraction path does).
    store = {}
    archived = make_item(extractor.key, store, b"11112222")
    # the chunks sum up to 8 bytes, so this size is wrong on purpose
    archived.size = 9999
    extractor.pipeline = FetchManyPipeline(store)
    extractor.clear_attrs = Mock()
    extractor.restore_attrs = Mock()

    target = str(tmpdir.join("test"))
    with open(target, "wb") as fd:
        fd.write(b"1111XXXX")
    st = os.stat(target)

    with pytest.raises(BackupError):
        extractor.compare_and_extract_chunks(archived, target, st=st)
562+
563+
564+
def test_will_patch_in_place(extractor, tmpdir):
    # walks will_patch_in_place() through one positive and three negative cases.
    store = {}
    content = b"11112222"

    # make_item() gives a regular file item with path "test"; as long as nothing exists
    # at the destination, normal extraction is expected.
    regular_item = make_item(extractor.key, store, content)
    assert extractor.will_patch_in_place(regular_item) is False

    # with a regular file present at the destination, patching in place is expected.
    destination = str(tmpdir.join("test"))
    with open(destination, "wb") as fd:
        fd.write(content)
    assert extractor.will_patch_in_place(regular_item) is True

    # an archive item carrying a hard link id is excluded, although the file exists.
    hardlink_item = make_item(extractor.key, store, content)
    hardlink_item.hlid = b"\x00" * 32
    assert extractor.will_patch_in_place(hardlink_item) is False

    # an archive item that is not a regular file (here: a directory) is excluded as well.
    directory_item = make_item(extractor.key, store, content)
    directory_item.mode = stat.S_IFDIR | 0o755
    assert extractor.will_patch_in_place(directory_item) is False
585+
586+
587+
def test_compare_and_extract_chunks_skips_hardlinks(extractor, tmpdir):
    # an item with a hard link id has to go through the normal (preloaded) extraction
    # path, so the in-place routine must decline it by returning False.
    store = {}
    content = b"11112222"
    archived = make_item(extractor.key, store, content)
    archived.hlid = b"\x00" * 32

    target = str(tmpdir.join("test"))
    with open(target, "wb") as fd:
        fd.write(content)

    st = os.stat(target)
    outcome = extractor.compare_and_extract_chunks(archived, target, st=st)
    assert outcome is False
595+
596+
597+
def test_compare_and_extract_chunks_skips_hardlinked_file(extractor, tmpdir):
    # if the destination file has further hard links (st_nlink > 1), patching it in place
    # would also alter the content visible through those links, so it must be declined.
    # st_nlink=2 is faked here rather than created via os.link(): whether a hard link
    # really increases st_nlink (or can be made at all) is filesystem dependent.
    store = {}
    content = b"11112222"
    archived = make_item(extractor.key, store, content)
    extractor.pipeline = FetchManyPipeline(store)

    target = str(tmpdir.join("test"))
    with open(target, "wb") as fd:
        fd.write(content)

    # copy the 10 standard stat fields, substituting the link count
    faked_fields = [2 if index == stat.ST_NLINK else value for index, value in enumerate(os.stat(target))]
    st = os.stat_result(faked_fields)
    outcome = extractor.compare_and_extract_chunks(archived, target, st=st)
    assert outcome is False
612+
613+
614+
def test_compare_and_extract_chunks_skips_file_with_extended_acl(extractor, tmpdir):
    # clear_attrs() does not reset ACLs, hence a file that has an extended ACL must not be
    # patched in place; it falls back to normal extraction (fresh inode, clean metadata).
    store = {}
    content = b"11112222"
    archived = make_item(extractor.key, store, content)
    extractor.pipeline = FetchManyPipeline(store)
    # pretend the destination file carries an extended ACL
    extractor._fs_has_extended_acl = Mock(return_value=True)

    target = str(tmpdir.join("test"))
    with open(target, "wb") as fd:
        fd.write(content)

    st = os.stat(target)
    outcome = extractor.compare_and_extract_chunks(archived, target, st=st)
    assert outcome is False
625+
626+
627+
@pytest.mark.skipif(is_win32, reason="xattrs/clear_attrs are POSIX-only")
def test_compare_and_extract_chunks_clears_stale_xattr(extractor, tmpdir):
    # an xattr that exists on the destination file but is not part of the archived item
    # must no longer be there after the file was patched in place.
    from .. import xattr as xattr_mod

    target = str(tmpdir.join("test")).encode()
    with open(target, "wb") as fd:
        fd.write(b"oldcontent")
    xattrs_usable = xattr_mod.is_enabled(str(tmpdir))
    if not xattrs_usable:
        pytest.skip("xattrs not supported on this filesystem")
    xattr_mod.set_all(target, {b"user.stale": b"1"})

    store = {}
    archived = make_item(extractor.key, store, b"11112222")
    extractor.pipeline = FetchManyPipeline(store)
    # clear_attrs stays real; only the restoring of archived attributes is stubbed out
    extractor.restore_attrs = Mock()

    patched = extractor.compare_and_extract_chunks(archived, target.decode(), st=os.stat(target))
    assert patched
    remaining = xattr_mod.get_all(target)
    assert b"user.stale" not in remaining

src/borg/testsuite/archiver/extract_cmd_test.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -771,6 +771,32 @@ def test_extract_continue(archivers, request):
771771
assert f.read() == CONTENTS3
772772

773773

774+
def test_extract_patches_existing_file_in_place(archivers, request):
    # extracting on top of an already existing regular file is expected to update that file
    # in place (fetching only the differing chunks, see #5638) rather than to unlink and
    # recreate it.
    archiver = request.getfixturevalue(archivers)
    # 4 MiB of random data, meant to be big enough to end up as more than one chunk.
    contents = os.urandom(4 * 1024 * 1024)
    cmd(archiver, "repo-create", RK_ENCRYPTION)
    create_regular_file(archiver.input_path, "file", contents=contents)
    cmd(archiver, "create", "arch", "input")

    extracted = "input/file"
    with changedir("output"):
        cmd(archiver, "extract", "arch")
        st_before = os.stat(extracted)
        # overwrite some bytes close to the beginning, the remainder stays as extracted.
        with open(extracted, "rb+") as fd:
            fd.seek(5)
            fd.write(b"DIFFERENT")
        # second extraction, without --continue, onto the locally modified file.
        cmd(archiver, "extract", "arch")
        st_after = os.stat(extracted)
        if not is_win32:
            # an unchanged inode number means the file was not replaced by a new one
            assert st_before.st_ino == st_after.st_ino
        with open(extracted, "rb") as fd:
            restored = fd.read()
        # the local modification must be gone again
        assert restored == contents
798+
799+
774800
def test_dry_run_extraction_flags(archivers, request):
775801
archiver = request.getfixturevalue(archivers)
776802
cmd(archiver, "repo-create", RK_ENCRYPTION)

0 commit comments

Comments
 (0)