|
1 | 1 | import json |
2 | 2 | import os |
| 3 | +import stat |
3 | 4 | from collections import OrderedDict |
4 | 5 | from datetime import datetime, timezone |
5 | 6 | from io import StringIO |
|
10 | 11 | from . import rejected_dotdot_paths |
11 | 12 | from ..crypto.key import PlaintextKey |
12 | 13 | from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, valid_msgpacked_dict, ITEM_KEYS, Statistics |
13 | | -from ..archive import BackupOSError, backup_io, backup_io_iter, get_item_uid_gid |
| 14 | +from ..archive import BackupError, BackupOSError, backup_io, backup_io_iter, get_item_uid_gid |
14 | 15 | from ..helpers import msgpack |
15 | | -from ..item import Item, ArchiveItem |
| 16 | +from ..item import Item, ArchiveItem, ChunkListEntry |
16 | 17 | from ..manifest import Archives, Manifest |
17 | 18 | from ..platform import uid2user, gid2group, is_win32 |
18 | 19 |
|
@@ -435,3 +436,210 @@ def test_archives_get_by_id_missing_returns_none(): |
435 | 436 | manifest = Mock() |
436 | 437 | archives = Archives(repo, manifest) |
437 | 438 | assert archives.get_by_id(b"\x01" * 32) is None |
| 439 | + |
| 440 | + |
| 441 | +# ---- borg extract: in-place chunk comparison / selective extraction (#5638) ---- |
| 442 | + |
| 443 | +CHUNK_SIZE = 4  # length in bytes of each piece make_item() cuts test payloads into
| 444 | + |
| 445 | + |
class FetchManyPipeline:
    """Minimal pipeline stand-in that records which chunk ids fetch_many() requested.

    ``objects`` maps chunk id -> chunk data.  ``fetched`` collects the ids in the order in
    which their data was actually handed out to the consumer.
    """

    def __init__(self, objects):
        self.objects = objects  # id -> data
        self.fetched = []

    def fetch_many(self, chunks, ro_type=None):
        """Return an iterator yielding the stored data for each entry of *chunks*.

        *chunks* is an iterable of objects exposing an ``id`` attribute.  *ro_type* must be
        given (anything but ``None``); its value is otherwise unused here.

        Raises AssertionError if *ro_type* is ``None`` and KeyError (while iterating) for an
        id that is not present in ``objects``.
        """
        # Validate here rather than inside the generator body: a generator does not run any
        # of its code until the first next(), so a caller that forgot ro_type and never
        # consumed the result would otherwise go unnoticed.
        assert ro_type is not None
        return self._serve(chunks)

    def _serve(self, chunks):
        """Lazily yield chunk data, recording each id right before its data is yielded."""
        for chunk in chunks:
            self.fetched.append(chunk.id)
            yield self.objects[chunk.id]
| 458 | + |
| 459 | + |
@pytest.fixture
def extractor(tmpdir):
    """Provide an ``Archive`` named "test" (create=True) whose ``cwd`` is the per-test tmpdir.

    The archive is wired to a ``Mock`` repository and a ``PlaintextKey`` built on that mock.
    """
    mock_repo = Mock()
    plain_key = PlaintextKey(mock_repo)
    test_archive = Archive(manifest=Manifest(plain_key, mock_repo), name="test", create=True)
    test_archive.key = plain_key
    test_archive.cwd = str(tmpdir)
    return test_archive
| 469 | + |
| 470 | + |
def make_item(key, objects, data):
    """Build a regular-file Item (path "test") for *data*, split into CHUNK_SIZE pieces.

    Every piece is hashed with ``key.id_hash`` and stored in *objects* under that id; the
    returned item's ``chunks`` list references the pieces in order.
    """
    pieces = [data[start : start + CHUNK_SIZE] for start in range(0, len(data), CHUNK_SIZE)]
    entries = []
    for piece in pieces:
        chunk_id = key.id_hash(piece)
        entries.append(ChunkListEntry(id=chunk_id, size=len(piece)))
        objects[chunk_id] = piece
    result = Item(path="test", mode=stat.S_IFREG | 0o644, size=len(data))
    result.chunks = entries
    return result
| 482 | + |
| 483 | + |
@pytest.mark.parametrize(
    "name, item_data, fs_data, expected_fetched",
    [
        ("no_change", b"11112222", b"11112222", 0),
        ("first_chunk", b"11112222", b"33332222", 1),
        ("second_chunk", b"11112222", b"11113333", 1),
        ("both_chunks", b"11112222", b"33334444", 2),
        ("cross_boundary", b"11112222", b"11333322", 2),
        ("partial_last_chunk", b"1111222233", b"1111222244", 1),
        ("fs_shorter", b"11112222", b"111122", 1),
        ("fs_longer", b"11112222", b"1111222233", 0),
        ("empty_item", b"", b"11112222", 0),
        ("empty_fs", b"11112222", b"", 2),
    ],
)
def test_compare_and_extract_chunks(extractor, tmpdir, name, item_data, fs_data, expected_fetched):
    """Patch a file holding *fs_data* so that it ends up with *item_data*.

    Checks the call reports success, that exactly *expected_fetched* chunks were requested
    from the pipeline, and that the file content afterwards equals *item_data*.
    *name* only labels the case.
    """
    chunk_store = {}
    archived = make_item(extractor.key, chunk_store, item_data)
    recorder = FetchManyPipeline(chunk_store)
    extractor.pipeline = recorder
    # only the data path is exercised here; attribute (re)storing is covered elsewhere.
    extractor.clear_attrs = Mock()
    extractor.restore_attrs = Mock()

    target = str(tmpdir.join("test"))
    with open(target, "wb") as fh:
        fh.write(fs_data)
    st_before = os.stat(target)

    assert extractor.compare_and_extract_chunks(archived, target, st=st_before)
    assert len(recorder.fetched) == expected_fetched
    with open(target, "rb") as fh:
        assert fh.read() == item_data
| 517 | + |
| 518 | + |
def test_compare_and_extract_chunks_fetches_only_differing(extractor, tmpdir):
    """Only the chunk whose on-disk bytes differ may be requested from the pipeline.

    Besides the list of fetched ids, this also checks that the call reports success and that
    the file ends up with the archived content, so a patch that fetched the right chunk but
    failed to apply it cannot pass.
    """
    objects = {}
    item = make_item(extractor.key, objects, b"11112222")
    pipeline = FetchManyPipeline(objects)
    extractor.pipeline = pipeline
    # attribute handling is not under test here.
    extractor.clear_attrs = Mock()
    extractor.restore_attrs = Mock()

    path = str(tmpdir.join("test"))
    with open(path, "wb") as f:
        f.write(b"1111XXXX")  # only the second chunk differs

    assert extractor.compare_and_extract_chunks(item, path, st=os.stat(path))
    # exactly the (differing) second chunk should have been fetched, not the first.
    assert pipeline.fetched == [item.chunks[1].id]
    with open(path, "rb") as f:
        assert f.read() == b"11112222"
| 534 | + |
| 535 | + |
@pytest.mark.parametrize("st_is_none", [True, False])
def test_compare_and_extract_chunks_skips_non_regular(extractor, tmpdir, st_is_none):
    """The call must return False when *st* is None or describes a directory."""
    chunk_store = {}
    archived = make_item(extractor.key, chunk_store, b"11112222")
    extractor.pipeline = FetchManyPipeline(chunk_store)
    # the non-None case uses the stat of tmpdir itself: a directory, not a regular file
    st = None if st_is_none else os.stat(str(tmpdir))
    target = str(tmpdir.join("test"))
    assert extractor.compare_and_extract_chunks(archived, target, st=st) is False
| 546 | + |
| 547 | + |
def test_compare_and_extract_chunks_size_inconsistency(extractor, tmpdir):
    """An item whose ``size`` disagrees with the sum of its chunk sizes must raise BackupError.

    Raising (instead of silently producing a wrong file) keeps parity with the normal
    extraction path.
    """
    chunk_store = {}
    archived = make_item(extractor.key, chunk_store, b"11112222")
    archived.size = 9999  # deliberately wrong (the chunks add up to 8 bytes)
    extractor.pipeline = FetchManyPipeline(chunk_store)
    extractor.clear_attrs = Mock()
    extractor.restore_attrs = Mock()

    target = str(tmpdir.join("test"))
    with open(target, "wb") as fh:
        fh.write(b"1111XXXX")
    st_before = os.stat(target)

    with pytest.raises(BackupError):
        extractor.compare_and_extract_chunks(archived, target, st=st_before)
| 562 | + |
| 563 | + |
def test_will_patch_in_place(extractor, tmpdir):
    """will_patch_in_place() is True only for a plain regular item whose destination exists."""
    chunk_store = {}
    destination = str(tmpdir.join("test"))

    # item.path == "test", regular file; nothing at the destination yet -> normal extraction
    plain_item = make_item(extractor.key, chunk_store, b"11112222")
    assert extractor.will_patch_in_place(plain_item) is False

    # once a regular file exists at the destination -> patch in place
    with open(destination, "wb") as fh:
        fh.write(b"11112222")
    assert extractor.will_patch_in_place(plain_item) is True

    # hard-linked archive items are never patched in place (even if the file exists)
    linked_item = make_item(extractor.key, chunk_store, b"11112222")
    linked_item.hlid = b"\x00" * 32
    assert extractor.will_patch_in_place(linked_item) is False

    # non-regular archive items (e.g. a directory) are never patched in place
    directory_item = make_item(extractor.key, chunk_store, b"11112222")
    directory_item.mode = stat.S_IFDIR | 0o755
    assert extractor.will_patch_in_place(directory_item) is False
| 585 | + |
| 586 | + |
def test_compare_and_extract_chunks_skips_hardlinks(extractor, tmpdir):
    """An archive item carrying an ``hlid`` must be refused (the call returns False)."""
    chunk_store = {}
    linked_item = make_item(extractor.key, chunk_store, b"11112222")
    # a hard link must use the normal (preloaded) extraction path
    linked_item.hlid = b"\x00" * 32

    target = str(tmpdir.join("test"))
    with open(target, "wb") as fh:
        fh.write(b"11112222")
    st_now = os.stat(target)

    assert extractor.compare_and_extract_chunks(linked_item, target, st=st_now) is False
| 595 | + |
| 596 | + |
def test_compare_and_extract_chunks_skips_hardlinked_file(extractor, tmpdir):
    """A destination whose stat reports st_nlink > 1 must not be patched in place.

    Patching would change the content seen through the other links.  The link count is
    synthesized (st_nlink=2) instead of calling os.link(), because whether a hard link
    actually bumps st_nlink (or is supported at all) depends on the filesystem.
    """
    chunk_store = {}
    archived = make_item(extractor.key, chunk_store, b"11112222")
    extractor.pipeline = FetchManyPipeline(chunk_store)

    target = str(tmpdir.join("test"))
    with open(target, "wb") as fh:
        fh.write(b"11112222")

    stat_fields = list(os.stat(target))  # the 10 standard stat fields
    stat_fields[stat.ST_NLINK] = 2
    fake_st = os.stat_result(stat_fields)

    assert extractor.compare_and_extract_chunks(archived, target, st=fake_st) is False
| 612 | + |
| 613 | + |
def test_compare_and_extract_chunks_skips_file_with_extended_acl(extractor, tmpdir):
    """When ``_fs_has_extended_acl`` reports True, the call must return False.

    Files carrying an extended ACL are not patched in place because clear_attrs() does not
    reset ACLs; they fall back to normal extraction (fresh inode, clean metadata).
    """
    chunk_store = {}
    archived = make_item(extractor.key, chunk_store, b"11112222")
    extractor.pipeline = FetchManyPipeline(chunk_store)
    extractor._fs_has_extended_acl = Mock(return_value=True)

    target = str(tmpdir.join("test"))
    with open(target, "wb") as fh:
        fh.write(b"11112222")
    st_now = os.stat(target)

    assert extractor.compare_and_extract_chunks(archived, target, st=st_now) is False
| 625 | + |
| 626 | + |
@pytest.mark.skipif(is_win32, reason="xattrs/clear_attrs are POSIX-only")
def test_compare_and_extract_chunks_clears_stale_xattr(extractor, tmpdir):
    """An xattr present on the destination but not on the item must be gone after patching.

    Skipped when the filesystem under tmpdir does not support xattrs.
    """
    from .. import xattr as xattr_mod

    text_path = str(tmpdir.join("test"))
    raw_path = text_path.encode()
    with open(raw_path, "wb") as fh:
        fh.write(b"oldcontent")
    if not xattr_mod.is_enabled(str(tmpdir)):
        pytest.skip("xattrs not supported on this filesystem")
    xattr_mod.set_all(raw_path, {b"user.stale": b"1"})

    chunk_store = {}
    archived = make_item(extractor.key, chunk_store, b"11112222")
    extractor.pipeline = FetchManyPipeline(chunk_store)
    # clear_attrs stays real; only restoring the archived attrs is stubbed out
    extractor.restore_attrs = Mock()

    st_before = os.stat(raw_path)
    assert extractor.compare_and_extract_chunks(archived, text_path, st=st_before)
    # the stale xattr that was not part of the archive item must be gone.
    remaining = xattr_mod.get_all(raw_path)
    assert b"user.stale" not in remaining
0 commit comments