|
3 | 3 | import pathlib |
4 | 4 | import queue |
5 | 5 | import shutil |
| 6 | +import hashlib |
| 7 | +import tarfile |
6 | 8 | import tempfile |
7 | 9 | import zipfile |
8 | 10 | from collections import defaultdict |
@@ -84,6 +86,11 @@ def __init__( |
84 | 86 | self._auditlog_queue = queue.Queue() |
85 | 87 | self._auditlog_listener = AuditLogListener(self._client, |
86 | 88 | self._auditlog_queue) |
| 89 | + self._written_archives: dict[str, dict[tuple[str, ...], str]] |
| 90 | + self._written_archives = { # track created entities archives |
| 91 | + 'plugins': {}, |
| 92 | + 'blueprints': {}, # will do for both blueprints and blueprint_revisions |
| 93 | + } |
87 | 94 |
|
88 | 95 | def create(self, timeout: float | None = None): |
89 | 96 | """Dumps manager's data and some metadata into a single zip file""" |
@@ -263,6 +270,7 @@ def _write_files( |
263 | 270 | if _should_append_entity(dump_type, entity): |
264 | 271 | self._auditlog_listener.append_entity( |
265 | 272 | tenant_name, dump_type, entity) |
| 273 | + self._update_written_archives(entity_id, dump_type, output_dir) |
266 | 274 | # Dump the data as JSON files |
267 | 275 | filenum = _get_max_filenum_in_dir(output_dir) or 0 |
268 | 276 | for (source, source_id), items in data_buckets.items(): |
@@ -308,17 +316,24 @@ def _create_archive(self): |
308 | 316 | ) as zf: |
309 | 317 | base_dir = os.path.join(root_dir, os.curdir) |
310 | 318 | base_dir = os.path.normpath(base_dir) |
311 | | - for dirpath, dirnames, filenames in os.walk(base_dir): |
| 319 | + for dirpath, dirnames, filenames in os.walk(base_dir, followlinks=False): |
| 320 | + root_path = Path(dirpath) |
312 | 321 | arcdirpath = os.path.relpath(dirpath, root_dir) |
313 | 322 | for name in sorted(dirnames): |
314 | 323 | path = os.path.join(dirpath, name) |
315 | 324 | arcname = os.path.join(arcdirpath, name) |
316 | 325 | zf.write(path, arcname) |
317 | 326 | for name in filenames: |
318 | | - path = os.path.join(dirpath, name) |
319 | | - path = os.path.normpath(path) |
320 | | - if os.path.isfile(path): |
321 | | - arcname = os.path.join(arcdirpath, name) |
| 327 | + path = root_path / name |
| 328 | + arcname = path.relative_to(root_dir) |
| 329 | + if path.is_symlink(): |
| 330 | + zip_info = zipfile.ZipInfo(str(arcname)) |
| 331 | + zip_info.create_system = 3 # Unix |
| 332 | + st = os.lstat(path) |
| 333 | + zip_info.external_attr = st.st_mode << 16 |
| 334 | + link_target = os.readlink(path) |
| 335 | + zf.writestr(zip_info, link_target) |
| 336 | + elif os.path.isfile(path): |
322 | 337 | zf.write(path, arcname) |
323 | 338 |
|
324 | 339 | def _upload_archive(self): |
@@ -392,6 +407,29 @@ def _update_snapshot_status(self, status, error=None): |
392 | 407 | error=error |
393 | 408 | ) |
394 | 409 |
|
| 410 | + def _update_written_archives(self, entity_id, dump_type, output_dir): |
| 411 | + dest_dir = (output_dir / f'{dump_type}').resolve() |
| 412 | + suffix = { |
| 413 | + 'plugins': '.zip', |
| 414 | + 'blueprints': '.tar.gz', |
| 415 | + }.get(dump_type) |
| 416 | + if not suffix: |
| 417 | + return |
| 418 | + entity_archive = dest_dir / f'{entity_id}{suffix}' |
| 419 | + if dump_type == 'plugins': |
| 420 | + content_hashes = get_zip_content_hashes(entity_archive) |
| 421 | + else: |
| 422 | + content_hashes = get_tar_gz_content_hashes(entity_archive) |
| 423 | + if existing_path := self._written_archives[dump_type].get(content_hashes): |
| 424 | + entity_archive.unlink(missing_ok=False) |
| 425 | + os.symlink( |
| 426 | + os.path.relpath(existing_path, entity_archive).split("/", 1)[-1], |
| 427 | + entity_archive, |
| 428 | + ) |
| 429 | + ctx.logger.debug("Created symlink: %s to %s", entity_archive, existing_path) |
| 430 | + return |
| 431 | + self._written_archives[dump_type][content_hashes] = entity_archive |
| 432 | + |
395 | 433 |
|
396 | 434 | def _prepare_temp_dir() -> Path: |
397 | 435 | """Prepare temporary (working) directory structure""" |
@@ -516,3 +554,54 @@ def get_all(method, kwargs=None): |
516 | 554 | kwargs['_offset'] = len(data) |
517 | 555 |
|
518 | 556 | return data |
| 557 | + |
| 558 | + |
def get_zip_content_hashes(zip_path) -> tuple[str, ...]:
    """Return a deterministic content fingerprint of a zip archive.

    The fingerprint covers every member file's name and bytes, the set of
    file names, and any empty directories, so two archives fingerprint
    equal iff they contain the same files with the same contents.
    Archive metadata (timestamps, compression settings) is ignored.

    :param zip_path: path of the zip archive to fingerprint
    :return: sorted tuple of hex sha256 digests, usable as a dict key
    """
    hashes: set[str] = set()
    all_dirs: set[str] = set()
    not_empty_dirs: set[str] = set()
    filenames: set[str] = set()
    with zipfile.ZipFile(zip_path, 'r') as archive:
        for info in archive.infolist():
            if info.is_dir():
                all_dirs.add(info.filename.rstrip("/"))
                continue
            filenames.add(info.filename)
            parts = info.filename.split('/')
            for i in range(1, len(parts)):
                not_empty_dirs.add('/'.join(parts[:i]))
            with archive.open(info) as fileobj:
                content = fileobj.read()
            # Bind the member name to its content so archives whose files
            # merely swap contents do not fingerprint as identical.
            digest = hashlib.sha256(
                info.filename.encode("utf-8") + b"\0" + content).hexdigest()
            hashes.add(digest)
    # sorted() keeps the joined digests stable across processes; bare set
    # iteration order depends on hash randomization.
    if filenames:
        joined = ":".join(sorted(filenames))
        hashes.add(hashlib.sha256(joined.encode("utf-8")).hexdigest())
    if empty_dirs := all_dirs - not_empty_dirs:
        joined = ":".join(sorted(empty_dirs))
        hashes.add(hashlib.sha256(joined.encode("utf-8")).hexdigest())
    return tuple(sorted(hashes))
| 582 | + |
| 583 | + |
def get_tar_gz_content_hashes(tar_gz_path) -> tuple[str, ...]:
    """Return a deterministic content fingerprint of a tar.gz archive.

    The fingerprint covers every regular member's name and bytes, the set
    of file names, and members without content (directories and other
    non-file entries) that hold no files, so two archives fingerprint
    equal iff their contents match.  Tar metadata (timestamps, owners,
    gzip header fields) is ignored.

    :param tar_gz_path: path of the gzipped tar archive to fingerprint
    :return: sorted tuple of hex sha256 digests, usable as a dict key
    """
    hashes: set[str] = set()
    all_dirs: set[str] = set()
    not_empty_dirs: set[str] = set()
    filenames: set[str] = set()
    with tarfile.open(tar_gz_path, 'r:gz') as archive:
        for member in archive.getmembers():
            if not member.isfile():
                # Directories (and other special members) count as "empty
                # dirs" unless some file lives beneath them.
                all_dirs.add(member.name.rstrip("/"))
                continue
            filenames.add(member.name)
            parts = member.name.split('/')
            for i in range(1, len(parts)):
                not_empty_dirs.add('/'.join(parts[:i]))
            fileobj = archive.extractfile(member)
            if fileobj:
                content = fileobj.read()
                # Bind the member name to its content so archives whose
                # files merely swap contents do not fingerprint equal.
                digest = hashlib.sha256(
                    member.name.encode("utf-8") + b"\0" + content).hexdigest()
                hashes.add(digest)
    # sorted() keeps the joined digests stable across processes; bare set
    # iteration order depends on hash randomization.
    if filenames:
        joined = ":".join(sorted(filenames))
        hashes.add(hashlib.sha256(joined.encode("utf-8")).hexdigest())
    if empty_dirs := all_dirs - not_empty_dirs:
        joined = ":".join(sorted(empty_dirs))
        hashes.add(hashlib.sha256(joined.encode("utf-8")).hexdigest())
    return tuple(sorted(hashes))
0 commit comments