|
38 | 38 | import binascii |
39 | 39 | import platform |
40 | 40 | from io import StringIO, BytesIO |
| 41 | +import posixpath as pp # POSIX-safe joins/normpaths |
41 | 42 | try: |
42 | 43 | from backports import tempfile |
43 | 44 | except ImportError: |
@@ -391,7 +392,7 @@ def decode_unicode_escape(value): |
391 | 392 | __version_date__ = str(__version_date_info__[0]) + "." + str( |
392 | 393 | __version_date_info__[1]).zfill(2) + "." + str(__version_date_info__[2]).zfill(2) |
393 | 394 | __revision__ = __version_info__[3] |
394 | | -__revision_id__ = "$Id: 9911cc0b37bf3b39652ce61ec29c1e2180f46e43 $" |
| 395 | +__revision_id__ = "$Id$" |
395 | 396 | if(__version_info__[4] is not None): |
396 | 397 | __version_date_plusrc__ = __version_date__ + \ |
397 | 398 | "-" + str(__version_date_info__[4]) |
@@ -621,6 +622,182 @@ def _normalize_initial_data(data, isbytes, encoding): |
621 | 622 | return str(data) |
622 | 623 |
|
623 | 624 |
|
| 625 | +def _split_posix(path_text): |
| 626 | + """Split POSIX paths regardless of OS; return list of components.""" |
| 627 | + # Normalize leading './' |
| 628 | + if path_text.startswith(u'./'): |
| 629 | + path_text = path_text[2:] |
| 630 | + # Strip redundant slashes |
| 631 | + path_text = re.sub(u'/+', u'/', path_text) |
| 632 | + # Drop trailing '/' so 'dir/' -> ['dir'] |
| 633 | + if path_text.endswith(u'/'): |
| 634 | + path_text = path_text[:-1] |
| 635 | + return path_text.split(u'/') if path_text else [] |
| 636 | + |
| 637 | +def _is_abs_like(s): |
| 638 | + """Absolute targets (POSIX or Windows-drive style).""" |
| 639 | + return s.startswith(u'/') or s.startswith(u'\\') or re.match(u'^[A-Za-z]:[/\\\\]', s) |
| 640 | + |
| 641 | +def _resolves_outside(base_rel, target_rel): |
| 642 | + """ |
| 643 | + Given a base directory (relative, POSIX) and a target (relative), |
| 644 | + return True if base/target resolves outside of base. |
| 645 | + We anchor under '/' so normpath is root-anchored and portable. |
| 646 | + """ |
| 647 | + base_clean = u'/'.join(_split_posix(base_rel)) |
| 648 | + target_clean = u'/'.join(_split_posix(target_rel)) |
| 649 | + base_abs = u'/' + base_clean if base_clean else u'/' |
| 650 | + combined = pp.normpath(pp.join(base_abs, target_clean)) |
| 651 | + if combined == base_abs or combined.startswith(base_abs + u'/'): |
| 652 | + return False |
| 653 | + return True |
| 654 | + |
def DetectTarbombFoxfileArray(listarchivefiles,
                              top_file_ratio_threshold=0.6,
                              min_members_for_ratio=4,
                              symlink_policy="escape-only",  # 'escape-only' | 'deny' | 'single-folder-only'
                              to_text=to_text):
    """
    Inspect an archive listing and report whether it looks like a tarbomb.

    Parameters:
      listarchivefiles: dict whose 'ffilelist' key holds a list of member
        dicts. Each member is read for 'fname', and optionally 'ftype' and
        'flinkname'. None or a missing/empty list is treated as no members.
      top_file_ratio_threshold: when the archive has several top-level
        entries, it is flagged if the fraction of members sitting directly
        at the root is strictly greater than this value.
      min_members_for_ratio: the ratio rule only applies once the archive
        has at least this many counted members.
      symlink_policy:
        - 'escape-only': only symlinks with an absolute target or a target
          that leaves the link's parent directory are flagged
        - 'deny': the presence of any symlink flags the archive
        - 'single-folder-only': symlinks flag the archive unless it has
          exactly one top-level entry
        Any other value adds no policy-specific check.
      to_text: callable used to convert 'fname', 'ftype' and 'flinkname'
        values to text before they are examined.

    A member counts as a symlink when its 'ftype' equals 2 or its text form,
    lower-cased, is 'symlink'. Members whose name splits into no components
    are not counted, although an absolute-looking name is still recorded.

    Returns a dict with the keys:
      is_tarbomb, reasons, total_members, top_level_entries,
      top_level_files_count, has_absolute_paths, has_parent_traversal,
      symlink_escapes_root (bool), symlink_issues (list of
      {'entry', 'target', 'reason'} dicts).
    """
    archive = listarchivefiles or {}
    entries = archive.get('ffilelist') or []

    member_names = []
    saw_absolute = False
    saw_parent_ref = False
    saw_symlink = False
    symlink_escape = False
    link_problems = []

    # ---- Pass 1: per-member path and symlink checks ----
    for entry in entries:
        entry = entry or {}
        raw_name = to_text(entry.get('fname', u""))

        if _is_abs_like(raw_name):
            saw_absolute = True

        components = _split_posix(raw_name)
        if u'..' in components:
            saw_parent_ref = True

        # Nothing left after normalisation: not a countable member.
        if not components:
            continue

        clean_name = u'/'.join(components)
        member_names.append(clean_name)

        kind = entry.get('ftype')
        if kind == 2:
            is_link = True
        elif kind is not None:
            is_link = to_text(kind).lower() == u'symlink'
        else:
            is_link = False
        if not is_link:
            continue

        saw_symlink = True
        link_target = to_text(entry.get('flinkname', u""))
        if _is_abs_like(link_target):
            problem = 'absolute symlink target'
        elif _resolves_outside(u'/'.join(components[:-1]), link_target):
            # The joined parent is '' for a link at the archive root.
            problem = 'symlink escapes parent directory'
        else:
            problem = None

        if problem is not None:
            symlink_escape = True
            link_problems.append({'entry': clean_name, 'target': link_target, 'reason': problem})

    def build_report(flagged, why, count, top_entries, root_count):
        # Single place that fixes the key set and order of the result dict.
        return {
            "is_tarbomb": bool(flagged),
            "reasons": why,
            "total_members": count,
            "top_level_entries": top_entries,
            "top_level_files_count": root_count,
            "has_absolute_paths": saw_absolute,
            "has_parent_traversal": saw_parent_ref,
            "symlink_escapes_root": symlink_escape,
            "symlink_issues": link_problems,
        }

    total = len(member_names)
    if total == 0:
        return build_report(False, ["archive contains no members"], 0, [], 0)

    # ---- Pass 2: how members are distributed over top-level entries ----
    buckets = {}
    root_level_count = 0
    for member in member_names:
        head, slash, _rest = member.partition(u'/')
        buckets[head] = buckets.get(head, 0) + 1
        if not slash:
            # No separator at all: the member sits directly at the root.
            root_level_count += 1

    top_keys = sorted(buckets)
    flagged = False
    reasons = []

    # Path-based dangers
    for triggered, message in (
            (saw_absolute, "contains absolute paths (dangerous)"),
            (saw_parent_ref, "contains parent-traversal ('..') entries (dangerous)"),
            (symlink_escape, "contains symlinks that escape their parent directory"),
    ):
        if triggered:
            flagged = True
            reasons.append(message)

    # Symlink policy enforcement
    if saw_symlink:
        if symlink_policy == "deny":
            flagged = True
            reasons.append("symlinks present and policy is 'deny'")
        elif symlink_policy == "single-folder-only" and len(top_keys) != 1:
            flagged = True
            reasons.append("symlinks present but archive lacks a single top-level folder")

    # Layout heuristics
    if len(top_keys) == 1:
        reasons.append("single top-level entry '{0}'".format(top_keys[0]))
        return build_report(flagged, reasons, total, top_keys, root_level_count)

    ratio = float(root_level_count) / float(total)
    if total >= min_members_for_ratio and ratio > float(top_file_ratio_threshold):
        flagged = True
        reasons.append("high fraction of members ({0:.0%}) at archive root".format(ratio))
    else:
        largest = max(buckets.values()) if buckets else 0
        if largest >= total * 0.9:
            reasons.append("multiple top-level entries but one dominates")
        else:
            flagged = True
            reasons.append("multiple top-level entries with no dominant folder: {0}".format(
                u", ".join(top_keys[:10])))

    return build_report(flagged, reasons, total, top_keys, root_level_count)
| 798 | + |
| 799 | + |
| 800 | + |
624 | 801 | def MkTempFile(data=None, inmem=__use_inmemfile__, isbytes=True, prefix=__project__, |
625 | 802 | delete=True, encoding="utf-8"): |
626 | 803 | """ |
|
0 commit comments