Skip to content

Commit fec4889

Browse files
authored
Add files via upload
1 parent f4210d0 commit fec4889

1 file changed

Lines changed: 178 additions & 1 deletion

File tree

pyfoxfile.py

Lines changed: 178 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import binascii
3939
import platform
4040
from io import StringIO, BytesIO
41+
import posixpath as pp # POSIX-safe joins/normpaths
4142
try:
4243
from backports import tempfile
4344
except ImportError:
@@ -391,7 +392,7 @@ def decode_unicode_escape(value):
391392
__version_date__ = str(__version_date_info__[0]) + "." + str(
392393
__version_date_info__[1]).zfill(2) + "." + str(__version_date_info__[2]).zfill(2)
393394
__revision__ = __version_info__[3]
394-
__revision_id__ = "$Id: 9911cc0b37bf3b39652ce61ec29c1e2180f46e43 $"
395+
__revision_id__ = "$Id$"
395396
if(__version_info__[4] is not None):
396397
__version_date_plusrc__ = __version_date__ + \
397398
"-" + str(__version_date_info__[4])
@@ -621,6 +622,182 @@ def _normalize_initial_data(data, isbytes, encoding):
621622
return str(data)
622623

623624

625+
def _split_posix(path_text):
626+
"""Split POSIX paths regardless of OS; return list of components."""
627+
# Normalize leading './'
628+
if path_text.startswith(u'./'):
629+
path_text = path_text[2:]
630+
# Strip redundant slashes
631+
path_text = re.sub(u'/+', u'/', path_text)
632+
# Drop trailing '/' so 'dir/' -> ['dir']
633+
if path_text.endswith(u'/'):
634+
path_text = path_text[:-1]
635+
return path_text.split(u'/') if path_text else []
636+
637+
def _is_abs_like(s):
638+
"""Absolute targets (POSIX or Windows-drive style)."""
639+
return s.startswith(u'/') or s.startswith(u'\\') or re.match(u'^[A-Za-z]:[/\\\\]', s)
640+
641+
def _resolves_outside(base_rel, target_rel):
642+
"""
643+
Given a base directory (relative, POSIX) and a target (relative),
644+
return True if base/target resolves outside of base.
645+
We anchor under '/' so normpath is root-anchored and portable.
646+
"""
647+
base_clean = u'/'.join(_split_posix(base_rel))
648+
target_clean = u'/'.join(_split_posix(target_rel))
649+
base_abs = u'/' + base_clean if base_clean else u'/'
650+
combined = pp.normpath(pp.join(base_abs, target_clean))
651+
if combined == base_abs or combined.startswith(base_abs + u'/'):
652+
return False
653+
return True
654+
655+
def DetectTarbombFoxfileArray(listarchivefiles,
                              top_file_ratio_threshold=0.6,
                              min_members_for_ratio=4,
                              symlink_policy="escape-only",  # 'escape-only' | 'deny' | 'single-folder-only'
                              to_text=to_text):
    """
    Inspect a parsed archive listing and report whether it looks like a tarbomb.

    Parameters:
      listarchivefiles: dict whose 'ffilelist' key holds a list of member
        dicts.  Each member is read for 'fname', 'ftype' and 'flinkname'.
        None or a missing/empty list is treated as an empty archive.
      top_file_ratio_threshold: when the fraction of members sitting directly
        at the archive root is strictly greater than this, the archive is
        flagged (only applies with more than one top-level entry).
      min_members_for_ratio: minimum number of named members before the
        ratio rule above is applied.
      symlink_policy:
        - 'escape-only': only absolute or parent-escaping symlinks are unsafe
        - 'deny': any symlink flags the archive
        - 'single-folder-only': symlinks flag the archive unless it has
          exactly one top-level entry
        Any other value adds no extra policy check.
      to_text: callable used to normalise names, link targets and a
        non-None 'ftype' to text.

    A member counts as a symlink when its 'ftype' equals 2 or its text form,
    lower-cased, is u'symlink'.  Members whose name splits to no components
    are not counted.

    Returns a dict with keys:
      is_tarbomb, reasons, total_members, top_level_entries,
      top_level_files_count, has_absolute_paths, has_parent_traversal,
      symlink_escapes_root, symlink_issues (list of
      {'entry', 'target', 'reason'} dicts).
    """
    entries = (listarchivefiles or {}).get('ffilelist') or []

    names = []
    has_abs = False
    has_parent = False
    has_any_symlink = False
    any_symlink_escape = False
    symlink_issues = []

    # ---- Pass 1: per-member path and symlink checks ----
    for entry in entries:
        entry = entry or {}
        raw_name = to_text(entry.get('fname', u""))

        if _is_abs_like(raw_name):
            has_abs = True

        parts = _split_posix(raw_name)
        if u'..' in parts:
            has_parent = True

        # Names that reduce to nothing are not counted as members.
        if not parts:
            continue

        norm_name = u'/'.join(parts)
        names.append(norm_name)

        kind = entry.get('ftype')
        if kind == 2:
            is_link = True
        elif kind is None:
            is_link = False
        else:
            is_link = to_text(kind).lower() == u'symlink'
        if not is_link:
            continue

        has_any_symlink = True
        link_target = to_text(entry.get('flinkname', u""))
        if _is_abs_like(link_target):
            why = 'absolute symlink target'
        elif _resolves_outside(u'/'.join(parts[:-1]), link_target):
            # The base passed above is the link's own directory ('' at root).
            why = 'symlink escapes parent directory'
        else:
            continue
        any_symlink_escape = True
        symlink_issues.append({'entry': norm_name, 'target': link_target, 'reason': why})

    # The report starts out as the "empty archive" answer; the layout fields
    # are overwritten below once there is at least one member.
    report = {
        "is_tarbomb": False,
        "reasons": ["archive contains no members"],
        "total_members": 0,
        "top_level_entries": [],
        "top_level_files_count": 0,
        "has_absolute_paths": has_abs,
        "has_parent_traversal": has_parent,
        "symlink_escapes_root": any_symlink_escape,
        "symlink_issues": symlink_issues,
    }
    total = len(names)
    if total == 0:
        return report

    # ---- Pass 2: how members are distributed across top-level entries ----
    top_counts = {}
    root_entry_count = 0
    for member_path in names:
        head, slash, _tail = member_path.partition(u'/')
        top_counts[head] = top_counts.get(head, 0) + 1
        if not slash:  # no separator: the entry sits directly at the root
            root_entry_count += 1
    top_keys = sorted(top_counts)

    flagged = False
    reasons = []

    # Path-based dangers, reported in a fixed order.
    danger_flags = (
        (has_abs, "contains absolute paths (dangerous)"),
        (has_parent, "contains parent-traversal ('..') entries (dangerous)"),
        (any_symlink_escape, "contains symlinks that escape their parent directory"),
    )
    for triggered, message in danger_flags:
        if triggered:
            flagged = True
            reasons.append(message)

    # Symlink policy enforcement
    if has_any_symlink:
        if symlink_policy == "deny":
            flagged = True
            reasons.append("symlinks present and policy is 'deny'")
        elif symlink_policy == "single-folder-only" and len(top_keys) != 1:
            flagged = True
            reasons.append("symlinks present but archive lacks a single top-level folder")

    # Layout heuristics
    if len(top_keys) == 1:
        reasons.append("single top-level entry '{0}'".format(top_keys[0]))
    else:
        ratio = float(root_entry_count) / float(total)
        if total >= min_members_for_ratio and ratio > float(top_file_ratio_threshold):
            flagged = True
            reasons.append("high fraction of members ({0:.0%}) at archive root".format(ratio))
        elif (max(top_counts.values()) if top_counts else 0) < total * 0.9:
            flagged = True
            reasons.append("multiple top-level entries with no dominant folder: {0}".format(
                u", ".join(top_keys[:10])))
        else:
            reasons.append("multiple top-level entries but one dominates")

    report["is_tarbomb"] = bool(flagged)
    report["reasons"] = reasons
    report["total_members"] = total
    report["top_level_entries"] = top_keys
    report["top_level_files_count"] = root_entry_count
    return report
798+
799+
800+
624801
def MkTempFile(data=None, inmem=__use_inmemfile__, isbytes=True, prefix=__project__,
625802
delete=True, encoding="utf-8"):
626803
"""

0 commit comments

Comments
 (0)