|
3 | 3 | """ |
4 | 4 |
|
5 | 5 | import zipfile |
| 6 | +import tarfile |
6 | 7 | import io |
7 | 8 | import struct |
8 | 9 |
|
| 10 | + |
# Extensions that identify PE files (matched case-insensitively)
PE_EXTENSIONS = {
    '.exe',
    '.dll',
    '.sys',
    '.scr',
    '.ocx',
    '.com',
    '.drv',
    '.cpl',
    '.efi',
}
11 | 13 |
|
@@ -57,6 +59,116 @@ def is_pe_file(filename: str, file_data: bytes) -> bool: |
57 | 59 | return False |
58 | 60 |
|
59 | 61 |
|
def _is_metadata_file(filename: str) -> bool:
    """Check if a file is a metadata file that should be ignored when counting.

    Recognizes the artifacts macOS adds to archives: anything located under a
    ``__MACOSX`` directory (at any depth), AppleDouble resource-fork files
    (basename starting with ``._``) and ``.DS_Store`` files.

    Args:
        filename: The filename/path within the archive

    Returns:
        True if the file is a metadata file, False otherwise
    """
    # Treat backslashes as separators too, so entries written with
    # Windows-style paths are split into the same components.
    parts = filename.replace('\\', '/').split('/')

    # __MACOSX is matched as a directory component, not only as the archive
    # root, so "project/__MACOSX/..." is ignored as well. The last component
    # is the entry's own name and is deliberately excluded here.
    if '__MACOSX' in parts[:-1]:
        return True

    basename = parts[-1]
    # AppleDouble resource forks ("._name") and Finder's .DS_Store
    return basename.startswith('._') or basename == '.DS_Store'
| 82 | + |
| 83 | + |
def is_rar_file(file_data: bytes) -> bool:
    """Check if file is a RAR archive by magic bytes.

    Args:
        file_data: The binary content of the file

    Returns:
        True if the data starts with a RAR signature, False otherwise
    """
    # RAR 1.5-4.x starts with "Rar!\x1a\x07\x00" and RAR 5 with
    # "Rar!\x1a\x07\x01\x00". Comparing the shared 6-byte prefix accepts both
    # while rejecting plain text that merely begins with "Rar!".
    return file_data[:6] == b'Rar!\x1a\x07'
| 88 | + |
| 89 | + |
def is_tar_file(file_data: bytes) -> bool:
    """Check if file is a tar archive (including .tar.gz, .tar.bz2, .tar.xz).

    Data that starts with a gzip, bzip2 or xz signature is only reported as
    tar when ``tarfile`` can actually open it, so a compressed file that is
    not a tarball returns False. Uncompressed data is recognized by the
    ``ustar`` magic at offset 257.

    Args:
        file_data: The binary content of the file

    Returns:
        True if the data is a (possibly compressed) tar archive, False otherwise
    """
    compressed_magics = (
        b'\x1f\x8b',        # gzip  - could be .tar.gz
        b'BZh',             # bzip2 - could be .tar.bz2
        b'\xfd7zXZ\x00',    # xz    - could be .tar.xz
    )
    head = bytes(file_data[:6])  # longest signature above is 6 bytes

    if head.startswith(compressed_magics):
        try:
            # Default mode auto-detects the compression; a successful open is
            # the proof that the compressed stream wraps a tar archive.
            with tarfile.open(fileobj=io.BytesIO(file_data)):
                return True
        except Exception:
            # Best-effort probe: any failure (not a tar, truncated or corrupt
            # stream) simply means "not a tar archive".
            return False

    # Plain tar: "ustar" magic at offset 257. The slice comparison already
    # fails for data shorter than 262 bytes, so no separate length check.
    return file_data[257:262] == b'ustar'
| 121 | + |
| 122 | + |
def is_unsupported_archive(file_data: bytes) -> bool:
    """Tell whether the data is an archive in a format that is not accepted.

    RAR and tar (plain or compressed) are detected; only ZIP files are
    supported for multi-file uploads.
    """
    if is_rar_file(file_data):
        return True
    return is_tar_file(file_data)
| 129 | + |
| 130 | + |
| 131 | +def get_archive_file_count(file_data: bytes) -> int | None: |
| 132 | + """Get the number of files in a ZIP archive. |
| 133 | +
|
| 134 | + Returns None if the file is not a valid ZIP archive. |
| 135 | + Excludes metadata files (macOS __MACOSX, .DS_Store, ._ files) from the count. |
| 136 | +
|
| 137 | + Args: |
| 138 | + file_data: The binary content of the archive file |
| 139 | +
|
| 140 | + Returns: |
| 141 | + Number of files in the archive, or None if not a valid ZIP archive |
| 142 | + """ |
| 143 | + try: |
| 144 | + with zipfile.ZipFile(io.BytesIO(file_data), 'r') as zf: |
| 145 | + file_count = sum( |
| 146 | + 1 for info in zf.infolist() |
| 147 | + if not info.is_dir() and not _is_metadata_file(info.filename) |
| 148 | + ) |
| 149 | + return file_count |
| 150 | + except zipfile.BadZipFile: |
| 151 | + return None |
| 152 | + except Exception: |
| 153 | + return None |
| 154 | + |
| 155 | + |
def is_single_file_archive(file_data: bytes) -> bool:
    """Report whether an archive holds exactly one file.

    Used to reject archives that needlessly wrap a single file; such a file
    should be uploaded directly instead of being archived first.

    Args:
        file_data: The binary content of the archive file

    Returns:
        True if the archive contains exactly one file, False otherwise
    """
    # A count of None (no valid count available) compares unequal to 1,
    # so that case yields False as well.
    return get_archive_file_count(file_data) == 1
| 170 | + |
| 171 | + |
60 | 172 | def is_archive_password_protected(file_data: bytes) -> bool: |
61 | 173 | """Check if an archive file is password-protected. |
62 | 174 |
|
|
0 commit comments