Skip to content

Commit 82e4f3e

Browse files
EmlParser - fallback to hash-based name for unsafe attachment filenames
1 parent afb2903 commit 82e4f3e

1 file changed

Lines changed: 20 additions & 5 deletions

File tree

analyzers/EmlParser/parse.py

Lines changed: 20 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -168,7 +168,11 @@ def artifacts(self, raw):
168168
tags=["body:attachment", "autoImport:true"] + h["tag"],
169169
)
170170
)
171-
filepath = os.path.join(self.job_directory, "output", os.path.basename(h.get("filename", "")))
171+
filepath = os.path.join(
172+
self.job_directory,
173+
"output",
174+
_attachment_output_name(h.get("filename", ""), h.get("hash")),
175+
)
172176
file_key = ("file", filepath)
173177
if file_key not in seen:
174178
seen.add(file_key)
@@ -194,14 +198,23 @@ def _add_ioc(ioc_list, data, tags):
194198

195199

196200
def _safe_attachment_filename(raw_name):
201+
"""Reduce an attachment name to a safe basename, or None if unusable."""
197202
if not raw_name or "\x00" in raw_name:
198-
raise ValueError("invalid attachment filename")
199-
base = os.path.basename(raw_name)
203+
return None
204+
# normalise Windows separators first; "\" is not a separator on POSIX
205+
base = os.path.basename(raw_name.replace("\\", "/"))
200206
if not base or base in (".", "..") or os.path.isabs(base) or re.match(r"^[A-Za-z]:", base):
201-
raise ValueError("invalid attachment filename")
207+
return None
202208
return base
203209

204210

211+
def _attachment_output_name(raw_name, sha256):
212+
"""On-disk basename for an attachment, falling back to its hash."""
213+
return _safe_attachment_filename(raw_name) or "attachment_{}".format(
214+
(sha256 or "unknown")[:16]
215+
)
216+
217+
205218
def parseEml(filepath, job_directory, wkhtmltoimage, sanitized_rendering):
206219
ep = eml_parser.EmlParser(include_raw_body=True, include_attachment_data=True)
207220
with open(filepath, "rb") as f:
@@ -331,7 +344,9 @@ def parseEml(filepath, job_directory, wkhtmltoimage, sanitized_rendering):
331344
for a in decoded_email.get("attachment"):
332345
a["mime"] = magic.from_buffer(binascii.a2b_base64(a.get("raw")))
333346
if isinstance(a.get("raw"), bytes):
334-
filename = _safe_attachment_filename(a.get("filename", ""))
347+
filename = _attachment_output_name(
348+
a.get("filename", ""), (a.get("hash") or {}).get("sha256")
349+
)
335350
out_dir = os.path.join(job_directory, "output")
336351
filepath = os.path.join(out_dir, filename)
337352
if not os.path.realpath(filepath).startswith(

0 commit comments

Comments
 (0)