Skip to content

Commit 439eab8

Browse files
authored
Merge pull request #553 from wmetcalf/feature/sigma-evtx-snapshots
Sigma: support EVTX periodic snapshots and analyzer noise filtering
2 parents 157a14a + 63da4f4 commit 439eab8

1 file changed

Lines changed: 113 additions & 23 deletions

File tree

modules/processing/sigma.py

Lines changed: 113 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -521,42 +521,58 @@ def _run_zircolite(self, zircolite_path, rulesets, timeout, input_path, extra_ar
521521
shutil.rmtree(tmpdir, ignore_errors=True)
522522

523523
def _run_evtx(self, zircolite_path, rulesets, timeout):
524-
"""Extract and scan EVTX files."""
524+
"""Extract and scan EVTX files from all evtx zips (snapshots + final)."""
525525
evtx_dir = os.path.join(self.analysis_path, "evtx")
526-
evtx_zip = os.path.join(evtx_dir, "evtx.zip")
527526

528-
if not os.path.exists(evtx_zip):
529-
log.debug("No evtx.zip found at %s", evtx_zip)
527+
if not os.path.isdir(evtx_dir):
528+
log.debug("No evtx directory found at %s", evtx_dir)
529+
return None
530+
531+
# Collect all evtx zip files (evtx.zip + evtx_snapshot_*.zip)
532+
evtx_zips = sorted(
533+
os.path.join(evtx_dir, f)
534+
for f in os.listdir(evtx_dir)
535+
if f.endswith(".zip")
536+
)
537+
if not evtx_zips:
538+
log.debug("No evtx zip files found in %s", evtx_dir)
530539
return None
531540

532541
tmpdir = None
533542
try:
534543
tmpdir = tempfile.mkdtemp(prefix="cape_sigma_evtx_")
535544
real_tmpdir = os.path.realpath(tmpdir)
536-
with zipfile.ZipFile(evtx_zip, "r") as zf:
537-
# Check total uncompressed size to prevent zip bombs
538-
max_extracted = 5 * 1024 * 1024 * 1024 # 5 GB
539-
total_uncompressed = sum(m.file_size for m in zf.infolist())
540-
if total_uncompressed > max_extracted:
541-
log.warning("evtx.zip uncompressed size too large (%d bytes), skipping", total_uncompressed)
542-
return None
543-
544-
for member in zf.infolist():
545-
# Reject symlinks based on Unix external attributes
546-
if (member.external_attr >> 16) & 0o170000 == 0o120000:
547-
log.warning("Symlink in evtx.zip rejected: %s", member.filename)
548-
return None
549-
target = os.path.realpath(os.path.join(tmpdir, member.filename))
550-
if not target.startswith(real_tmpdir + os.sep) and target != real_tmpdir:
551-
log.warning("Zip slip attempt in evtx.zip: %s", member.filename)
552-
return None
553-
zf.extract(member, tmpdir)
545+
546+
# Extract all evtx zips (snapshots are incremental, each
547+
# contains events since the last wipe)
548+
max_extracted = 5 * 1024 * 1024 * 1024 # 5 GB
549+
for zip_idx, evtx_zip in enumerate(evtx_zips):
550+
subdir = os.path.join(tmpdir, str(zip_idx))
551+
os.makedirs(subdir, exist_ok=True)
552+
try:
553+
with zipfile.ZipFile(evtx_zip, "r") as zf:
554+
total_uncompressed = sum(m.file_size for m in zf.infolist())
555+
if total_uncompressed > max_extracted:
556+
log.warning("evtx zip too large (%d bytes), skipping: %s", total_uncompressed, evtx_zip)
557+
continue
558+
559+
for member in zf.infolist():
560+
if (member.external_attr >> 16) & 0o170000 == 0o120000:
561+
log.warning("Symlink in evtx zip rejected: %s", member.filename)
562+
continue
563+
target = os.path.realpath(os.path.join(subdir, member.filename))
564+
if not target.startswith(os.path.realpath(subdir) + os.sep) and target != os.path.realpath(subdir):
565+
log.warning("Zip slip attempt in evtx zip: %s", member.filename)
566+
continue
567+
zf.extract(member, subdir)
568+
except Exception as e:
569+
log.debug("Failed to extract %s: %s", evtx_zip, e)
554570

555571
# Defense-in-depth: check for symlinks after extraction
556572
for root, dirs, files in os.walk(tmpdir):
557573
for name in files + dirs:
558574
if os.path.islink(os.path.join(root, name)):
559-
log.warning("Symlink found in evtx.zip: %s", name)
575+
log.warning("Symlink found in evtx: %s", name)
560576
return None
561577

562578
evtx_files = []
@@ -568,6 +584,80 @@ def _run_evtx(self, zircolite_path, rulesets, timeout):
568584
log.debug("No .evtx files found in archive")
569585
return None
570586

587+
# Filter analyzer noise: convert evtx to JSONL via evtx_dump,
588+
# strip events whose Image, ParentImage or TargetFilename matches an analyzer exclusion, and
589+
# feed clean JSONL to zircolite.
590+
evtx_dump_bin = self.options.get("evtx_dump_bin", "/usr/local/bin/evtx_dump")
591+
# Load analyzer noise filter from shared config
592+
analyzer_exclude = set()
593+
try:
594+
filters_path = self.options.get("filters", "data/sigma/filters.json")
595+
filters_local = self.options.get("filters_local", "data/sigma/filters_local.json")
596+
for fp in [filters_path, filters_local]:
597+
if fp and not os.path.isabs(fp):
598+
fp = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), fp)
599+
if fp and os.path.exists(fp):
600+
with open(fp) as _f:
601+
_data = json.load(_f)
602+
_pf = _data.get("pre_filters", {})
603+
for p in _pf.get("exclude_parent_processes", []):
604+
analyzer_exclude.add(p.lower())
605+
for p in _pf.get("exclude_image_processes", []):
606+
analyzer_exclude.add(p.lower())
607+
for p in _pf.get("exclude_target_paths", []):
608+
analyzer_exclude.add(p.lower())
609+
except Exception:
610+
pass
611+
if not analyzer_exclude:
612+
analyzer_exclude = {"icacls.exe", "python.exe", "wevtutil.exe", "conhost.exe"}
613+
# Compile a single regex for efficient matching
614+
exclude_re = re.compile("|".join(re.escape(p) for p in analyzer_exclude), re.IGNORECASE)
615+
616+
if os.path.isfile(evtx_dump_bin):
617+
filtered_dir = os.path.join(tmpdir, "filtered")
618+
os.makedirs(filtered_dir, exist_ok=True)
619+
has_filtered = False
620+
for evtx_file in evtx_files:
621+
try:
622+
# Use unique name per snapshot to avoid collisions
623+
rel_path = os.path.relpath(evtx_file, tmpdir)
624+
basename = rel_path.replace(os.sep, "_").rsplit(".", 1)[0] + ".json"
625+
jsonl_path = os.path.join(filtered_dir, basename)
626+
# Stream output line-by-line to avoid loading all into memory
627+
proc = subprocess.Popen(
628+
[evtx_dump_bin, "-o", "jsonl", evtx_file],
629+
stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True,
630+
)
631+
with open(jsonl_path, "w") as out:
632+
for line in proc.stdout:
633+
line = line.rstrip("\n")
634+
if not line.strip():
635+
continue
636+
# Check specific JSON fields rather than substring on whole line
637+
try:
638+
evt = json.loads(line)
639+
event_data = evt.get("Event", {}).get("EventData", {})
640+
image = str(event_data.get("Image", ""))
641+
parent = str(event_data.get("ParentImage", ""))
642+
target = str(event_data.get("TargetFilename", ""))
643+
if exclude_re.search(image) or exclude_re.search(parent) or exclude_re.search(target):
644+
continue
645+
except (json.JSONDecodeError, AttributeError):
646+
pass
647+
out.write(line + "\n")
648+
proc.wait(timeout=120)
649+
if os.path.getsize(jsonl_path) > 0:
650+
has_filtered = True
651+
except Exception as e:
652+
log.debug("evtx_dump filter failed for %s: %s", evtx_file, e)
653+
654+
if has_filtered:
655+
return self._run_zircolite(
656+
zircolite_path, rulesets, timeout, filtered_dir,
657+
extra_args=["--jsonl"]
658+
)
659+
log.debug("evtx_dump filtering produced no output, falling back to raw evtx")
660+
571661
return self._run_zircolite(zircolite_path, rulesets, timeout, tmpdir)
572662
except Exception as e:
573663
log.warning("EVTX sigma scan failed: %s", e)

0 commit comments

Comments
 (0)