@@ -521,42 +521,58 @@ def _run_zircolite(self, zircolite_path, rulesets, timeout, input_path, extra_ar
521521 shutil .rmtree (tmpdir , ignore_errors = True )
522522
523523 def _run_evtx (self , zircolite_path , rulesets , timeout ):
524- """Extract and scan EVTX files."""
524+ """Extract and scan EVTX files from all evtx zips (snapshots + final) ."""
525525 evtx_dir = os .path .join (self .analysis_path , "evtx" )
526- evtx_zip = os .path .join (evtx_dir , "evtx.zip" )
527526
528- if not os .path .exists (evtx_zip ):
529- log .debug ("No evtx.zip found at %s" , evtx_zip )
527+ if not os .path .isdir (evtx_dir ):
528+ log .debug ("No evtx directory found at %s" , evtx_dir )
529+ return None
530+
531+ # Collect all evtx zip files (evtx.zip + evtx_snapshot_*.zip)
532+ evtx_zips = sorted (
533+ os .path .join (evtx_dir , f )
534+ for f in os .listdir (evtx_dir )
535+ if f .endswith (".zip" )
536+ )
537+ if not evtx_zips :
538+ log .debug ("No evtx zip files found in %s" , evtx_dir )
530539 return None
531540
532541 tmpdir = None
533542 try :
534543 tmpdir = tempfile .mkdtemp (prefix = "cape_sigma_evtx_" )
535544 real_tmpdir = os .path .realpath (tmpdir )
536- with zipfile .ZipFile (evtx_zip , "r" ) as zf :
537- # Check total uncompressed size to prevent zip bombs
538- max_extracted = 5 * 1024 * 1024 * 1024 # 5 GB
539- total_uncompressed = sum (m .file_size for m in zf .infolist ())
540- if total_uncompressed > max_extracted :
541- log .warning ("evtx.zip uncompressed size too large (%d bytes), skipping" , total_uncompressed )
542- return None
543-
544- for member in zf .infolist ():
545- # Reject symlinks based on Unix external attributes
546- if (member .external_attr >> 16 ) & 0o170000 == 0o120000 :
547- log .warning ("Symlink in evtx.zip rejected: %s" , member .filename )
548- return None
549- target = os .path .realpath (os .path .join (tmpdir , member .filename ))
550- if not target .startswith (real_tmpdir + os .sep ) and target != real_tmpdir :
551- log .warning ("Zip slip attempt in evtx.zip: %s" , member .filename )
552- return None
553- zf .extract (member , tmpdir )
545+
546+ # Extract all evtx zips (snapshots are incremental, each
547+ # contains events since the last wipe)
548+ max_extracted = 5 * 1024 * 1024 * 1024 # 5 GB
549+ for zip_idx , evtx_zip in enumerate (evtx_zips ):
550+ subdir = os .path .join (tmpdir , str (zip_idx ))
551+ os .makedirs (subdir , exist_ok = True )
552+ try :
553+ with zipfile .ZipFile (evtx_zip , "r" ) as zf :
554+ total_uncompressed = sum (m .file_size for m in zf .infolist ())
555+ if total_uncompressed > max_extracted :
556+ log .warning ("evtx zip too large (%d bytes), skipping: %s" , total_uncompressed , evtx_zip )
557+ continue
558+
559+ for member in zf .infolist ():
560+ if (member .external_attr >> 16 ) & 0o170000 == 0o120000 :
561+ log .warning ("Symlink in evtx zip rejected: %s" , member .filename )
562+ continue
563+ target = os .path .realpath (os .path .join (subdir , member .filename ))
564+ if not target .startswith (os .path .realpath (subdir ) + os .sep ) and target != os .path .realpath (subdir ):
565+ log .warning ("Zip slip attempt in evtx zip: %s" , member .filename )
566+ continue
567+ zf .extract (member , subdir )
568+ except Exception as e :
569+ log .debug ("Failed to extract %s: %s" , evtx_zip , e )
554570
555571 # Defense-in-depth: check for symlinks after extraction
556572 for root , dirs , files in os .walk (tmpdir ):
557573 for name in files + dirs :
558574 if os .path .islink (os .path .join (root , name )):
559- log .warning ("Symlink found in evtx.zip : %s" , name )
575+ log .warning ("Symlink found in evtx: %s" , name )
560576 return None
561577
562578 evtx_files = []
@@ -568,6 +584,80 @@ def _run_evtx(self, zircolite_path, rulesets, timeout):
568584 log .debug ("No .evtx files found in archive" )
569585 return None
570586
587+ # Filter analyzer noise: convert evtx to JSONL via evtx_dump,
588+ # strip events from the CAPE analyzer parent process, and
589+ # feed clean JSONL to zircolite.
590+ evtx_dump_bin = self .options .get ("evtx_dump_bin" , "/usr/local/bin/evtx_dump" )
591+ # Load analyzer noise filter from shared config
592+ analyzer_exclude = set ()
593+ try :
594+ filters_path = self .options .get ("filters" , "data/sigma/filters.json" )
595+ filters_local = self .options .get ("filters_local" , "data/sigma/filters_local.json" )
596+ for fp in [filters_path , filters_local ]:
597+ if fp and not os .path .isabs (fp ):
598+ fp = os .path .join (os .path .dirname (os .path .dirname (os .path .dirname (os .path .abspath (__file__ )))), fp )
599+ if fp and os .path .exists (fp ):
600+ with open (fp ) as _f :
601+ _data = json .load (_f )
602+ _pf = _data .get ("pre_filters" , {})
603+ for p in _pf .get ("exclude_parent_processes" , []):
604+ analyzer_exclude .add (p .lower ())
605+ for p in _pf .get ("exclude_image_processes" , []):
606+ analyzer_exclude .add (p .lower ())
607+ for p in _pf .get ("exclude_target_paths" , []):
608+ analyzer_exclude .add (p .lower ())
609+ except Exception :
610+ pass
611+ if not analyzer_exclude :
612+ analyzer_exclude = {"icacls.exe" , "python.exe" , "wevtutil.exe" , "conhost.exe" }
613+ # Compile a single regex for efficient matching
614+ exclude_re = re .compile ("|" .join (re .escape (p ) for p in analyzer_exclude ), re .IGNORECASE )
615+
616+ if os .path .isfile (evtx_dump_bin ):
617+ filtered_dir = os .path .join (tmpdir , "filtered" )
618+ os .makedirs (filtered_dir , exist_ok = True )
619+ has_filtered = False
620+ for evtx_file in evtx_files :
621+ try :
622+ # Use unique name per snapshot to avoid collisions
623+ rel_path = os .path .relpath (evtx_file , tmpdir )
624+ basename = rel_path .replace (os .sep , "_" ).rsplit ("." , 1 )[0 ] + ".json"
625+ jsonl_path = os .path .join (filtered_dir , basename )
626+ # Stream output line-by-line to avoid loading all into memory
627+ proc = subprocess .Popen (
628+ [evtx_dump_bin , "-o" , "jsonl" , evtx_file ],
629+ stdout = subprocess .PIPE , stderr = subprocess .DEVNULL , text = True ,
630+ )
631+ with open (jsonl_path , "w" ) as out :
632+ for line in proc .stdout :
633+ line = line .rstrip ("\n " )
634+ if not line .strip ():
635+ continue
636+ # Check specific JSON fields rather than substring on whole line
637+ try :
638+ evt = json .loads (line )
639+ event_data = evt .get ("Event" , {}).get ("EventData" , {})
640+ image = str (event_data .get ("Image" , "" ))
641+ parent = str (event_data .get ("ParentImage" , "" ))
642+ target = str (event_data .get ("TargetFilename" , "" ))
643+ if exclude_re .search (image ) or exclude_re .search (parent ) or exclude_re .search (target ):
644+ continue
645+ except (json .JSONDecodeError , AttributeError ):
646+ pass
647+ out .write (line + "\n " )
648+ proc .wait (timeout = 120 )
649+ if os .path .getsize (jsonl_path ) > 0 :
650+ has_filtered = True
651+ except Exception as e :
652+ log .debug ("evtx_dump filter failed for %s: %s" , evtx_file , e )
653+
654+ if has_filtered :
655+ return self ._run_zircolite (
656+ zircolite_path , rulesets , timeout , filtered_dir ,
657+ extra_args = ["--jsonl" ]
658+ )
659+ log .debug ("evtx_dump filtering produced no output, falling back to raw evtx" )
660+
571661 return self ._run_zircolite (zircolite_path , rulesets , timeout , tmpdir )
572662 except Exception as e :
573663 log .warning ("EVTX sigma scan failed: %s" , e )
0 commit comments