@@ -20,15 +20,14 @@ use memmap2::{Mmap, MmapOptions};
2020use protobuf:: { CodedInputStream , MessageDyn } ;
2121use thiserror:: Error ;
2222
23+ use crate :: Variable ;
2324use crate :: compiler:: { RuleId , Rules } ;
2425use crate :: models:: Rule ;
2526use crate :: modules:: { BUILTIN_MODULES , Module , ModuleError } ;
2627use crate :: scanner:: context:: create_wasm_store_and_ctx;
27- use crate :: types:: { Struct , TypeValue } ;
2828use crate :: variables:: VariableError ;
2929use crate :: wasm:: MATCHING_RULES_BITMAP_BASE ;
3030use crate :: wasm:: runtime:: Store ;
31- use crate :: { Variable , modules} ;
3231
3332pub ( crate ) use crate :: scanner:: context:: RuntimeObject ;
3433pub ( crate ) use crate :: scanner:: context:: RuntimeObjectHandle ;
@@ -152,6 +151,7 @@ pub struct ProfilingData<'r> {
152151#[ derive( Debug , Default ) ]
153152pub struct ScanOptions < ' a > {
154153 module_metadata : HashMap < & ' a str , & ' a [ u8 ] > ,
154+ lazy_modules : bool ,
155155}
156156
157157impl < ' a > ScanOptions < ' a > {
@@ -160,7 +160,7 @@ impl<'a> ScanOptions<'a> {
160160 ///
161161 /// Use other methods to add additional information.
162162 pub fn new ( ) -> Self {
163- Self { module_metadata : Default :: default ( ) }
163+ Self { module_metadata : Default :: default ( ) , lazy_modules : false }
164164 }
165165
166166 /// Adds metadata for a YARA module.
@@ -172,6 +172,18 @@ impl<'a> ScanOptions<'a> {
172172 self . module_metadata . insert ( module_name, metadata) ;
173173 self
174174 }
175+
176+ /// Enables or disables lazy module execution for this scan.
177+ ///
178+ /// When enabled, imported modules are executed only if rule condition
179+ /// evaluation actually accesses one of their fields or functions. This can
180+ /// avoid expensive parsers for rules that short-circuit on strings, but
181+ /// modules skipped this way won't appear in [`ScanResults::module_output`]
182+ /// or [`ScanResults::module_outputs`].
183+ pub fn lazy_modules ( mut self , yes : bool ) -> Self {
184+ self . lazy_modules = yes;
185+ self
186+ }
175187}
176188
177189/// Scans data with already compiled YARA rules.
@@ -502,111 +514,33 @@ impl<'r> Scanner<'r> {
502514 // Indicate that the scanner is currently scanning the given data.
503515 ctx. scan_state = ScanState :: ScanningData ( data) ;
504516
505- for module_name in ctx. compiled_rules . imports ( ) {
506- // Lookup the module in the list of built-in modules.
507- let module = modules:: BUILTIN_MODULES
508- . get ( module_name)
509- . unwrap_or_else ( || panic ! ( "module `{module_name}` not found" ) ) ;
510-
511- let root_struct_name = module. root_struct_descriptor . full_name ( ) ;
512-
513- let module_output;
514- // If the user already provided some output for the module by
515- // calling `Scanner::set_module_output`, use that output. If not,
516- // call the module's main function (if the module has a main
517- // function) for getting its output.
518- if let Some ( output) =
519- ctx. user_provided_module_outputs . remove ( root_struct_name)
520- {
521- module_output = Some ( output) ;
522- } else {
523- let meta: Option < & ' opts [ u8 ] > =
524- options. as_ref ( ) . and_then ( |options| {
525- options. module_metadata . get ( module_name) . copied ( )
526- } ) ;
527-
528- if let Some ( main_fn) = module. main_fn {
529- module_output = Some (
530- main_fn ( ctx. scanned_data ( ) . unwrap ( ) , meta) . map_err (
531- |err| ScanError :: ModuleError {
532- module : module_name. to_string ( ) ,
533- err,
534- } ,
535- ) ?,
536- ) ;
537- } else {
538- module_output = None ;
539- }
540- }
517+ ctx. lazy_modules =
518+ options. as_ref ( ) . is_some_and ( |options| options. lazy_modules ) ;
541519
542- if let Some ( module_output) = & module_output {
543- // Make sure that the module is returning a protobuf message of
544- // the expected type.
545- debug_assert_eq ! (
546- module_output. descriptor_dyn( ) . full_name( ) ,
547- module. root_struct_descriptor. full_name( ) ,
548- "main function of module `{}` must return `{}`, but returned `{}`" ,
549- module_name,
550- module. root_struct_descriptor. full_name( ) ,
551- module_output. descriptor_dyn( ) . full_name( ) ,
552- ) ;
553-
554- // Make sure that the module is returning a protobuf message
555- // where all required fields are initialized. This only applies
556- // to proto2, proto3 doesn't have "required" fields, all fields
557- // are optional.
558- debug_assert ! (
559- module_output. is_initialized_dyn( ) ,
560- "module `{}` returned a protobuf `{}` where some required fields are not initialized " ,
561- module_name,
562- module. root_struct_descriptor. full_name( )
563- ) ;
520+ if let Some ( options) = options. as_ref ( ) {
521+ for ( module_name, metadata) in & options. module_metadata {
522+ ctx. module_metadata
523+ . insert ( ( * module_name) . to_string ( ) , ( * metadata) . to_vec ( ) ) ;
564524 }
525+ }
565526
566- // When constant folding is enabled we don't need to generate
567- // structure fields for enums. This is because during the
568- // optimization process symbols like MyEnum.ENUM_ITEM are resolved
569- // to their constant values at compile time. In other words, the
570- // compiler determines that MyEnum.ENUM_ITEM is equal to some value
571- // X, and uses that value in the generated code.
572- //
573- // However, without constant folding, enums are treated as any
574- // other field in a struct, and their values are determined at scan
575- // time. For that reason these fields must be generated for enums
576- // when constant folding is disabled.
577- let generate_fields_for_enums =
578- !cfg ! ( feature = "constant-folding" ) ;
579-
580- let module_struct = Struct :: from_proto_descriptor_and_msg (
581- & module. root_struct_descriptor ,
582- module_output. as_deref ( ) ,
583- generate_fields_for_enums,
584- ) ;
585-
586- if let Some ( module_output) = module_output {
587- ctx. module_outputs
588- . insert ( root_struct_name. to_string ( ) , module_output) ;
527+ if !ctx. lazy_modules {
528+ let imported_modules: Vec < String > =
529+ ctx. compiled_rules . imports ( ) . map ( str:: to_owned) . collect ( ) ;
530+ for module_name in imported_modules {
531+ ctx. materialize_module ( module_name. as_str ( ) ) ?;
589532 }
590-
591- // The data structure obtained from the module is added to the
592- // root structure. Any data from previous scans will be replaced
593- // with the new data structure.
594- ctx. root_struct
595- . add_field ( module_name, TypeValue :: Struct ( module_struct) ) ;
596533 }
597534
598- // The user provided module outputs are not needed anymore. Let's
599- // clear any remaining entry in the hash map (which can happen if
600- // the user has set outputs for modules that are not even imported
601- // by the rules.
602- ctx. user_provided_module_outputs . clear ( ) ;
603-
604535 // Clear the flag that indicates that the search phase was done.
605536 ctx. set_pattern_search_done ( false ) ;
606537
607538 // Evaluate the conditions of every rule, this will call
608539 // `ScanContext::search_for_patterns` if necessary.
609- ctx. eval_conditions ( ) ?;
540+ let eval_result = ctx. eval_conditions ( ) ;
541+ ctx. module_metadata . clear ( ) ;
542+ ctx. user_provided_module_outputs . clear ( ) ;
543+ eval_result?;
610544
611545 let data = match ctx. scan_state . take ( ) {
612546 ScanState :: ScanningData ( data) => data,
@@ -698,7 +632,9 @@ impl<'a, 'r> ScanResults<'a, 'r> {
698632 /// data.
699633 ///
700634 /// The result will be `None` if the module doesn't exist or didn't
701- /// produce any output.
635+ /// produce any output. When [`ScanOptions::lazy_modules`] is enabled,
636+ /// imported modules that were never accessed during condition evaluation
637+ /// are omitted as well.
702638 pub fn module_output (
703639 & self ,
704640 module_name : & str ,
@@ -715,7 +651,9 @@ impl<'a, 'r> ScanResults<'a, 'r> {
715651 /// Returns an iterator that yields tuples composed of a YARA module name
716652 /// and the protobuf produced by that module.
717653 ///
718- /// Only returns the modules that produced some output.
654+ /// Only returns the modules that produced some output. When
655+ /// [`ScanOptions::lazy_modules`] is enabled, imported modules that were
656+ /// never accessed during condition evaluation are not included.
719657 pub fn module_outputs ( & self ) -> ModuleOutputs < ' a , ' r > {
720658 ModuleOutputs :: new ( self . ctx )
721659 }
0 commit comments