Skip to content

Commit c9ba85f

Browse files
committed
refactor: consolidate disabled pattern tracking in MatchTracker
Combines the `ScanContext::disabled_patterns` and `MatchTracker::limit_reached` sets into a single `MatchTracker::disabled_patterns` set. This centralizes the management of all patterns that are ignored during a scan, whether due to reaching the maximum number of matches, failing filesize/header constraints, or other reasons. It reduces redundancy and makes `MatchTracker` the single source of truth for pattern state during a scan.
1 parent b5e93fc commit c9ba85f

1 file changed

Lines changed: 25 additions & 24 deletions

File tree

lib/src/scanner/context.rs

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,25 @@ impl<'a> ScanState<'a> {
6262
}
6363
}
6464

65+
/// Tracks the matches found during a scan.
6566
pub(crate) struct MatchTracker<'r> {
67+
/// Contains the matches found so far.
6668
pub pattern_matches: PatternMatches,
69+
/// Contains matches for subpatterns that are part of a chain but
70+
/// the whole chain has not been confirmed yet.
6771
pub unconfirmed_matches: FxHashMap<SubPatternId, Vec<UnconfirmedMatch>>,
68-
pub limit_reached: FxHashSet<PatternId>,
72+
/// Patterns that have been disabled, either because they have reached
73+
/// the maximum number of matches or because they are meant to be ignored
74+
/// during this scan because they belong to some rule that we know
75+
/// can't match.
76+
pub disabled_patterns: FxHashSet<PatternId>,
77+
/// The rules that are being used during the scan.
6978
pub compiled_rules: &'r Rules,
79+
/// Indicates whether fast mode is enabled. In fast mode the scanner
80+
/// only looks for the first match of each pattern, unless the condition
81+
/// requires tracking all the matches (i.e: the condition relies on the
82+
/// total number of matches). The drawback is that the user can't retrieve
83+
/// all the matches for a given pattern.
7084
pub fast_scan: bool,
7185
}
7286

@@ -171,9 +185,6 @@ pub struct ScanContext<'r, 'd> {
171185
pub(crate) console_log: Option<Box<dyn FnMut(String) + 'r>>,
172186
/// Virtual Machines used for executing regexps.
173187
pub(crate) vm: VM<'r>,
174-
/// Patterns that are disabled for the current scan (e.g. because they don't
175-
/// comply with filesize bounds or header constraints).
176-
pub(crate) disabled_patterns: FxHashSet<PatternId>,
177188
/// Hash map that tracks the time spent on each pattern. Keys are pattern
178189
/// PatternIds and values are the cumulative time spent on verifying each
179190
/// pattern.
@@ -544,12 +555,8 @@ impl ScanContext<'_, '_> {
544555
// Free all runtime objects left around by previous scans.
545556
self.runtime_objects.clear();
546557

547-
// Clear the set that tracks the disabled patterns.
548-
self.disabled_patterns.clear();
549-
550-
// Clear the array that tracks the patterns that reached the maximum
551-
// number of patterns.
552-
self.tracker.limit_reached.clear();
558+
// Clear the set that tracks the patterns that have been disabled.
559+
self.tracker.disabled_patterns.clear();
553560

554561
self.tracker.unconfirmed_matches.clear();
555562
self.num_matching_private_rules = 0;
@@ -853,11 +860,7 @@ impl ScanContext<'_, '_> {
853860
let (pattern_id, sub_pattern) =
854861
&self.compiled_rules.get_sub_pattern(sub_pattern_id);
855862

856-
if self.disabled_patterns.contains(pattern_id) {
857-
return;
858-
}
859-
860-
if self.tracker.limit_reached.contains(pattern_id) {
863+
if self.tracker.disabled_patterns.contains(pattern_id) {
861864
return;
862865
}
863866

@@ -1075,7 +1078,7 @@ impl ScanContext<'_, '_> {
10751078
self.compiled_rules.filesize_bounds(pattern_id)
10761079
&& !bounds.contains(filesize)
10771080
{
1078-
self.disabled_patterns.insert(pattern_id);
1081+
self.tracker.disabled_patterns.insert(pattern_id);
10791082
}
10801083
}
10811084
}
@@ -1087,7 +1090,7 @@ impl ScanContext<'_, '_> {
10871090
self.compiled_rules.header_constraints(pattern_id)
10881091
&& !constraints.is_satisfied(data)
10891092
{
1090-
self.disabled_patterns.insert(pattern_id);
1093+
self.tracker.disabled_patterns.insert(pattern_id);
10911094
}
10921095
}
10931096
}
@@ -1146,11 +1149,10 @@ impl ScanContext<'_, '_> {
11461149
.anchored_sub_patterns()
11471150
.iter()
11481151
.map(|id| (id, self.compiled_rules.get_sub_pattern(*id)))
1149-
// Disabled patterns are ignored.
1150-
.filter(|(_, (pattern_id, _))| {
1151-
!self.disabled_patterns.contains(pattern_id)
1152-
})
11531152
{
1153+
if self.tracker.disabled_patterns.contains(pattern_id) {
1154+
continue;
1155+
}
11541156
match sub_pattern {
11551157
SubPattern::Literal {
11561158
pattern,
@@ -1774,7 +1776,7 @@ fn track_pattern_match(
17741776
|| (tracker.fast_scan
17751777
&& tracker.compiled_rules.is_fast_scan(pattern_id))
17761778
{
1777-
tracker.limit_reached.insert(pattern_id);
1779+
tracker.disabled_patterns.insert(pattern_id);
17781780
}
17791781
}
17801782

@@ -1935,7 +1937,7 @@ pub fn create_wasm_store_and_ctx<'r>(
19351937
tracker: MatchTracker {
19361938
pattern_matches: PatternMatches::new(),
19371939
unconfirmed_matches: FxHashMap::default(),
1938-
limit_reached: FxHashSet::default(),
1940+
disabled_patterns: FxHashSet::default(),
19391941
compiled_rules: rules,
19401942
fast_scan: false,
19411943
},
@@ -1946,7 +1948,6 @@ pub fn create_wasm_store_and_ctx<'r>(
19461948
pike_vm: PikeVM::new(rules.re_code()),
19471949
fast_vm: FastVM::new(rules.re_code()),
19481950
},
1949-
disabled_patterns: FxHashSet::default(),
19501951
custom_base64_engine_cache: Vec::new(),
19511952
#[cfg(feature = "rules-profiling")]
19521953
time_spent_in_pattern: FxHashMap::default(),

0 commit comments

Comments
 (0)