@@ -105,6 +105,30 @@ def trigger_text(self):
105105 def text_should_not_be_present (self ):
106106 return self ._get_merged_rules ('text_should_not_be_present' )
107107
def get_filter_config_hash(self):
    """
    Return a stable hex digest of the effective filter configuration.

    Used by the skip-logic in run_changedetection() so that any change to
    global settings, tag overrides, or watch filters automatically invalidates
    the raw-content-unchanged shortcut — without needing scattered
    clear_all_last_checksums() calls at every settings mutation site.

    The digest covers the rule lists named below plus the global
    ``ignore_whitespace`` and ``strip_ignored_lines`` application flags.
    Each rule list is sorted before hashing, so reordering its entries does
    not change the digest. A rule list that is ``None`` is hashed as an
    empty list.

    Returns:
        str: hexadecimal MD5 digest of the JSON-serialised configuration.
    """
    rule_list_names = (
        'extract_lines_containing',
        'extract_text',
        'ignore_text',
        'include_filters',
        'subtractive_selectors',
        'text_should_not_be_present',
        'trigger_text',
    )
    app = self.datastore.data['settings']['application']

    # `or ()` makes a None rule list hash like an empty one instead of
    # raising TypeError inside sorted().
    config = {name: sorted(getattr(self, name) or ()) for name in rule_list_names}

    # Global processing flags not captured by the filter lists above
    config['ignore_whitespace'] = app.get('ignore_whitespace', False)
    config['strip_ignored_lines'] = app.get('strip_ignored_lines', False)

    # sort_keys keeps the serialised form independent of dict insertion order.
    payload = json.dumps(config, sort_keys=True).encode()

    # MD5 is only a change fingerprint here, not a security primitive; say so
    # explicitly so restricted (e.g. FIPS) builds do not refuse the call.
    try:
        digest = hashlib.md5(payload, usedforsecurity=False)
    except TypeError:
        # Python < 3.9 does not accept the usedforsecurity keyword.
        digest = hashlib.md5(payload)
    return digest.hexdigest()
131+
@property
def has_include_filters(self):
    """
    Whether a usable include filter is configured.

    True only when ``include_filters`` is non-empty and its first entry
    contains something other than whitespace; later entries are not inspected.
    """
    # Read the attribute once so it is not evaluated twice per call.
    filters = self.include_filters
    return bool(filters) and bool(filters[0].strip())
@@ -392,19 +416,26 @@ def run_changedetection(self, watch, force_reprocess=False):
392416 raise Exception ("Watch no longer exists." )
393417
394418 current_raw_document_checksum = self .get_raw_document_checksum ()
395- # Skip processing only if BOTH conditions are true:
396- # 1. HTML content unchanged (checksum matches last saved checksum)
397- # 2. Watch configuration was not edited (including trigger_text, filters, etc.)
398- # The was_edited flag handles all watch configuration changes, so we don't need
399- # separate checks for trigger_text or other processing rules.
419+
420+ # Build filter config up front so we can hash it for the skip check.
421+ filter_config = FilterConfig (watch , self .datastore )
422+ current_filter_config_hash = filter_config .get_filter_config_hash ()
423+
424+ # Skip only when ALL of these hold:
425+ # 1. raw HTML is unchanged
426+ # 2. watch config was not edited (was_edited covers per-watch field changes)
427+ # 3. effective filter config is unchanged (covers global/tag setting changes that
428+ # bypass was_edited — e.g. global_ignore_text, global_subtractive_selectors)
429+ # last_filter_config_hash being False means first run or upgrade: don't skip.
400430 if (not force_reprocess and
401431 not watch .was_edited and
402432 self .last_raw_content_checksum and
403- self .last_raw_content_checksum == current_raw_document_checksum ):
433+ self .last_raw_content_checksum == current_raw_document_checksum and
434+ watch .get ('last_filter_config_hash' ) and
435+ watch .get ('last_filter_config_hash' ) == current_filter_config_hash ):
404436 raise checksumFromPreviousCheckWasTheSame ()
405437
406- # Initialize components
407- filter_config = FilterConfig (watch , self .datastore )
438+ # Initialize remaining components
408439 content_processor = ContentProcessor (self .fetcher , watch , filter_config , self .datastore )
409440 transformer = ContentTransformer ()
410441 rule_engine = RuleEngine ()
@@ -425,6 +456,7 @@ def run_changedetection(self, watch, force_reprocess=False):
425456
426457 # Save the raw content checksum to file (processor implementation detail, not watch config)
427458 self .update_last_raw_content_checksum (current_raw_document_checksum )
459+ update_obj ['last_filter_config_hash' ] = current_filter_config_hash
428460
429461 # === CONTENT PREPROCESSING ===
430462 # Avoid creating unnecessary intermediate string copies by reassigning only when needed
@@ -555,8 +587,8 @@ def run_changedetection(self, watch, force_reprocess=False):
555587 # === BLOCKING RULES EVALUATION ===
556588 blocked = False
557589
558- # Check trigger_text
559- if rule_engine .evaluate_trigger_text (stripped_text , filter_config .trigger_text ):
590+ # Check trigger_text - use text_for_checksuming so ignore_text can suppress trigger_text
591+ if rule_engine .evaluate_trigger_text (text_for_checksuming , filter_config .trigger_text ):
560592 blocked = True
561593
562594 # Check text_should_not_be_present
0 commit comments