Skip to content

Commit c78b98e

Browse files
authored
Ignore text should override trigger text (It should ignore the trigger text if it appears) (#3450)
1 parent 47ffd9a commit c78b98e

4 files changed

Lines changed: 57 additions & 11 deletions

File tree

changedetectionio/model/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,7 @@ def _mark_field_as_edited(self, key):
338338
# These are set by processors/workers and should not trigger edited flag
339339
additional_system_fields = {
340340
'last_check_status', # Set by processors
341+
'last_filter_config_hash', # Set by text_json_diff processor, internal skip-cache
341342
'restock', # Set by restock processor
342343
'last_viewed', # Set by mark_all_viewed endpoint
343344
}

changedetectionio/processors/base.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,6 @@ def read_last_raw_content_checksum(self):
9797
logger.warning(f"Failed to read checksum file for {self.watch_uuid}: {e}")
9898
self.last_raw_content_checksum = None
9999

100-
101100
async def validate_iana_url(self):
102101
"""Pre-flight SSRF check — runs DNS lookup in executor to avoid blocking the event loop.
103102
Covers all fetchers (requests, playwright, puppeteer, plugins) since every fetch goes

changedetectionio/processors/text_json_diff/processor.py

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,30 @@ def trigger_text(self):
105105
def text_should_not_be_present(self):
106106
return self._get_merged_rules('text_should_not_be_present')
107107

108+
def get_filter_config_hash(self):
109+
"""
110+
Stable hash of the effective filter configuration.
111+
112+
Used by the skip-logic in run_changedetection() so that any change to
113+
global settings, tag overrides, or watch filters automatically invalidates
114+
the raw-content-unchanged shortcut — without needing scattered
115+
clear_all_last_checksums() calls at every settings mutation site.
116+
"""
117+
app = self.datastore.data['settings']['application']
118+
config = {
119+
'extract_lines_containing': sorted(self.extract_lines_containing),
120+
'extract_text': sorted(self.extract_text),
121+
'ignore_text': sorted(self.ignore_text),
122+
'include_filters': sorted(self.include_filters),
123+
'subtractive_selectors': sorted(self.subtractive_selectors),
124+
'text_should_not_be_present': sorted(self.text_should_not_be_present),
125+
'trigger_text': sorted(self.trigger_text),
126+
# Global processing flags not captured by the filter lists above
127+
'ignore_whitespace': app.get('ignore_whitespace', False),
128+
'strip_ignored_lines': app.get('strip_ignored_lines', False),
129+
}
130+
return hashlib.md5(json.dumps(config, sort_keys=True).encode()).hexdigest()
131+
108132
@property
109133
def has_include_filters(self):
110134
return bool(self.include_filters) and bool(self.include_filters[0].strip())
@@ -392,19 +416,26 @@ def run_changedetection(self, watch, force_reprocess=False):
392416
raise Exception("Watch no longer exists.")
393417

394418
current_raw_document_checksum = self.get_raw_document_checksum()
395-
# Skip processing only if BOTH conditions are true:
396-
# 1. HTML content unchanged (checksum matches last saved checksum)
397-
# 2. Watch configuration was not edited (including trigger_text, filters, etc.)
398-
# The was_edited flag handles all watch configuration changes, so we don't need
399-
# separate checks for trigger_text or other processing rules.
419+
420+
# Build filter config up front so we can hash it for the skip check.
421+
filter_config = FilterConfig(watch, self.datastore)
422+
current_filter_config_hash = filter_config.get_filter_config_hash()
423+
424+
# Skip only when ALL of these hold:
425+
# 1. raw HTML is unchanged
426+
# 2. watch config was not edited (was_edited covers per-watch field changes)
427+
# 3. effective filter config is unchanged (covers global/tag setting changes that
428+
# bypass was_edited — e.g. global_ignore_text, global_subtractive_selectors)
429+
# last_filter_config_hash being unset or empty (falsy) means first run or upgrade: don't skip.
400430
if (not force_reprocess and
401431
not watch.was_edited and
402432
self.last_raw_content_checksum and
403-
self.last_raw_content_checksum == current_raw_document_checksum):
433+
self.last_raw_content_checksum == current_raw_document_checksum and
434+
watch.get('last_filter_config_hash') and
435+
watch.get('last_filter_config_hash') == current_filter_config_hash):
404436
raise checksumFromPreviousCheckWasTheSame()
405437

406-
# Initialize components
407-
filter_config = FilterConfig(watch, self.datastore)
438+
# Initialize remaining components
408439
content_processor = ContentProcessor(self.fetcher, watch, filter_config, self.datastore)
409440
transformer = ContentTransformer()
410441
rule_engine = RuleEngine()
@@ -425,6 +456,7 @@ def run_changedetection(self, watch, force_reprocess=False):
425456

426457
# Save the raw content checksum to file (processor implementation detail, not watch config)
427458
self.update_last_raw_content_checksum(current_raw_document_checksum)
459+
update_obj['last_filter_config_hash'] = current_filter_config_hash
428460

429461
# === CONTENT PREPROCESSING ===
430462
# Avoid creating unnecessary intermediate string copies by reassigning only when needed
@@ -555,8 +587,8 @@ def run_changedetection(self, watch, force_reprocess=False):
555587
# === BLOCKING RULES EVALUATION ===
556588
blocked = False
557589

558-
# Check trigger_text
559-
if rule_engine.evaluate_trigger_text(stripped_text, filter_config.trigger_text):
590+
# Check trigger_text - use text_for_checksuming so ignore_text can suppress trigger_text
591+
if rule_engine.evaluate_trigger_text(text_for_checksuming, filter_config.trigger_text):
560592
blocked = True
561593

562594
# Check text_should_not_be_present

changedetectionio/tests/test_trigger.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,10 @@ def test_trigger_functionality(client, live_server, measure_memory_usage, datast
7070
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
7171
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
7272

73+
74+
# Also set the trigger text as 'ignore text'; it should then not trigger
75+
live_server.app.config['DATASTORE'].data['settings']['application']['global_ignore_text'] = [trigger_text]
76+
7377
# Trigger a check
7478
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
7579

@@ -122,6 +126,16 @@ def test_trigger_functionality(client, live_server, measure_memory_usage, datast
122126
# Now set the content which contains the trigger text
123127
set_modified_with_trigger_text_response(datastore_path=datastore_path)
124128

129+
# The trigger text is also configured as "ignore text", so it should not trigger
130+
# because the ignore text should be stripped from the response; therefore, the trigger should not fire
131+
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
132+
wait_for_all_checks(client)
133+
res = client.get(url_for("watchlist.index"))
134+
assert b'has-unread-changes' not in res.data
135+
136+
137+
live_server.app.config['DATASTORE'].data['settings']['application']['global_ignore_text'] = []
138+
# Check that the trigger fires once we stop ignoring it
125139
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
126140
wait_for_all_checks(client)
127141
res = client.get(url_for("watchlist.index"))

0 commit comments

Comments
 (0)