Skip to content

Commit 08028fd

Browse files
Mikarina13 and claude committed
fix: gate screen context injection to prevent irrelevant pollution
The `[SCREEN CONTEXT: ...]` block was being injected into every task whenever a screenshot had been captured, even when the user's intent had nothing to do with the screen. A saved-for-later screenshot stayed in state forever, causing Qwen 35B to chew through up to 800 characters of unrelated UI text for simple queries like "speed test 1+1" — and, in one case, to ramble about Cloudflare Access because an earlier chat window was still cached as "screen context".

Two fixes in codec.py + codec_keyboard.py:

1. Relevance gate: skip screen context for trivial intents matched by a small regex (arithmetic, time/date, bitcoin/weather, greetings, status, ping).
2. TTL: drop captured screen context after 120s. A `screen_ctx_ts` timestamp is added to state and checked on every injection attempt.

The existing one-shot behavior (use + clear) is preserved via the `_maybe_screen_context()` helper in codec.py. `codec_keyboard.py` inlines the same logic since it doesn't import from codec.py.

Verified: 8/8 trivial patterns match, 4/4 screen-relevant phrases preserved.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 159e67f commit 08028fd

File tree

2 files changed

+77
-9
lines changed

2 files changed

+77
-9
lines changed

codec.py

Lines changed: 54 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -140,11 +140,61 @@ def screenshot_ctx():
140140
"last_f13": 0.0,
141141
"last_star": 0.0,
142142
"screen_ctx": "",
143+
"screen_ctx_ts": 0.0, # when screen_ctx was captured; used for TTL expiry
143144
"last_plus": 0.0,
144145
"last_minus": 0.0,
145146
"doc_ctx": "",
146147
}
147148

149+
# ── SCREEN-CONTEXT RELEVANCE GATE ─────────────────────────────────────────────
# Tasks that clearly have nothing to do with the screen — skip context injection
# to prevent the LLM from being confused by stale/irrelevant captured text.
#
# Two families of patterns, both anchored at the start of the task:
#   * prefix intents  — the task only has to START with the phrase
#                       ("speed test 1+1", "weather in Paris");
#   * whole-task intents — the phrase must be the ENTIRE task. Greetings and a
#                       bare "time" live here so that "hey, what does this error
#                       mean?" or "time to explain this page" still receive the
#                       screen context the user is clearly referring to.
_TRIVIAL_SCREEN_BYPASS = re.compile(
    r"^\s*(?:"
    r"(?:"
    r"\d+\s*[+\-*/x×÷]\s*\d+"               # arithmetic: "1+1", "5 * 3"
    r"|what\s*time"                         # "what time is it"
    r"|time\s*(?:is\s*it|now)"              # "time now" (suffix is required)
    r"|what(?:'?s?|\s+is)\s+the\s+date"     # "what's the date", "what is the date"
    r"|bitcoin\s*(?:price)?"                # "bitcoin price"
    r"|btc\s*price"
    r"|weather"                             # weather queries
    r"|calculate"                           # "calculate 5 * 4", "calculate (5+4)"
    r"|speed\s*test"                        # the user's actual failing query
    r"|ping"
    r"|status|health|uptime"                # system checks
    r")\b"
    r"|(?:hello|hi|hey|time)\s*[!?.]*\s*$"  # bare greeting / bare "time" only
    r")",
    re.IGNORECASE,
)
_SCREEN_CTX_TTL = 120.0  # seconds — stale screen context expires


def _maybe_screen_context(task: str) -> str:
    """Return ' [SCREEN CONTEXT: ...]' to append to *task*, or '' if skipped.

    Reads ``state["screen_ctx"]`` / ``state["screen_ctx_ts"]`` and decides:

    * no captured context            -> ``''``;
    * context older than the TTL     -> ``''`` and the context is discarded;
    * task matches the trivial gate  -> ``''`` and the context is KEPT, so a
      later screen-related task (within the TTL) can still use it;
    * otherwise                      -> the context (first 800 characters) is
      returned and then cleared, i.e. it is used exactly once.

    A timestamp of ``0`` is treated as "unknown age" and never expires.
    """
    ctx = state.get("screen_ctx", "")
    if not ctx:
        return ""

    # TTL: stale screenshots shouldn't follow the user around.
    ts = state.get("screen_ctx_ts", 0.0)
    if ts:
        age = time.time() - ts  # read the clock once so log and check agree
        if age > _SCREEN_CTX_TTL:
            print(f"[CODEC] Screen context expired ({int(age)}s old) — discarding")
            state["screen_ctx"] = ""
            state["screen_ctx_ts"] = 0.0
            return ""

    # Relevance: trivial intents ignore (but do not consume) screen context.
    if _TRIVIAL_SCREEN_BYPASS.match(task or ""):
        print("[CODEC] Trivial task — skipping screen context injection")
        return ""

    # Use it, one-shot.
    out = " [SCREEN CONTEXT: " + ctx[:800] + "]"
    state["screen_ctx"] = ""
    state["screen_ctx_ts"] = 0.0
    return out
197+
148198
# ── DISPATCH LOCK — only one dispatch at a time, prevents feedback loops ──
149199
_dispatch_lock = threading.Lock()
150200
_dispatch_cooldown = 0.0 # timestamp: ignore wake words until this time
@@ -467,18 +517,18 @@ def do_screenshot_question():
467517
"role": "system",
468518
"content": f"[SCREEN CAPTURE: The user's screen currently shows: {ctx[:1000]}]"
469519
})
520+
state["screen_ctx_ts"] = time.time()
470521
push(lambda: show_overlay('Screenshot saved — use voice or text to ask', '#E8711A', 3000))
471522
except Exception as e:
472523
print(f"[CODEC] Screenshot dialog error: {e}")
473524
state["screen_ctx"] = ctx
525+
state["screen_ctx_ts"] = time.time()
474526

475527
# ── TEXT/VOICE HANDLERS ───────────────────────────────────────────────────────
476528
def do_text():
477529
task = get_text_dialog()
478530
if task:
479-
if state.get("screen_ctx"):
480-
task = task + " [SCREEN CONTEXT: " + state["screen_ctx"][:800] + "]"
481-
state["screen_ctx"] = ""
531+
task = task + _maybe_screen_context(task)
482532
dispatch(task)
483533

484534
def do_start_recording():
@@ -536,9 +586,7 @@ def do_stop_voice():
536586
task = cleaned
537587
break
538588
print(f"[CODEC] Heard: {task}")
539-
if state.get("screen_ctx"):
540-
task = task + " [SCREEN CONTEXT: " + state["screen_ctx"][:800] + "]"
541-
state["screen_ctx"] = ""
589+
task = task + _maybe_screen_context(task)
542590
dispatch(task)
543591

544592
# ── WAKE WORD LISTENER ───────────────────────────────────────────────────────

codec_keyboard.py

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""CODEC Keyboard — listener, wake word, recording, double-tap shortcuts"""
22
import os
3+
import re
34
import time
45
import tempfile
56
import subprocess
@@ -95,9 +96,28 @@ def do_stop_voice():
9596
log.info("No speech detected")
9697
return
9798
log.info(f"Heard: {task}")
98-
if state.get("screen_ctx"):
99-
task = task + " [SCREEN CONTEXT: " + state["screen_ctx"][:800] + "]"
100-
state["screen_ctx"] = ""
99+
# Relevance + TTL gate (mirrors codec.py):
100+
# - skip screen context for trivial intents (math/time/etc.)
101+
# - drop screen context older than 120s
102+
ctx = state.get("screen_ctx", "")
103+
ts = state.get("screen_ctx_ts", 0.0)
104+
if ctx:
105+
stale = ts and (time.time() - ts) > 120.0
106+
trivial = bool(re.match(
107+
r"^\s*(?:\d+\s*[+\-*/x×÷]\s*\d+|what\s*time|time\s*(?:is\s*it|now)?"
108+
r"|bitcoin\s*(?:price)?|btc\s*price|weather|calculate\s+"
109+
r"|speed\s*test|ping|hello|hi|hey|status|health|uptime)\b",
110+
task or "", re.IGNORECASE))
111+
if stale:
112+
log.info(f"Screen context expired ({int(time.time()-ts)}s old) — discarding")
113+
state["screen_ctx"] = ""
114+
state["screen_ctx_ts"] = 0.0
115+
elif trivial:
116+
log.info("Trivial task — skipping screen context injection")
117+
else:
118+
task = task + " [SCREEN CONTEXT: " + ctx[:800] + "]"
119+
state["screen_ctx"] = ""
120+
state["screen_ctx_ts"] = 0.0
101121
dispatch(task)
102122

103123
# ── Wake word listener ────────────────────────────────────────────────────

0 commit comments

Comments
 (0)