Skip to content

Commit b905c78

Browse files
authored
Cache ignored DDIGNORE queries in incremental query metrics collector (DataDog#24056)
* Properly cache and ignore DDIGNORE labeled queries * partial commit miss * remove test dashboard * Fix edge case on cache correctness
1 parent 1448e8a commit b905c78

5 files changed

Lines changed: 241 additions & 20 deletions

File tree

postgres/changelog.d/24056.fixed

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Cache ignored ``/* DDIGNORE */`` queries in incremental query metrics collector to avoid repeated lookups.

postgres/datadog_checks/postgres/obfuscation_lookup.py

Lines changed: 65 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,22 @@ class ObfuscationResult:
2525

2626

2727
class ObfuscationLookup:
28-
"""Two-tier LRU cache: (queryid, dbid, userid) -> query_signature -> ObfuscationResult.
29-
30-
Cache hit avoids both PG text fetch and FFI obfuscation. On miss the
31-
caller supplies raw text; we obfuscate, store both mappings, and discard
32-
the raw text. Multiple pgss keys sharing a query_signature share one result.
28+
"""LRU cache mapping pg_stat_statements keys to obfuscated query results.
29+
30+
A lookup resolves a (queryid, dbid, userid) key to one of three outcomes:
31+
32+
- hit: the obfuscated result is cached, avoiding both PG text fetch and FFI
33+
obfuscation. Stored as two tiers, key -> query_signature -> result, so
34+
multiple keys sharing a query_signature share one result.
35+
- miss: nothing is cached for the key; the caller must fetch its text and
36+
pass it to :meth:`populate` to obfuscate, store, and discard the raw text.
37+
- ignored: the key's text is known to be non-cacheable (e.g. the agent's own
38+
/* DDIGNORE */ queries). These are neither hit nor miss; lookup skips them
39+
so they never trigger a repeated text fetch.
40+
41+
The caller decides what is non-cacheable (via :meth:`mark_ignored`); the
42+
cache only owns storage and lifecycle. All three tiers are LRU-bounded by
43+
``maxsize`` and cleared for a key by :meth:`evict` when it leaves pgss.
3344
"""
3445

3546
def __init__(self, maxsize: int, obfuscate_options: str, log_unobfuscated_queries: bool = False):
@@ -39,6 +50,8 @@ def __init__(self, maxsize: int, obfuscate_options: str, log_unobfuscated_querie
3950

4051
self._key_to_sig: OrderedDict[PgssKey, str] = OrderedDict()
4152
self._sig_to_result: OrderedDict[str, ObfuscationResult] = OrderedDict()
53+
# Negative cache: keys we have learned resolve to nothing cacheable.
54+
self._ignored_keys: OrderedDict[PgssKey, None] = OrderedDict()
4255

4356
self._hits = 0
4457
self._misses = 0
@@ -51,6 +64,10 @@ def queryid_map_size(self) -> int:
5164
def signature_map_size(self) -> int:
5265
return len(self._sig_to_result)
5366

67+
@property
68+
def ignored_map_size(self) -> int:
69+
return len(self._ignored_keys)
70+
5471
@property
5572
def hits(self) -> int:
5673
return self._hits
@@ -64,11 +81,20 @@ def reset_stats(self):
6481
self._misses = 0
6582

6683
def lookup(self, keys: set[PgssKey]) -> tuple[dict[PgssKey, ObfuscationResult], set[PgssKey]]:
67-
"""Return (hits, misses) for the given pg_stat_statements row keys."""
84+
"""Return (hits, misses) for the given pg_stat_statements row keys.
85+
86+
Keys in the negative cache are excluded from both: they are neither a hit
87+
(no result to return) nor a miss (must not be re-fetched).
88+
"""
6889
hits: dict[PgssKey, ObfuscationResult] = {}
6990
misses: set[PgssKey] = set()
91+
ignored = 0
7092

7193
for pgss_key in keys:
94+
if pgss_key in self._ignored_keys:
95+
self._ignored_keys.move_to_end(pgss_key)
96+
ignored += 1
97+
continue
7298
sig = self._key_to_sig.get(pgss_key)
7399
if sig is not None:
74100
self._key_to_sig.move_to_end(pgss_key)
@@ -82,15 +108,35 @@ def lookup(self, keys: set[PgssKey]) -> tuple[dict[PgssKey, ObfuscationResult],
82108
misses.add(pgss_key)
83109

84110
logger.debug(
85-
"lookup: requested=%d hits=%d misses=%d key_map=%d sig_map=%d",
111+
"lookup: requested=%d hits=%d misses=%d ignored=%d key_map=%d sig_map=%d ignored_map=%d",
86112
len(keys),
87113
len(hits),
88114
len(misses),
115+
ignored,
89116
len(self._key_to_sig),
90117
len(self._sig_to_result),
118+
len(self._ignored_keys),
91119
)
92120
return hits, misses
93121

122+
def mark_ignored(self, keys: set[PgssKey]) -> None:
123+
"""Record keys whose text is non-cacheable so future lookups skip them.
124+
125+
The caller is responsible for deciding what is non-cacheable (e.g. the
126+
agent's own /* DDIGNORE */ queries). Entries are forgotten via
127+
:meth:`evict` when their key disappears from pg_stat_statements.
128+
"""
129+
for pgss_key in keys:
130+
# Drop any stale positive mapping so an ignored key can never resurface as a
131+
# hit (e.g. if its signature is later repopulated by another key after this
132+
# negative entry is LRU-trimmed).
133+
self._key_to_sig.pop(pgss_key, None)
134+
self._ignored_keys[pgss_key] = None
135+
self._ignored_keys.move_to_end(pgss_key)
136+
if keys:
137+
self._trim_ignored()
138+
logger.debug("mark_ignored: added=%d ignored_map=%d", len(keys), len(self._ignored_keys))
139+
94140
def populate(self, raw_texts: dict[PgssKey, str]) -> dict[PgssKey, ObfuscationResult]:
95141
"""Obfuscate raw texts, store results, and return pgss_key -> ObfuscationResult."""
96142
results: dict[PgssKey, ObfuscationResult] = {}
@@ -121,11 +167,17 @@ def populate(self, raw_texts: dict[PgssKey, str]) -> dict[PgssKey, ObfuscationRe
121167
return results
122168

123169
def evict(self, keys: set[PgssKey]) -> None:
124-
"""Remove tier-1 entries for keys evicted from pgss."""
170+
"""Forget all state (positive and negative) for keys evicted from pgss."""
125171
for pgss_key in keys:
126172
self._key_to_sig.pop(pgss_key, None)
173+
self._ignored_keys.pop(pgss_key, None)
127174
if keys:
128-
logger.debug("evict: removed=%d key_map=%d", len(keys), len(self._key_to_sig))
175+
logger.debug(
176+
"evict: removed=%d key_map=%d ignored_map=%d",
177+
len(keys),
178+
len(self._key_to_sig),
179+
len(self._ignored_keys),
180+
)
129181

130182
def _obfuscate_single(self, raw_text: str) -> ObfuscationResult | None:
131183
try:
@@ -154,3 +206,7 @@ def _trim_keys(self):
154206
def _trim_sig(self):
155207
while len(self._sig_to_result) > self._maxsize:
156208
self._sig_to_result.popitem(last=False)
209+
210+
def _trim_ignored(self):
211+
while len(self._ignored_keys) > self._maxsize:
212+
self._ignored_keys.popitem(last=False)

postgres/datadog_checks/postgres/statements_v2.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -356,17 +356,20 @@ def _resolve_obfuscations(
356356

357357
if misses:
358358
raw_texts = self._fetch_query_texts(misses)
359-
filtered = {
360-
pgss_key: text
361-
for pgss_key, text in raw_texts.items()
362-
if text and text != '<insufficient privilege>' and not text.startswith('/* DDIGNORE */')
363-
}
364-
self._log.debug(
365-
"resolve: fetched=%d filtered=%d for %d misses",
366-
len(raw_texts),
367-
len(filtered),
368-
len(misses),
369-
)
359+
filtered = {}
360+
ignorable: set[PgssKey] = set()
361+
for pgss_key, text in raw_texts.items():
362+
if not text:
363+
continue
364+
if text.startswith('/* DDIGNORE */'):
365+
# We want to ignore tracking query metrics for queries with the /* DDIGNORE */ comment.
366+
ignorable.add(pgss_key)
367+
continue
368+
if text == '<insufficient privilege>':
369+
continue
370+
filtered[pgss_key] = text
371+
if ignorable:
372+
self._obfuscation_lookup.mark_ignored(ignorable)
370373
populated = self._populate_obfuscation_lookup(filtered)
371374
hits.update(populated)
372375

postgres/tests/test_statements_v2.py

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,63 @@ def test_lookup_updates_lru_order(self):
208208
_, misses = lk.lookup({(2, 2, 2)})
209209
assert (2, 2, 2) in misses
210210

211+
# --- negative cache (ignored keys) ---
212+
213+
def test_mark_ignored_excludes_from_hits_and_misses(self):
214+
"""A negatively-cached key is neither a hit nor a miss on lookup."""
215+
lk = self._make_lookup()
216+
lk.mark_ignored({(1, 1, 1)})
217+
hits, misses = lk.lookup({(1, 1, 1), (2, 1, 1)})
218+
assert (1, 1, 1) not in hits
219+
assert (1, 1, 1) not in misses
220+
assert misses == {(2, 1, 1)}
221+
assert lk.ignored_map_size == 1
222+
223+
def test_ignored_keys_do_not_increment_miss_counter(self):
224+
lk = self._make_lookup()
225+
lk.mark_ignored({(1, 1, 1)})
226+
lk.reset_stats()
227+
lk.lookup({(1, 1, 1)})
228+
assert lk.misses == 0
229+
assert lk.hits == 0
230+
231+
def test_evict_forgets_ignored_key(self):
232+
"""Evicting a vanished key clears its negative-cache entry so it can be re-evaluated."""
233+
lk = self._make_lookup()
234+
lk.mark_ignored({(1, 1, 1)})
235+
lk.evict({(1, 1, 1)})
236+
assert lk.ignored_map_size == 0
237+
_, misses = lk.lookup({(1, 1, 1)})
238+
assert (1, 1, 1) in misses
239+
240+
def test_ignored_keys_lru_trimmed_to_maxsize(self):
241+
lk = self._make_lookup(maxsize=2)
242+
lk.mark_ignored({(1, 1, 1), (2, 2, 2), (3, 3, 3)})
243+
assert lk.ignored_map_size == 2
244+
245+
def test_mark_ignored_drops_stale_positive_mapping(self):
246+
"""An ignored key must not resurface as a hit via a stale tier-1 mapping.
247+
248+
Reproduces the case where a key keeps its tier-1 mapping after its tier-2
249+
signature was evicted: marking it ignored must drop the tier-1 entry so that,
250+
even after the negative entry is trimmed and the signature is repopulated by
251+
another key, the ignored key never produces a positive hit.
252+
"""
253+
lk = self._make_lookup()
254+
# Two keys share the same normalized SQL (one signature).
255+
lk.populate({(1, 1, 1): 'SELECT 1', (2, 1, 1): 'SELECT 1'})
256+
assert lk.queryid_map_size == 2
257+
258+
# Key (1, 1, 1) turns out to be ignorable; its tier-1 mapping must be dropped.
259+
lk.mark_ignored({(1, 1, 1)})
260+
assert (1, 1, 1) not in lk._key_to_sig
261+
262+
# The shared signature is still cached (via the other key), but the ignored key
263+
# must not hit it.
264+
hits, misses = lk.lookup({(1, 1, 1)})
265+
assert (1, 1, 1) not in hits
266+
assert (1, 1, 1) not in misses
267+
211268

212269
# ---------------------------------------------------------------------------
213270
# PostgresStatementMetricsV2 — unit tests (no live database)
@@ -300,6 +357,42 @@ def test_resolve_obfuscations_partial_filter(self):
300357
assert bad_key not in result
301358
assert good_key in result
302359

360+
def test_resolve_obfuscations_skips_known_ddignore_keys_on_later_cycles(self):
361+
"""A DDIGNORE key is fetched once, negative-cached, then skipped (no fetch) on later cycles."""
362+
v2 = self._make()
363+
ddignore_key = (1, 1, 1)
364+
365+
with mock.patch.object(
366+
v2, '_fetch_query_texts', return_value={ddignore_key: '/* DDIGNORE */ SELECT 1'}
367+
) as fetch:
368+
v2._resolve_obfuscations({ddignore_key}, set())
369+
assert fetch.call_count == 1
370+
assert ddignore_key in v2._obfuscation_lookup._ignored_keys
371+
372+
# Second cycle: same key changes again but is now skipped before the fetch.
373+
result = v2._resolve_obfuscations({ddignore_key}, set())
374+
assert result == {}
375+
assert fetch.call_count == 1
376+
377+
def test_resolve_obfuscations_does_not_fetch_when_all_keys_ignored(self):
378+
"""When every changed key is already negative-cached, no text fetch is issued."""
379+
v2 = self._make()
380+
ddignore_key = (1, 1, 1)
381+
v2._obfuscation_lookup.mark_ignored({ddignore_key})
382+
with mock.patch.object(v2, '_fetch_query_texts') as fetch:
383+
result = v2._resolve_obfuscations({ddignore_key}, set())
384+
assert result == {}
385+
fetch.assert_not_called()
386+
387+
def test_resolve_obfuscations_forgets_ignored_key_when_vanished(self):
388+
"""An ignored key that vanishes from pgss is dropped from the negative cache via evict."""
389+
v2 = self._make()
390+
ddignore_key = (1, 1, 1)
391+
v2._obfuscation_lookup.mark_ignored({ddignore_key})
392+
with mock.patch.object(v2, '_fetch_query_texts', return_value={}):
393+
v2._resolve_obfuscations(set(), {ddignore_key})
394+
assert ddignore_key not in v2._obfuscation_lookup._ignored_keys
395+
303396
# --- execute query cancel event ---
304397

305398
def test_execute_query_raises_when_cancelled(self):

postgres/tests/test_statements_v2_integration.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
from datadog_checks.base.utils.db.sql import compute_sql_signature
1010
from datadog_checks.postgres.statements import PG_STAT_STATEMENTS_METRICS_COLUMNS
11+
from datadog_checks.postgres.statements_v2 import PostgresStatementMetricsV2
1112

1213
from .common import (
1314
DB_NAME,
@@ -518,3 +519,70 @@ def test_fqt_cache_deduplication_v2(aggregator, integration_check, dbm_instance_
518519
assert len(matching) == 1, (
519520
f"Expected exactly 1 FQT event across 3 cycles but got {len(matching)}; TTL cache deduplication may be broken"
520521
)
522+
523+
524+
# ---------------------------------------------------------------------------
525+
# Ignored (/* DDIGNORE */) queries are learned once and never re-fetched
526+
# ---------------------------------------------------------------------------
527+
528+
529+
@requires_over_10
530+
def test_ignored_queries_do_not_cause_lookup_cycles_v2(aggregator, integration_check, dbm_instance_v2):
531+
"""The check's own /* DDIGNORE */ queries run every cycle and would otherwise trigger a query-text
532+
fetch each time. Once classified as DDIGNORE, a key must be remembered and skipped so it never costs
533+
another fetch on a later cycle (unless it genuinely vanished from pg_stat_statements in between)."""
534+
conn = psycopg.connect(
535+
host=HOST, dbname=DB_NAME, user="bob", password="bob", autocommit=True, cursor_factory=ClientCursor
536+
)
537+
538+
check = integration_check(dbm_instance_v2)
539+
check._connect()
540+
541+
# First cycle selects and instantiates the V2 collector; capture it before installing the spy.
542+
conn.cursor().execute("SELECT city FROM persons WHERE city = %s", ("hello",))
543+
run_one_check(check, cancel=False)
544+
job = check.statement_metrics
545+
assert isinstance(job, PostgresStatementMetricsV2)
546+
547+
# Spy on the text fetch across every cycle, recording which keys were classified as DDIGNORE and
548+
# which keys vanished from pgss before each fetch (legitimate re-fetches if they later return).
549+
original_fetch = job._fetch_query_texts
550+
ddignore_keys_seen: set = set()
551+
refetched_ddignore: set = set()
552+
vanished_before_fetch: set = set()
553+
554+
def _spy(keys):
555+
# A key already known to be DDIGNORE that is fetched again — and did not vanish in the
556+
# meantime — means it slipped past the skip and is still costing a lookup every cycle.
557+
replayed = (set(keys) & ddignore_keys_seen) - vanished_before_fetch
558+
refetched_ddignore.update(replayed)
559+
texts = original_fetch(keys)
560+
for key, text in texts.items():
561+
if text and text.startswith('/* DDIGNORE */'):
562+
ddignore_keys_seen.add(key)
563+
return texts
564+
565+
original_resolve = job._resolve_obfuscations
566+
567+
def _resolve_spy(changed_pgss_keys, vanished_pgss_keys):
568+
vanished_before_fetch.update(vanished_pgss_keys)
569+
return original_resolve(changed_pgss_keys, vanished_pgss_keys)
570+
571+
with (
572+
mock.patch.object(job, '_fetch_query_texts', side_effect=_spy),
573+
mock.patch.object(job, '_resolve_obfuscations', side_effect=_resolve_spy),
574+
):
575+
for _ in range(8):
576+
conn.cursor().execute("SELECT city FROM persons WHERE city = %s", ("hello",))
577+
run_one_check(check, cancel=False)
578+
579+
conn.close()
580+
581+
if not ddignore_keys_seen:
582+
# Some Postgres versions (e.g. 18) normalize the leading comment out of the stored
583+
# pg_stat_statements text, so /* DDIGNORE */ queries never reach the fetch path and
584+
# there is nothing to assert about skipping them here.
585+
pytest.skip("No /* DDIGNORE */ queries surfaced in pg_stat_statements on this version")
586+
assert not refetched_ddignore, (
587+
f"DDIGNORE keys were re-fetched on later cycles instead of being skipped: {sorted(refetched_ddignore)}"
588+
)

0 commit comments

Comments
 (0)