Skip to content

Commit 7352f76

Browse files
authored
[SDBMON-2665] Cache parameterized-query explain failures caused by unresolvable parameter types (DataDog#24102)
* Cache parameterized-query explain failures caused by unresolvable parameter types * Add changelog * Return the correct explain error
1 parent b3b45af commit 7352f76

4 files changed

Lines changed: 219 additions & 27 deletions

File tree

postgres/changelog.d/24102.fixed

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Cache parameterized-query explain failures caused by unresolvable parameter types so they aren't re-attempted on every collection.

postgres/datadog_checks/postgres/explain_parameterized_queries.py

Lines changed: 90 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import logging
66
import re
7+
from typing import Dict, List, Optional, Tuple
78

89
import psycopg
910

@@ -26,6 +27,16 @@
2627

2728
EXPLAIN_QUERY = 'SELECT {explain_function}(%s)'
2829

30+
# Errors raised when PostgreSQL can't resolve a parameter's type while preparing or explaining a parameterized
31+
# query (e.g. untyped NULL parameters from ORMs using the extended query protocol: "operator does not exist:
32+
# bigint = text"). The parameter types that made the original query valid aren't available in pg_stat_activity,
33+
# so these queries can't be explained and the failure is deterministic for a given query signature.
34+
EXPECTED_PARAMETER_TYPE_ERRORS = (
35+
psycopg.errors.IndeterminateDatatype,
36+
psycopg.errors.UndefinedFunction,
37+
psycopg.errors.DatatypeMismatch,
38+
)
39+
2940

3041
def agent_check_getter(self):
3142
return self._check
@@ -73,7 +84,9 @@ def __init__(self, check, config, explain_function):
7384
self._explain_function = explain_function
7485

7586
@tracked_method(agent_check_getter=agent_check_getter)
76-
def explain_statement(self, dbname, statement, obfuscated_statement, query_signature):
87+
def explain_statement(
88+
self, dbname: str, statement: str, obfuscated_statement: str, query_signature: str
89+
) -> Tuple[Optional[Dict], Optional[DBExplainError], Optional[str]]:
7790
if self._check.version < V12:
7891
# if pg version < 12, skip explaining parameterized queries because
7992
# plan_cache_mode is not supported
@@ -86,18 +99,27 @@ def explain_statement(self, dbname, statement, obfuscated_statement, query_signa
8699
with self._check.db_pool.get_connection(dbname) as conn:
87100
try:
88101
self._set_plan_cache_mode(conn)
89-
self._create_prepared_statement(conn, statement, obfuscated_statement, query_signature)
90-
except psycopg.errors.IndeterminateDatatype as e:
91-
return None, DBExplainError.indeterminate_datatype, '{}'.format(type(e))
92-
except psycopg.errors.UndefinedFunction as e:
93-
return None, DBExplainError.undefined_function, '{}'.format(type(e))
102+
prepared_statement_error = self._create_prepared_statement(
103+
conn, statement, obfuscated_statement, query_signature
104+
)
94105
except Exception as e:
95-
# if we fail to create a prepared statement, we cannot explain the query
106+
# an unexpected failure creating the prepared statement means we cannot explain the query
96107
return None, DBExplainError.failed_to_explain_with_prepared_statement, '{}'.format(type(e))
97108

109+
if prepared_statement_error is not None:
110+
# an expected, deterministic parameter type-resolution failure (e.g. untyped NULL parameters).
111+
error_code, err_msg = prepared_statement_error
112+
return None, error_code, err_msg
113+
98114
try:
99-
result = self._explain_prepared_statement(conn, statement, obfuscated_statement, query_signature)
100-
if result:
115+
result, explain_error = self._explain_prepared_statement(
116+
conn, statement, obfuscated_statement, query_signature
117+
)
118+
if explain_error is not None:
119+
# an expected, deterministic parameter type-resolution failure surfaced during EXPLAIN EXECUTE
120+
error_code, err_msg = explain_error
121+
return None, error_code, err_msg
122+
elif result:
101123
plan = result[0][0][0]
102124
return plan, DBExplainError.explained_with_prepared_statement, None
103125
else:
@@ -117,24 +139,57 @@ def _set_plan_cache_mode(self, conn):
117139
self._execute_query(conn, "SET plan_cache_mode = force_generic_plan")
118140

119141
@tracked_method(agent_check_getter=agent_check_getter)
120-
def _create_prepared_statement(self, conn, statement, obfuscated_statement, query_signature):
142+
def _create_prepared_statement(
143+
self, conn, statement: str, obfuscated_statement: str, query_signature: str
144+
) -> Optional[Tuple[DBExplainError, str]]:
145+
# Returns None on success, or a (DBExplainError, err_msg) tuple when the query can't be prepared because
146+
# a parameter's type can't be resolved. Other unexpected errors are re-raised.
121147
try:
122148
self._execute_query(
123149
conn,
124150
PREPARE_STATEMENT_QUERY.format(query_signature=query_signature, statement=statement),
125151
)
152+
return None
153+
except EXPECTED_PARAMETER_TYPE_ERRORS as e:
154+
# The parameter types can't be resolved, so this query can't be prepared (and therefore can't be
155+
# explained). Map the failure to the corresponding explain error code.
156+
self._log_failed_statement(
157+
'Failed to create prepared statement when explaining statement(%s)=[%s] | err=[%s]',
158+
statement,
159+
obfuscated_statement,
160+
query_signature,
161+
e,
162+
)
163+
return self._map_parameter_type_error(e)
126164
except Exception as e:
127-
logged_statement = obfuscated_statement
128-
if self._config.log_unobfuscated_plans:
129-
logged_statement = statement
130-
logger.debug(
165+
self._log_failed_statement(
131166
'Failed to create prepared statement when explaining statement(%s)=[%s] | err=[%s]',
167+
statement,
168+
obfuscated_statement,
132169
query_signature,
133-
logged_statement,
134170
e,
135171
)
136172
raise
137173

174+
def _map_parameter_type_error(self, e: Exception) -> Tuple[DBExplainError, str]:
175+
# Map an unresolved parameter-type error to its specific explain error code so cached responses
176+
# and emitted error tags reflect the actual failure rather than a generic one.
177+
if isinstance(e, psycopg.errors.IndeterminateDatatype):
178+
return DBExplainError.indeterminate_datatype, '{}'.format(type(e))
179+
if isinstance(e, psycopg.errors.DatatypeMismatch):
180+
return DBExplainError.datatype_mismatch, '{}'.format(type(e))
181+
return DBExplainError.undefined_function, '{}'.format(type(e))
182+
183+
def _log_failed_statement(
184+
self, message: str, statement: str, obfuscated_statement: str, query_signature: str, e: Exception
185+
) -> None:
186+
# Logs the obfuscated statement by default, falling back to the raw statement only when explicitly
187+
# configured. The message is expected to interpolate (query_signature, statement, error) in that order.
188+
logged_statement = obfuscated_statement
189+
if self._config.log_unobfuscated_plans:
190+
logged_statement = statement
191+
logger.debug(message, query_signature, logged_statement, e)
192+
138193
@tracked_method(agent_check_getter=agent_check_getter)
139194
def _get_number_of_parameters_for_prepared_statement(self, conn, query_signature):
140195
rows = self._execute_query_and_fetch_rows(conn, PARAM_TYPES_COUNT_QUERY.format(query_signature=query_signature))
@@ -152,22 +207,35 @@ def _generate_prepared_statement_query(self, conn, query_signature: str) -> str:
152207
return EXECUTE_PREPARED_STATEMENT_QUERY.format(prepared_statement=query_signature, parameters=parameters)
153208

154209
@tracked_method(agent_check_getter=agent_check_getter)
155-
def _explain_prepared_statement(self, conn, statement, obfuscated_statement, query_signature):
210+
def _explain_prepared_statement(
211+
self, conn, statement: str, obfuscated_statement: str, query_signature: str
212+
) -> Tuple[Optional[List], Optional[Tuple[DBExplainError, str]]]:
213+
# Returns (rows, None) on success, or (None, (DBExplainError, err_msg)) when a parameter's type can't be
214+
# resolved during EXPLAIN EXECUTE. Other unexpected errors are re-raised.
156215
try:
157216
prepared_statement_query = self._generate_prepared_statement_query(conn, query_signature)
158-
return self._execute_query_and_fetch_rows(
217+
rows = self._execute_query_and_fetch_rows(
159218
conn,
160219
EXPLAIN_QUERY.format(explain_function=self._explain_function),
161220
(prepared_statement_query,),
162221
)
222+
return rows, None
223+
except EXPECTED_PARAMETER_TYPE_ERRORS as e:
224+
# The parameter types couldn't be resolved during EXPLAIN EXECUTE, so the query can't be explained.
225+
self._log_failed_statement(
226+
'Failed to explain parameterized statement(%s)=[%s] | err=[%s]',
227+
statement,
228+
obfuscated_statement,
229+
query_signature,
230+
e,
231+
)
232+
return None, self._map_parameter_type_error(e)
163233
except Exception as e:
164-
logged_statement = obfuscated_statement
165-
if self._config.log_unobfuscated_plans:
166-
logged_statement = statement
167-
logger.debug(
234+
self._log_failed_statement(
168235
'Failed to explain parameterized statement(%s)=[%s] | err=[%s]',
236+
statement,
237+
obfuscated_statement,
169238
query_signature,
170-
logged_statement,
171239
e,
172240
)
173241
raise

postgres/datadog_checks/postgres/statement_samples.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,16 @@
5353

5454
SUPPORTED_EXPLAIN_STATEMENTS = frozenset({'select', 'table', 'delete', 'insert', 'replace', 'update', 'with'})
5555

56+
# Deterministic parameterized-query explain failures that will not succeed on retry for a given query signature
57+
# (e.g. a type mismatch caused by untyped parameters).
58+
PARAMETERIZED_EXPLAIN_ERRORS_TO_CACHE = frozenset(
59+
{
60+
DBExplainError.undefined_function,
61+
DBExplainError.indeterminate_datatype,
62+
DBExplainError.datatype_mismatch,
63+
}
64+
)
65+
5666
# columns from pg_stat_activity which correspond to attributes common to all databases and are therefore stored in
5767
# under other standard keys
5868
pg_stat_activity_sample_exclude_keys = {
@@ -806,9 +816,13 @@ def _run_explain_safe(self, dbname, statement, obfuscated_statement, query_signa
806816
# instead of trying to explain it then failing
807817
if self._explain_parameterized_queries._is_parameterized_query(statement):
808818
if is_affirmative(self._config.query_samples.explain_parameterized_queries):
809-
return self._explain_parameterized_queries.explain_statement(
819+
plan, explain_err_code, err_msg = self._explain_parameterized_queries.explain_statement(
810820
dbname, statement, obfuscated_statement, query_signature
811821
)
822+
# Cache deterministic failures (e.g. type mismatches from untyped parameters)
823+
if explain_err_code in PARAMETERIZED_EXPLAIN_ERRORS_TO_CACHE:
824+
self._explain_errors_cache[query_signature] = (plan, explain_err_code, err_msg)
825+
return plan, explain_err_code, err_msg
812826
e = psycopg.errors.UndefinedParameter("Unable to explain parameterized query")
813827
self._log.debug(
814828
"Unable to collect execution plan, clients using the extended query protocol or prepared statements"

postgres/tests/test_explain_parameterized_queries.py

Lines changed: 113 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ def test_explain_parameterized_queries_explain_prepared_statement_no_plan_return
184184

185185
with mock.patch(
186186
'datadog_checks.postgres.explain_parameterized_queries.ExplainParameterizedQueries._execute_query_and_fetch_rows',
187-
return_value=None,
187+
return_value=[],
188188
):
189189
plan_dict, explain_err_code, err = check.statement_samples._run_and_track_explain(
190190
DB_NAME,
@@ -197,6 +197,52 @@ def test_explain_parameterized_queries_explain_prepared_statement_no_plan_return
197197
assert err is None
198198

199199

200+
@pytest.mark.integration
201+
@pytest.mark.usefixtures("dd_environment")
202+
@requires_over_12
203+
@pytest.mark.parametrize(
204+
"query,expected_error_code",
205+
[
206+
# the parameter appears only in `$1 IS NULL`, so its type can't be resolved while preparing
207+
("SELECT * FROM pg_settings WHERE $1 IS NULL", DBExplainError.indeterminate_datatype),
208+
# `setting` is text but `$1::int` pins $1 to int, leaving no `text = integer` operator
209+
(
210+
"SELECT * FROM pg_settings WHERE setting = $1 AND name = $1::int",
211+
DBExplainError.undefined_function,
212+
),
213+
],
214+
)
215+
def test_parameterized_explain_type_resolution_failure_is_handled(
216+
integration_check, dbm_instance, query, expected_error_code
217+
):
218+
"""A parameterized query whose parameter types can't be resolved can't be prepared or explained.
219+
220+
Real Postgres raises the type-resolution error while preparing the statement. The failure should
221+
surface as an explain error code (not raise) and be cached per query signature so we don't keep
222+
attempting to prepare a statement we already know we can't prepare.
223+
"""
224+
check = integration_check(dbm_instance)
225+
check.check(dbm_instance)
226+
query_signature = compute_sql_signature(query)
227+
228+
plan, explain_err_code, _ = check.statement_samples._run_and_track_explain(DB_NAME, query, query, query_signature)
229+
230+
assert plan is None
231+
assert explain_err_code == expected_error_code
232+
# the deterministic failure is cached so we don't re-attempt it every collection
233+
assert query_signature in check.statement_samples._explain_errors_cache
234+
235+
# the cached failure short-circuits subsequent attempts without re-querying Postgres
236+
with mock.patch.object(
237+
check.statement_samples._explain_parameterized_queries,
238+
'explain_statement',
239+
return_value=(None, expected_error_code, None),
240+
) as mock_explain:
241+
_, cached_err_code, _ = check.statement_samples._run_explain_safe(DB_NAME, query, query, query_signature)
242+
assert cached_err_code == expected_error_code
243+
mock_explain.assert_not_called()
244+
245+
200246
@requires_over_12
201247
def test_generate_prepared_statement_query_no_parameters(integration_check, dbm_instance):
202248
check = integration_check(dbm_instance)
@@ -232,21 +278,84 @@ def test_generate_prepared_statement_query_three_parameters(integration_check, d
232278

233279
@pytest.mark.unit
234280
@requires_over_12
235-
def test_create_prepared_statement_exception(integration_check, dbm_instance):
281+
# psycopg.errors.DatabaseError is the parent of the expected type-resolution errors, so it also guards that
282+
# the narrow except in _create_prepared_statement does not accidentally swallow unexpected database errors.
283+
@pytest.mark.parametrize("exception_class", [Exception, psycopg.errors.DatabaseError])
284+
def test_create_prepared_statement_exception(integration_check, dbm_instance, exception_class):
236285
check = integration_check(dbm_instance)
237286

238287
query = "SELECT * FROM pg_settings WHERE name = $1"
239288
query_signature = compute_sql_signature(query)
240289
with mock.patch(
241290
'datadog_checks.postgres.explain_parameterized_queries.ExplainParameterizedQueries._execute_query',
242-
side_effect=Exception,
291+
side_effect=exception_class,
243292
):
244-
with pytest.raises(Exception):
293+
with pytest.raises(exception_class):
245294
check.statement_samples._explain_parameterized_queries._create_prepared_statement(
246295
DB_NAME, query, query, query_signature
247296
)
248297

249298

299+
@pytest.mark.unit
300+
@requires_over_12
301+
def test_create_prepared_statement_datatype_mismatch_maps_to_code(integration_check, dbm_instance):
302+
"""A DatatypeMismatch during PREPARE means the statement can't be prepared, so it maps to the
303+
datatype_mismatch error code instead of raising.
304+
305+
IndeterminateDatatype and UndefinedFunction are covered end-to-end against real Postgres in
306+
test_parameterized_explain_type_resolution_failure_is_handled. DatatypeMismatch is exercised here
307+
because it isn't reliably triggered by a portable query.
308+
"""
309+
check = integration_check(dbm_instance)
310+
epq = check.statement_samples._explain_parameterized_queries
311+
312+
with mock.patch.object(epq, '_execute_query', side_effect=psycopg.errors.DatatypeMismatch("type mismatch")):
313+
result = epq._create_prepared_statement(
314+
None,
315+
"SELECT id FROM t WHERE id = $1",
316+
"SELECT id FROM t WHERE id = $1",
317+
"test_sig",
318+
)
319+
320+
assert result is not None
321+
assert result[0] == DBExplainError.datatype_mismatch
322+
323+
324+
@pytest.mark.unit
325+
@pytest.mark.parametrize(
326+
"exception_class,expected_error_code",
327+
[
328+
(psycopg.errors.IndeterminateDatatype, DBExplainError.indeterminate_datatype),
329+
(psycopg.errors.DatatypeMismatch, DBExplainError.datatype_mismatch),
330+
(psycopg.errors.UndefinedFunction, DBExplainError.undefined_function),
331+
],
332+
)
333+
def test_explain_prepared_statement_type_mismatch_maps_to_code(
334+
integration_check, dbm_instance, exception_class, expected_error_code
335+
):
336+
"""When EXPLAIN EXECUTE hits a parameter type-resolution error the statement can't be explained,
337+
so _explain_prepared_statement returns the specific mapped error code rather than raising."""
338+
check = integration_check(dbm_instance)
339+
epq = check.statement_samples._explain_parameterized_queries
340+
341+
with mock.patch.object(epq, '_generate_prepared_statement_query', return_value="EXECUTE dd_test(null)"):
342+
with mock.patch.object(
343+
epq,
344+
'_execute_query_and_fetch_rows',
345+
side_effect=exception_class("operator does not exist: bigint = text"),
346+
):
347+
rows, explain_error = epq._explain_prepared_statement(
348+
None,
349+
"SELECT id FROM t WHERE id = $1",
350+
"SELECT id FROM t WHERE id = $1",
351+
"test_sig",
352+
)
353+
354+
assert rows is None
355+
assert explain_error is not None
356+
assert explain_error[0] == expected_error_code
357+
358+
250359
@pytest.mark.unit
251360
@pytest.mark.parametrize(
252361
"query,statement_is_parameterized_query",

0 commit comments

Comments
 (0)