Skip to content

Commit b906344

Browse files
joelmarcotte and claude authored
[postgres] Qualify diagnose category with instance host (DataDog#23620)
* [postgres] Qualify diagnose category with instance host to distinguish multiple instances

  Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>

* [postgres] Add changelog entry for DataDog#23620

  Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>

* [postgres] Use `instance=<host>` prefix for diagnose category

  Replaces the `postgres:<host>` prefix on the diagnose category with `instance=<host>` so the host attribution reads as a key/value pair.

  Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* [postgres] Use canonical database_identifier in diagnose category

  Replaces the hand-built `host:port` value with `check.database_identifier`, the canonical instance identifier already used in metadata, health events, and statement payloads. Categories now match what's surfaced in the UI and other logs/debug channels.

  Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* [postgres] Fall back to host:port when database_identifier is unavailable

  `database_identifier` resolves a Template over `_config` and `tags`, which can fail when the very config that diagnose is meant to surface is broken. Wrap it in a try/except and fall back to the raw `host:port` so the diagnose category is always populated.

  Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
1 parent 1befb90 commit b906344

2 files changed

Lines changed: 52 additions & 41 deletions

File tree

postgres/changelog.d/23620.fixed

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Qualify the diagnose category with the instance host so multiple instances are distinguishable in `datadog-agent diagnose` output.

postgres/datadog_checks/postgres/diagnose.py

Lines changed: 51 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,6 @@
2727
)
2828
from .version_utils import V9_6, VersionUtils
2929

30-
CATEGORY_POSTGRES = "postgres"
31-
3230
# Recommended minimum track_activity_query_size. Default Postgres value is 1024, which
3331
# silently truncates queries and breaks explain plan collection.
3432
RECOMMENDED_TRACK_ACTIVITY_QUERY_SIZE = 4096
@@ -207,7 +205,7 @@ def _open_probe_connection(self, dbname):
207205
diagnosis="Failed to connect to {host} (dbname={db}) as {user}: {err}".format(
208206
host=host_desc, db=dbname, user=username, err=e
209207
),
210-
category=CATEGORY_POSTGRES,
208+
category=self._category,
211209
description=DIAGNOSTIC_METADATA[code]["description"],
212210
remediation=build_remediation(code),
213211
rawerror=str(e),
@@ -216,7 +214,7 @@ def _open_probe_connection(self, dbname):
216214
self._check.diagnosis.success(
217215
name=DatabaseConfigurationError.connection_failure.value,
218216
diagnosis="Connected to {host} (dbname={db}) as {user}".format(host=host_desc, db=dbname, user=username),
219-
category=CATEGORY_POSTGRES,
217+
category=self._category,
220218
)
221219
return conn
222220

@@ -230,15 +228,15 @@ def _diagnose_version(self, conn):
230228
self._fail(
231229
code,
232230
diagnosis="Unable to determine Postgres version: {}".format(e),
233-
category=CATEGORY_POSTGRES,
231+
category=self._category,
234232
rawerror=str(e),
235233
)
236234
return
237235
if not row or row[0] is None:
238236
self._fail(
239237
code,
240238
diagnosis="Unable to determine Postgres version: SHOW SERVER_VERSION returned no rows.",
241-
category=CATEGORY_POSTGRES,
239+
category=self._category,
242240
)
243241
return
244242
raw_version = row[0]
@@ -248,23 +246,23 @@ def _diagnose_version(self, conn):
248246
self._fail(
249247
code,
250248
diagnosis="Unable to parse Postgres version {!r}: {}".format(raw_version, e),
251-
category=CATEGORY_POSTGRES,
249+
category=self._category,
252250
rawerror=str(e),
253251
)
254252
return
255253
if version < V9_6:
256254
self._fail(
257255
code,
258256
diagnosis="Postgres version {} is below the minimum supported version (9.6).".format(raw_version),
259-
category=CATEGORY_POSTGRES,
257+
category=self._category,
260258
description=DIAGNOSTIC_METADATA[code]["description"],
261259
remediation=build_remediation(code),
262260
)
263261
return
264262
self._check.diagnosis.success(
265263
name=code.value,
266264
diagnosis="Postgres version {} is supported.".format(raw_version),
267-
category=CATEGORY_POSTGRES,
265+
category=self._category,
268266
)
269267

270268
def _diagnose_shared_preload_libraries(self, conn):
@@ -281,7 +279,7 @@ def _diagnose_shared_preload_libraries(self, conn):
281279
"pg_monitor members. Grant pg_monitor to the datadog user so this "
282280
"diagnostic can run."
283281
),
284-
category=CATEGORY_POSTGRES,
282+
category=self._category,
285283
description=DIAGNOSTIC_METADATA[code]["description"],
286284
remediation=build_remediation(DatabaseConfigurationError.missing_pg_monitor_role),
287285
)
@@ -290,7 +288,7 @@ def _diagnose_shared_preload_libraries(self, conn):
290288
self._check.diagnosis.success(
291289
name=code.value,
292290
diagnosis="shared_preload_libraries contains pg_stat_statements.",
293-
category=CATEGORY_POSTGRES,
291+
category=self._category,
294292
)
295293
return
296294
self._fail(
@@ -299,7 +297,7 @@ def _diagnose_shared_preload_libraries(self, conn):
299297
"shared_preload_libraries = '{}' does not contain pg_stat_statements; DBM query metrics "
300298
"will not be collected until the server is restarted with it loaded."
301299
).format(libs),
302-
category=CATEGORY_POSTGRES,
300+
category=self._category,
303301
description=DIAGNOSTIC_METADATA[code]["description"],
304302
remediation=build_remediation(code),
305303
)
@@ -317,7 +315,7 @@ def _diagnose_track_activity_query_size(self, conn):
317315
self._check.diagnosis.success(
318316
name=code.value,
319317
diagnosis="track_activity_query_size = {} (>= {}).".format(size, RECOMMENDED_TRACK_ACTIVITY_QUERY_SIZE),
320-
category=CATEGORY_POSTGRES,
318+
category=self._category,
321319
)
322320
return
323321
self._check.diagnosis.warning(
@@ -326,7 +324,7 @@ def _diagnose_track_activity_query_size(self, conn):
326324
"track_activity_query_size = {} is below the recommended {}; long queries will be "
327325
"truncated and may not be explainable."
328326
).format(size, RECOMMENDED_TRACK_ACTIVITY_QUERY_SIZE),
329-
category=CATEGORY_POSTGRES,
327+
category=self._category,
330328
description=DIAGNOSTIC_METADATA[code]["description"],
331329
remediation=build_remediation(code),
332330
)
@@ -340,13 +338,13 @@ def _diagnose_track_io_timing(self, conn):
340338
self._check.diagnosis.success(
341339
name=code.value,
342340
diagnosis="track_io_timing is on.",
343-
category=CATEGORY_POSTGRES,
341+
category=self._category,
344342
)
345343
return
346344
self._check.diagnosis.warning(
347345
name=code.value,
348346
diagnosis="track_io_timing = {}; I/O timing columns will not be collected.".format(raw),
349-
category=CATEGORY_POSTGRES,
347+
category=self._category,
350348
description=DIAGNOSTIC_METADATA[code]["description"],
351349
remediation=build_remediation(code),
352350
)
@@ -367,15 +365,15 @@ def _diagnose_pg_stat_statements_max(self, conn):
367365
self._check.diagnosis.success(
368366
name=code.value,
369367
diagnosis="pg_stat_statements.max = {} (<= threshold {}).".format(value, threshold),
370-
category=CATEGORY_POSTGRES,
368+
category=self._category,
371369
)
372370
return
373371
self._check.diagnosis.warning(
374372
name=code.value,
375373
diagnosis=(
376374
"pg_stat_statements.max = {} exceeds the threshold of {}; the collection query may run slowly."
377375
).format(value, threshold),
378-
category=CATEGORY_POSTGRES,
376+
category=self._category,
379377
description=DIAGNOSTIC_METADATA[code]["description"],
380378
remediation=build_remediation(code),
381379
)
@@ -391,7 +389,7 @@ def _diagnose_pg_monitor_role(self, conn):
391389
self._fail(
392390
code,
393391
diagnosis="Unable to check pg_monitor role membership: {}".format(e),
394-
category=CATEGORY_POSTGRES,
392+
category=self._category,
395393
description=DIAGNOSTIC_METADATA[code]["description"],
396394
remediation=build_remediation(code),
397395
rawerror=str(e),
@@ -408,7 +406,7 @@ def _diagnose_pg_monitor_role(self, conn):
408406
self._fail(
409407
code,
410408
diagnosis="Unable to check pg_monitor role membership: {}".format(e),
411-
category=CATEGORY_POSTGRES,
409+
category=self._category,
412410
description=DIAGNOSTIC_METADATA[code]["description"],
413411
remediation=build_remediation(code),
414412
rawerror=str(e),
@@ -418,13 +416,13 @@ def _diagnose_pg_monitor_role(self, conn):
418416
self._check.diagnosis.success(
419417
name=code.value,
420418
diagnosis="Current user is a member of pg_monitor.",
421-
category=CATEGORY_POSTGRES,
419+
category=self._category,
422420
)
423421
return
424422
self._fail(
425423
code,
426424
diagnosis=("The datadog user is not a member of pg_monitor; other users' activity rows will be masked."),
427-
category=CATEGORY_POSTGRES,
425+
category=self._category,
428426
description=DIAGNOSTIC_METADATA[code]["description"],
429427
remediation=build_remediation(code),
430428
)
@@ -446,7 +444,7 @@ def _diagnose_pg_stat_activity_access(self, conn):
446444
self._fail(
447445
DatabaseConfigurationError.undefined_activity_view,
448446
diagnosis="Unable to query {}: {}".format(view, e),
449-
category=CATEGORY_POSTGRES,
447+
category=self._category,
450448
description=DIAGNOSTIC_METADATA[DatabaseConfigurationError.undefined_activity_view]["description"],
451449
remediation=build_remediation(DatabaseConfigurationError.undefined_activity_view),
452450
rawerror=str(e),
@@ -459,15 +457,15 @@ def _diagnose_pg_stat_activity_access(self, conn):
459457
"{} rows in {} are masked as '<insufficient privilege>'; activity samples will miss "
460458
"other users' queries."
461459
).format(masked, view),
462-
category=CATEGORY_POSTGRES,
460+
category=self._category,
463461
description=DIAGNOSTIC_METADATA[code]["description"],
464462
remediation=build_remediation(code),
465463
)
466464
return
467465
self._check.diagnosis.success(
468466
name=code.value,
469467
diagnosis="{} is readable with full query visibility.".format(view),
470-
category=CATEGORY_POSTGRES,
468+
category=self._category,
471469
)
472470

473471
def _diagnose_pg_stat_database_access(self, conn):
@@ -487,7 +485,7 @@ def _diagnose_pg_stat_database_access(self, conn):
487485
self._fail(
488486
code,
489487
diagnosis="Unable to SELECT from pg_stat_database: {}".format(e),
490-
category=CATEGORY_POSTGRES,
488+
category=self._category,
491489
description=DIAGNOSTIC_METADATA[code]["description"],
492490
remediation=build_remediation(code),
493491
rawerror=str(e),
@@ -496,7 +494,7 @@ def _diagnose_pg_stat_database_access(self, conn):
496494
self._check.diagnosis.success(
497495
name=code.value,
498496
diagnosis="pg_stat_database is readable.",
499-
category=CATEGORY_POSTGRES,
497+
category=self._category,
500498
)
501499

502500
def _diagnose_datadog_schema(self, conn, dbname=None, failed=None):
@@ -514,13 +512,13 @@ def _diagnose_datadog_schema(self, conn, dbname=None, failed=None):
514512
self._check.diagnosis.success(
515513
name=code.value,
516514
diagnosis="`datadog` schema exists in {}.".format(dbname),
517-
category=CATEGORY_POSTGRES,
515+
category=self._category,
518516
)
519517
return
520518
self._fail(
521519
code,
522520
diagnosis="`datadog` schema is missing in {}; DBM setup is incomplete.".format(dbname),
523-
category=CATEGORY_POSTGRES,
521+
category=self._category,
524522
description=DIAGNOSTIC_METADATA[code]["description"],
525523
remediation=build_remediation(code),
526524
failed_codes=failed,
@@ -557,13 +555,13 @@ def _diagnose_schema_usage(self, conn, dbname, schema, failed):
557555
self._check.diagnosis.success(
558556
name=code.value,
559557
diagnosis="datadog has USAGE on schema `{}` in {}.".format(schema, dbname),
560-
category=CATEGORY_POSTGRES,
558+
category=self._category,
561559
)
562560
return
563561
self._fail(
564562
code,
565563
diagnosis="datadog is missing USAGE on schema `{}` in {}.".format(schema, dbname),
566-
category=CATEGORY_POSTGRES,
564+
category=self._category,
567565
description=build_description(code, schema=schema),
568566
remediation=build_remediation(code, schema=schema),
569567
failed_codes=failed,
@@ -585,13 +583,13 @@ def _diagnose_pg_stat_statements_extension(self, conn, dbname=None, failed=None)
585583
self._check.diagnosis.success(
586584
name=created.value,
587585
diagnosis="pg_stat_statements extension is installed in schema `{}` in {}.".format(row, dbname),
588-
category=CATEGORY_POSTGRES,
586+
category=self._category,
589587
)
590588
return
591589
self._fail(
592590
created,
593591
diagnosis="pg_stat_statements extension is not installed in {}.".format(dbname),
594-
category=CATEGORY_POSTGRES,
592+
category=self._category,
595593
description=build_description(created, dbname=dbname),
596594
remediation=build_remediation(created, dbname=dbname),
597595
failed_codes=failed,
@@ -632,7 +630,7 @@ def _diagnose_pg_stat_statements_readable(self, conn, dbname=None, failed=None):
632630
self._fail(
633631
code,
634632
diagnosis=diagnosis,
635-
category=CATEGORY_POSTGRES,
633+
category=self._category,
636634
description=DIAGNOSTIC_METADATA[code]["description"],
637635
remediation=remediation,
638636
rawerror=str(e),
@@ -642,7 +640,7 @@ def _diagnose_pg_stat_statements_readable(self, conn, dbname=None, failed=None):
642640
self._check.diagnosis.success(
643641
name=code.value,
644642
diagnosis="{} is readable in {}.".format(view, dbname),
645-
category=CATEGORY_POSTGRES,
643+
category=self._category,
646644
)
647645

648646
def _diagnose_explain_function(self, conn, dbname=None, failed=None):
@@ -675,7 +673,7 @@ def _diagnose_explain_function(self, conn, dbname=None, failed=None):
675673
diagnosis="{} cannot be executed in {}; execution plans cannot be collected: {}".format(
676674
explain_function, dbname, e
677675
),
678-
category=CATEGORY_POSTGRES,
676+
category=self._category,
679677
description=build_description(code, explain_function=explain_function),
680678
remediation=build_remediation(code, explain_function=explain_function),
681679
rawerror=str(e),
@@ -687,15 +685,15 @@ def _diagnose_explain_function(self, conn, dbname=None, failed=None):
687685
self._check.diagnosis.success(
688686
name=code.value,
689687
diagnosis="{} executed successfully in {}.".format(explain_function, dbname),
690-
category=CATEGORY_POSTGRES,
688+
category=self._category,
691689
)
692690
return
693691
self._fail(
694692
code,
695693
diagnosis="{} did not return an execution plan in {}; execution plans cannot be collected.".format(
696694
explain_function, dbname
697695
),
698-
category=CATEGORY_POSTGRES,
696+
category=self._category,
699697
description=build_description(code, explain_function=explain_function),
700698
remediation=build_remediation(code, explain_function=explain_function),
701699
failed_codes=failed,
@@ -716,7 +714,7 @@ def _diagnose_config_validation(self):
716714
self._check.diagnosis.warning(
717715
name=code.value,
718716
diagnosis="Postgres config validation did not complete (check initialization failed).",
719-
category=CATEGORY_POSTGRES,
717+
category=self._category,
720718
)
721719
return
722720

@@ -729,7 +727,7 @@ def _diagnose_config_validation(self):
729727
self._check.diagnosis.success(
730728
name=code.value,
731729
diagnosis=diagnosis_line,
732-
category=CATEGORY_POSTGRES,
730+
category=self._category,
733731
)
734732
return
735733

@@ -756,13 +754,25 @@ def _diagnose_config_validation(self):
756754
method(
757755
name=code.value,
758756
diagnosis=diagnosis_line,
759-
category=CATEGORY_POSTGRES,
757+
category=self._category,
760758
description=description,
761759
remediation=remediation,
762760
)
763761

764762
# -- helpers --------------------------------------------------------------
765763

764+
@property
765+
def _category(self) -> str:
766+
# Fall back to host:port — diagnose must keep working on broken config,
767+
# which is exactly when database_identifier (templated over config+tags) can blow up.
768+
try:
769+
identifier = self._check.database_identifier
770+
except Exception:
771+
identifier = self._host_desc()
772+
if len(identifier) > 27:
773+
identifier = f"{identifier[:12]}...{identifier[-12:]}"
774+
return f"instance={identifier}"
775+
766776
def _host_desc(self):
767777
host = self._check._config.host or "localhost"
768778
port = self._check._config.port

0 commit comments

Comments
 (0)