Skip to content

Commit fb693bf

Browse files
committed
fix: Also use reset_val for statement_timeout in settings metric
pgwatch sets statement_timeout per-metric during collection, which would mask the actual configured value. Apply the same reset_val fix as lock_timeout.
1 parent c7c2b27 commit fb693bf

2 files changed

Lines changed: 34 additions & 30 deletions

File tree

config/pgwatch-prometheus/metrics.yml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -362,23 +362,23 @@ metrics:
362362
This metric collects various PostgreSQL server settings and configurations.
363363
It provides insights into the server's configuration, including version, memory settings, and other important parameters.
364364
This metric is useful for monitoring server settings and ensuring optimal performance.
365-
Note: For lock_timeout, we use reset_val instead of setting because pgwatch sets
366-
lock_timeout to 100ms during metric collection, which would mask the actual configured value.
365+
Note: For lock_timeout and statement_timeout, we use reset_val instead of setting because
366+
pgwatch overrides these during metric collection, which would mask the actual configured values.
367367
sqls:
368368
11: |-
369369
select /* pgwatch_generated */
370370
(extract(epoch from now()) * 1e9)::int8 as epoch_ns,
371371
current_database() as tag_datname,
372372
name as tag_setting_name,
373-
-- Use reset_val for lock_timeout because pgwatch sets it to 100ms during collection,
374-
-- which would mask the actual configured value. reset_val shows the true server config.
375-
case when name = 'lock_timeout' then reset_val else setting end as tag_setting_value,
373+
-- Use reset_val for lock_timeout/statement_timeout because pgwatch overrides them during
374+
-- collection (lock_timeout=100ms, statement_timeout per-metric), masking actual config.
375+
case when name in ('lock_timeout', 'statement_timeout') then reset_val else setting end as tag_setting_value,
376376
unit as tag_unit,
377377
category as tag_category,
378378
vartype as tag_vartype,
379-
case when (case when name = 'lock_timeout' then reset_val else setting end) ~ '^-?[0-9]+$' then (case when name = 'lock_timeout' then reset_val else setting end)::bigint else null end as numeric_value,
380-
-- For lock_timeout, compare reset_val with boot_val since source becomes 'session' during collection
381-
case when name = 'lock_timeout' then (case when reset_val = boot_val then 1 else 0 end) else (case when source <> 'default' then 0 else 1 end) end as is_default,
379+
case when (case when name in ('lock_timeout', 'statement_timeout') then reset_val else setting end) ~ '^-?[0-9]+$' then (case when name in ('lock_timeout', 'statement_timeout') then reset_val else setting end)::bigint else null end as numeric_value,
380+
-- For these settings, compare reset_val with boot_val since source becomes 'session' during collection
381+
case when name in ('lock_timeout', 'statement_timeout') then (case when reset_val = boot_val then 1 else 0 end) else (case when source <> 'default' then 0 else 1 end) end as is_default,
382382
1 as configured
383383
from pg_settings
384384
gauges:

tests/settings/test_settings_metric.py

Lines changed: 26 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -39,24 +39,28 @@ def test_settings_metric_exists(metrics_config: dict) -> None:
3939

4040

4141
@pytest.mark.unit
42-
def test_settings_metric_uses_reset_val_for_lock_timeout(metrics_config: dict) -> None:
42+
def test_settings_metric_uses_reset_val_for_overridden_settings(metrics_config: dict) -> None:
4343
"""
44-
Test that the settings metric uses reset_val for lock_timeout.
44+
Test that the settings metric uses reset_val for settings overridden by pgwatch.
4545
46-
This is critical because pgwatch sets lock_timeout to 100ms during metric
47-
collection, which would mask the actual configured value if we used 'setting'.
46+
This is critical because pgwatch overrides these during metric collection:
47+
- lock_timeout: set to 100ms
48+
- statement_timeout: set per-metric (e.g., 15s for settings)
49+
50+
Using 'setting' would mask the actual configured values.
4851
4952
See: https://gitlab.com/postgres-ai/postgres_ai/-/issues/61
5053
"""
5154
sql = metrics_config["metrics"]["settings"]["sqls"][11]
5255

53-
# Verify the SQL uses reset_val for lock_timeout
56+
# Verify the SQL uses reset_val for overridden settings
5457
assert "reset_val" in sql, "SQL should reference reset_val column"
5558
assert "lock_timeout" in sql, "SQL should reference lock_timeout"
59+
assert "statement_timeout" in sql, "SQL should reference statement_timeout"
5660

57-
# More specific check: ensure the CASE statement is used correctly
58-
assert "case when name = 'lock_timeout' then reset_val else setting end" in sql, \
59-
"SQL should use CASE statement to select reset_val for lock_timeout"
61+
# More specific check: ensure the CASE statement handles both settings
62+
assert "name in ('lock_timeout', 'statement_timeout')" in sql, \
63+
"SQL should use IN clause to handle both lock_timeout and statement_timeout"
6064

6165

6266
@pytest.mark.unit
@@ -92,43 +96,43 @@ def test_settings_metric_has_required_fields(metrics_config: dict) -> None:
9296

9397

9498
@pytest.mark.unit
95-
def test_settings_metric_numeric_value_uses_correct_source_for_lock_timeout(metrics_config: dict) -> None:
99+
def test_settings_metric_numeric_value_uses_correct_source_for_overridden_settings(metrics_config: dict) -> None:
96100
"""
97-
Test that numeric_value also uses reset_val for lock_timeout.
101+
Test that numeric_value also uses reset_val for overridden settings.
98102
99103
The numeric_value field should be derived from the same source as tag_setting_value
100104
to ensure consistency.
101105
"""
102106
sql = metrics_config["metrics"]["settings"]["sqls"][11]
103107

104-
# Count occurrences of the CASE expression for lock_timeout
108+
# Count occurrences of the CASE expression for overridden settings
105109
# It should appear twice: once for tag_setting_value and once for numeric_value
106-
case_expr = "case when name = 'lock_timeout' then reset_val else setting end"
110+
case_expr = "case when name in ('lock_timeout', 'statement_timeout') then reset_val else setting end"
107111
occurrences = sql.count(case_expr)
108112

109113
assert occurrences >= 2, \
110-
f"CASE expression for lock_timeout should appear at least twice (for tag_setting_value and numeric_value), found {occurrences}"
114+
f"CASE expression for overridden settings should appear at least twice (for tag_setting_value and numeric_value), found {occurrences}"
111115

112116

113117
@pytest.mark.unit
114-
def test_settings_metric_is_default_uses_boot_val_for_lock_timeout(metrics_config: dict) -> None:
118+
def test_settings_metric_is_default_uses_boot_val_for_overridden_settings(metrics_config: dict) -> None:
115119
"""
116-
Test that is_default compares reset_val with boot_val for lock_timeout.
120+
Test that is_default compares reset_val with boot_val for overridden settings.
117121
118-
When pgwatch sets lock_timeout during collection, the source becomes 'session',
119-
which would incorrectly report is_default=0. Instead, we should compare
120-
reset_val with boot_val to determine if the configured value is the default.
122+
When pgwatch overrides lock_timeout/statement_timeout during collection, the source
123+
becomes 'session', which would incorrectly report is_default=0. Instead, we should
124+
compare reset_val with boot_val to determine if the configured value is the default.
121125
122126
See: https://gitlab.com/postgres-ai/postgres_ai/-/issues/61
123127
"""
124128
sql = metrics_config["metrics"]["settings"]["sqls"][11]
125129

126-
# Verify is_default uses boot_val comparison for lock_timeout
130+
# Verify is_default uses boot_val comparison for overridden settings
127131
assert "boot_val" in sql, "SQL should reference boot_val column for is_default comparison"
128132

129-
# Check for the specific pattern that handles lock_timeout specially
130-
assert "case when name = 'lock_timeout' then" in sql and "boot_val" in sql, \
131-
"SQL should use special handling for lock_timeout in is_default calculation"
133+
# Check for the specific pattern that handles overridden settings specially
134+
assert "name in ('lock_timeout', 'statement_timeout')" in sql and "boot_val" in sql, \
135+
"SQL should use special handling for overridden settings in is_default calculation"
132136

133137

134138
@pytest.mark.integration

0 commit comments

Comments
 (0)