Skip to content

Commit 0668e6a

Browse files
committed
fix(metrics): port pgwatch2 top-N + 'other' bucket to pg_stat/statio_all_*
The four per-relation metrics (pg_stat_all_indexes, pg_stat_all_tables, pg_statio_all_tables, pg_statio_all_indexes) had no schema filter and a flat LIMIT 5000 truncation. On extension- or schema-heavy databases this overran prometheus.yml's sample_limit (10000), so the entire scrape was silently rejected. In addition, the rows past the LIMIT were dropped without any aggregate row left behind, so dashboard sums drifted.

Port the gen2 (gitlab.com/postgres-ai/pgwatch2) approach faithfully instead of reinventing it:

- Read pg_stat_user_*/pg_statio_user_*. pg_catalog, information_schema and pg_toast are excluded by the Postgres view itself, so we don't maintain a hand-curated nspname LIKE pattern that has to grow every time a new extension ships its own schema.
- Keep only rows with row_number() OVER (ORDER BY <relevance>) <= 100 per database. Rank by pg_total_relation_size for tables (big tables are the interesting ones; n_live_tup+n_dead_tup starved big-but-static tables) and by activity for indexes/IO views.
- UNION ALL an 'other' row that sums the tail so totals stay correct under the cap. HAVING count(*) > 0 suppresses the row when nothing was truncated.
- Skip rows with no I/O activity in the statio views — most of the tail on schema-heavy DBs is dead-cold relations.
- Filter pg_temp% from index metrics so leftover temp objects from dead sessions stop leaking samples.

Metric names and exposed tag_* labels are unchanged so Dashboards 8–11 keep working. Adds two compliance-vector tests that pin the pattern.
1 parent ae1d192 commit 0668e6a

2 files changed

Lines changed: 226 additions & 48 deletions

File tree

config/pgwatch-prometheus/metrics.yml

Lines changed: 180 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1552,28 +1552,85 @@ metrics:
15521552
- total_relation_size_bytes
15531553
statement_timeout_seconds: 15
15541554
pg_stat_all_indexes:
1555+
# Top-N + "other" bucket pattern ported from pgwatch2 (gen2). Reads
1556+
# pg_stat_user_indexes so pg_catalog/information_schema/pg_toast are
1557+
# excluded by the Postgres view itself; the only explicit schema filter is pg_temp%.
1558+
# The "other" row aggregates the tail so totals stay correct under a
1559+
# hard cardinality cap.
15551560
sqls:
15561561
11: |
1557-
select /* pgwatch_generated */
1562+
with ranked as ( /* pgwatch_generated */
1563+
select
1564+
row_number() over (order by idx_scan desc nulls last) as rownum,
1565+
schemaname,
1566+
relname,
1567+
indexrelname,
1568+
idx_scan,
1569+
idx_tup_read,
1570+
idx_tup_fetch
1571+
from pg_stat_user_indexes
1572+
where not schemaname like E'pg\\_temp%'
1573+
)
1574+
select
15581575
current_database() as tag_datname,
15591576
schemaname as tag_schemaname,
15601577
relname as tag_relname,
15611578
indexrelname as tag_indexrelname,
15621579
idx_scan,
15631580
idx_tup_read,
15641581
idx_tup_fetch
1565-
from pg_stat_all_indexes
1566-
order by idx_scan desc
1567-
limit 5000
1582+
from ranked
1583+
where rownum <= 100
1584+
union all
1585+
select
1586+
current_database() as tag_datname,
1587+
'other'::text as tag_schemaname,
1588+
'other'::text as tag_relname,
1589+
'other'::text as tag_indexrelname,
1590+
coalesce(sum(idx_scan), 0)::int8 as idx_scan,
1591+
coalesce(sum(idx_tup_read), 0)::int8 as idx_tup_read,
1592+
coalesce(sum(idx_tup_fetch), 0)::int8 as idx_tup_fetch
1593+
from ranked
1594+
where rownum > 100
1595+
having count(*) > 0
15681596
gauges:
15691597
- idx_scan
15701598
- idx_tup_read
15711599
- idx_tup_fetch
15721600
statement_timeout_seconds: 15
15731601
pg_stat_all_tables:
1602+
# Top-N + "other" bucket pattern ported from pgwatch2 (gen2). Ranks by
1603+
# pg_total_relation_size because large tables are usually the interesting
1604+
# ones; ranking by size keeps big-but-static tables in the top-N, whereas
1605+
# the old n_live_tup+n_dead_tup ordering starved them out.
15741606
sqls:
15751607
11: |
1576-
select /* pgwatch_generated */
1608+
with ranked as ( /* pgwatch_generated */
1609+
select
1610+
row_number() over (order by pg_total_relation_size(relid) desc nulls last) as rownum,
1611+
schemaname,
1612+
relname,
1613+
seq_scan,
1614+
seq_tup_read,
1615+
idx_scan,
1616+
idx_tup_fetch,
1617+
n_tup_ins,
1618+
n_tup_upd,
1619+
n_tup_del,
1620+
n_tup_hot_upd,
1621+
n_live_tup,
1622+
n_dead_tup,
1623+
last_vacuum,
1624+
last_autovacuum,
1625+
last_analyze,
1626+
last_autoanalyze,
1627+
vacuum_count,
1628+
autovacuum_count,
1629+
analyze_count,
1630+
autoanalyze_count
1631+
from pg_stat_user_tables
1632+
)
1633+
select
15771634
current_database() as tag_datname,
15781635
schemaname as tag_schemaname,
15791636
relname as tag_relname,
@@ -1592,10 +1649,30 @@ metrics:
15921649
extract(epoch from greatest(last_autoanalyze, last_analyze, '1970-01-01Z'))::int8 as last_analyze,
15931650
(vacuum_count + autovacuum_count) as vacuum_count,
15941651
(analyze_count + autoanalyze_count) as analyze_count
1595-
from
1596-
pg_stat_all_tables
1597-
order by n_live_tup + n_dead_tup desc
1598-
limit 5000
1652+
from ranked
1653+
where rownum <= 100
1654+
union all
1655+
select
1656+
current_database() as tag_datname,
1657+
'other'::text as tag_schemaname,
1658+
'other'::text as tag_relname,
1659+
coalesce(sum(seq_scan), 0)::int8 as seq_scan,
1660+
coalesce(sum(seq_tup_read), 0)::int8 as seq_tup_read,
1661+
coalesce(sum(idx_scan), 0)::int8 as idx_scan,
1662+
coalesce(sum(idx_tup_fetch), 0)::int8 as idx_tup_fetch,
1663+
coalesce(sum(n_tup_ins), 0)::int8 as n_tup_ins,
1664+
coalesce(sum(n_tup_upd), 0)::int8 as n_tup_upd,
1665+
coalesce(sum(n_tup_del), 0)::int8 as n_tup_del,
1666+
coalesce(sum(n_tup_hot_upd), 0)::int8 as n_tup_hot_upd,
1667+
coalesce(sum(n_live_tup), 0)::int8 as n_live_tup,
1668+
coalesce(sum(n_dead_tup), 0)::int8 as n_dead_tup,
1669+
0::int8 as last_vacuum,
1670+
0::int8 as last_analyze,
1671+
coalesce(sum(vacuum_count + autovacuum_count), 0)::int8 as vacuum_count,
1672+
coalesce(sum(analyze_count + autoanalyze_count), 0)::int8 as analyze_count
1673+
from ranked
1674+
where rownum > 100
1675+
having count(*) > 0
15991676
gauges:
16001677
- seq_scan
16011678
- seq_tup_read
@@ -2881,60 +2958,115 @@ metrics:
28812958
statement_timeout_seconds: 15
28822959
pg_statio_all_tables:
28832960
description: >
2884-
Retrieves table-level I/O statistics from the PostgreSQL `pg_statio_all_tables` view, providing insights into I/O operations for all tables.
2885-
It returns block-level read and hit statistics for heap, index, TOAST, and TOAST index operations broken down by schema and table.
2886-
Joined with pg_class for efficient ordering by table size.
2887-
This metric helps administrators monitor table-level I/O performance and identify which tables are generating the most I/O activity.
2961+
Retrieves table-level I/O statistics from `pg_statio_user_tables`, returning
2962+
block-level read and hit counters for heap, index, TOAST and TOAST-index pages.
2963+
Ports the pgwatch2 (gen2) top-N + `'other'` bucket pattern: ranks tables by
2964+
heap_blks_read, keeps the top 100, and folds the tail into a single `'other'`
2965+
row so totals remain accurate while cardinality stays bounded. Drops rows
2966+
with no I/O activity at all (every counter zero).
28882967
Compatible with all PostgreSQL versions.
28892968
sqls:
28902969
11: |-
2891-
select /* pgwatch_generated */
2970+
with ranked as ( /* pgwatch_generated */
2971+
select
2972+
row_number() over (order by heap_blks_read desc nulls last) as rownum,
2973+
schemaname,
2974+
relname,
2975+
heap_blks_read,
2976+
heap_blks_hit,
2977+
idx_blks_read,
2978+
idx_blks_hit,
2979+
toast_blks_read,
2980+
toast_blks_hit,
2981+
tidx_blks_read,
2982+
tidx_blks_hit
2983+
from pg_statio_user_tables
2984+
where
2985+
heap_blks_read > 0 or heap_blks_hit > 0
2986+
or idx_blks_read > 0 or idx_blks_hit > 0
2987+
or toast_blks_read > 0 or toast_blks_hit > 0
2988+
or tidx_blks_read > 0 or tidx_blks_hit > 0
2989+
)
2990+
select
28922991
(extract(epoch from now()) * 1e9)::int8 as epoch_ns,
28932992
current_database() as tag_datname,
2894-
s.schemaname as tag_schemaname,
2895-
s.relname as tag_relname,
2896-
s.heap_blks_read,
2897-
s.heap_blks_hit,
2898-
s.idx_blks_read,
2899-
s.idx_blks_hit,
2900-
s.toast_blks_read,
2901-
s.toast_blks_hit,
2902-
s.tidx_blks_read,
2903-
s.tidx_blks_hit
2904-
from
2905-
pg_statio_all_tables as s
2906-
join pg_class as c on
2907-
s.relname = c.relname
2908-
and s.schemaname = c.relnamespace::regnamespace::name
2909-
order by c.relpages desc
2910-
limit 5000;
2993+
schemaname as tag_schemaname,
2994+
relname as tag_relname,
2995+
heap_blks_read,
2996+
heap_blks_hit,
2997+
idx_blks_read,
2998+
idx_blks_hit,
2999+
toast_blks_read,
3000+
toast_blks_hit,
3001+
tidx_blks_read,
3002+
tidx_blks_hit
3003+
from ranked
3004+
where rownum <= 100
3005+
union all
3006+
select
3007+
(extract(epoch from now()) * 1e9)::int8 as epoch_ns,
3008+
current_database() as tag_datname,
3009+
'other'::text as tag_schemaname,
3010+
'other'::text as tag_relname,
3011+
coalesce(sum(heap_blks_read), 0)::int8 as heap_blks_read,
3012+
coalesce(sum(heap_blks_hit), 0)::int8 as heap_blks_hit,
3013+
coalesce(sum(idx_blks_read), 0)::int8 as idx_blks_read,
3014+
coalesce(sum(idx_blks_hit), 0)::int8 as idx_blks_hit,
3015+
coalesce(sum(toast_blks_read), 0)::int8 as toast_blks_read,
3016+
coalesce(sum(toast_blks_hit), 0)::int8 as toast_blks_hit,
3017+
coalesce(sum(tidx_blks_read), 0)::int8 as tidx_blks_read,
3018+
coalesce(sum(tidx_blks_hit), 0)::int8 as tidx_blks_hit
3019+
from ranked
3020+
where rownum > 100
3021+
having count(*) > 0;
29113022
gauges:
29123023
- '*'
29133024
statement_timeout_seconds: 15
29143025
pg_statio_all_indexes:
29153026
description: >
2916-
Retrieves index-level I/O statistics from the PostgreSQL `pg_statio_all_indexes` view, providing insights into I/O operations for all indexes.
2917-
It returns block-level read and hit statistics for index operations broken down by schema, table, and index name.
2918-
Joined with pg_class for efficient ordering by index size.
2919-
This metric helps administrators monitor index-level I/O performance and identify which indexes are generating the most I/O activity.
3027+
Retrieves index-level I/O statistics from `pg_statio_user_indexes`, returning
3028+
block-level read and hit counters per index. Ports the pgwatch2 (gen2)
3029+
top-N + `'other'` bucket pattern: ranks indexes by idx_blks_read, keeps the
3030+
top 100, folds the tail into a single `'other'` row, and drops indexes with
3031+
no I/O activity. Filters temp schemas.
29203032
Compatible with all PostgreSQL versions.
29213033
sqls:
29223034
11: |-
2923-
select /* pgwatch_generated */
3035+
with ranked as ( /* pgwatch_generated */
3036+
select
3037+
row_number() over (order by idx_blks_read desc nulls last) as rownum,
3038+
schemaname,
3039+
relname,
3040+
indexrelname,
3041+
idx_blks_read,
3042+
idx_blks_hit
3043+
from pg_statio_user_indexes
3044+
where
3045+
not schemaname like E'pg\\_temp%'
3046+
and (idx_blks_read > 0 or idx_blks_hit > 0)
3047+
)
3048+
select
29243049
(extract(epoch from now()) * 1e9)::int8 as epoch_ns,
29253050
current_database() as tag_datname,
2926-
s.schemaname as tag_schemaname,
2927-
s.relname as tag_relname,
2928-
s.indexrelname as tag_indexrelname,
2929-
s.idx_blks_read,
2930-
s.idx_blks_hit
2931-
from
2932-
pg_statio_all_indexes as s
2933-
join pg_class as c on
2934-
s.indexrelname = c.relname
2935-
and s.schemaname = c.relnamespace::regnamespace::name
2936-
order by c.relpages desc
2937-
limit 5000;
3051+
schemaname as tag_schemaname,
3052+
relname as tag_relname,
3053+
indexrelname as tag_indexrelname,
3054+
idx_blks_read,
3055+
idx_blks_hit
3056+
from ranked
3057+
where rownum <= 100
3058+
union all
3059+
select
3060+
(extract(epoch from now()) * 1e9)::int8 as epoch_ns,
3061+
current_database() as tag_datname,
3062+
'other'::text as tag_schemaname,
3063+
'other'::text as tag_relname,
3064+
'other'::text as tag_indexrelname,
3065+
coalesce(sum(idx_blks_read), 0)::int8 as idx_blks_read,
3066+
coalesce(sum(idx_blks_hit), 0)::int8 as idx_blks_hit
3067+
from ranked
3068+
where rownum > 100
3069+
having count(*) > 0;
29383070
gauges:
29393071
- '*'
29403072
statement_timeout_seconds: 15

tests/compliance_vectors/test_mr219_monitoring_guards.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,52 @@ def test_pgwatch_metrics_yml_pg_stat_statements_has_top_n_filter():
8080
assert "limit 100" in compact_sql
8181

8282

83+
def test_pgwatch_stat_views_use_topn_and_other_bucket():
84+
"""High-cardinality per-relation metrics must port the pgwatch2 (gen2)
85+
pattern: read pg_stat_user_*/pg_statio_user_* (so pg_catalog,
86+
information_schema and pg_toast are excluded by the Postgres view
87+
itself, no hand-curated nspname pattern), keep the top 100 by relevance,
88+
and aggregate the tail into a single `'other'` tag row so dashboard
89+
totals stay correct under a hard cardinality cap. Hand-rolled nspname
90+
LIKE filters or LIMIT-only truncation silently drop the tail and break
91+
sums on extension-heavy or schema-heavy databases.
92+
"""
93+
metrics = yaml.safe_load(
94+
(PROJECT_ROOT / "config/pgwatch-prometheus/metrics.yml").read_text()
95+
)
96+
expectations = {
97+
"pg_stat_all_indexes": "pg_stat_user_indexes",
98+
"pg_stat_all_tables": "pg_stat_user_tables",
99+
"pg_statio_all_tables": "pg_statio_user_tables",
100+
"pg_statio_all_indexes": "pg_statio_user_indexes",
101+
}
102+
for metric_name, base_view in expectations.items():
103+
for sql in metrics["metrics"][metric_name]["sqls"].values():
104+
compact_sql = _compact_sql(sql)
105+
assert base_view in compact_sql, metric_name
106+
# Top-N window + tail aggregation
107+
assert "row_number() over" in compact_sql, metric_name
108+
assert "rownum <= 100" in compact_sql, metric_name
109+
assert "rownum > 100" in compact_sql, metric_name
110+
assert "'other'" in compact_sql, metric_name
111+
# No unfiltered LIMIT-only truncation left in place
112+
assert "limit 5000" not in compact_sql, metric_name
113+
114+
115+
def test_pgwatch_statio_skips_zero_activity_rows():
116+
"""pg_statio_user_* tail is mostly zero-I/O rows on schema-heavy DBs.
117+
Filtering them out (pgwatch2 behavior) cuts cardinality before the
118+
top-N cap is even reached and keeps the `'other'` bucket meaningful.
119+
"""
120+
metrics = yaml.safe_load(
121+
(PROJECT_ROOT / "config/pgwatch-prometheus/metrics.yml").read_text()
122+
)
123+
for sql in metrics["metrics"]["pg_statio_all_tables"]["sqls"].values():
124+
assert "heap_blks_read > 0" in _compact_sql(sql)
125+
for sql in metrics["metrics"]["pg_statio_all_indexes"]["sqls"].values():
126+
assert "idx_blks_read > 0" in _compact_sql(sql)
127+
128+
83129
def test_pgwatch_dockerfile_sha_pin_and_patch_present():
84130
dockerfile = (PROJECT_ROOT / "pgwatch/Dockerfile").read_text()
85131

0 commit comments

Comments
 (0)