postgres-ai
diff --git a/‎.gitlab-ci.yml‎
Lines changed: 5 additions & 1 deletion b/‎.gitlab-ci.yml‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎config/pgwatch-prometheus/metrics.yml‎
Lines changed: 230 additions & 46 deletions b/‎config/pgwatch-prometheus/metrics.yml‎
Lines changed: 230 additions & 46 deletions
@@ -194,7 +194,11 @@ reporter:tests:
     - su - postgres -c "psql -c 'SELECT version();'" || echo "PostgreSQL started"
   script:
     - chown -R postgres:postgres "$CI_PROJECT_DIR"
-    - su - postgres -c "cd \"$CI_PROJECT_DIR\" && python -m pytest --run-integration --cov=reporter --cov-report=term --cov-report=xml:coverage/reporter-coverage.xml tests/reporter"
+    # PGAI_USE_SYSTEM_PG tells tests/compliance_vectors/conftest.py to
+    # connect to the cluster started by `service postgresql start` above
+    # instead of letting pytest-postgresql spin up its own (which hangs
+    # in this image — see tests/reporter/conftest.py for the same fix).
+    - su - postgres -c "cd \"$CI_PROJECT_DIR\" && PGAI_USE_SYSTEM_PG=1 python -m pytest --run-integration --cov=reporter --cov-report=term --cov-report=xml:coverage/reporter-coverage.xml tests/reporter tests/compliance_vectors/test_mr262_pgwatch_topn_integration.py"
     # Fix ownership for artifact collection
     - chown -R root:root "$CI_PROJECT_DIR/coverage" || true
   coverage: '/TOTAL\s+\d+\s+\d+\s+(\d+)%/'
 
@@ -1552,8 +1552,36 @@ metrics:
       - total_relation_size_bytes
     statement_timeout_seconds: 15
   pg_stat_all_indexes:
+    description: >
+      Retrieves index-level scan/tuple counters from `pg_stat_all_indexes`,
+      bounded to the top 100 by `idx_scan` per database. Adapts the top-N
+      + `'$other$'` bucket pattern from pgwatch2 PostgresAI edition
+      (gitlab.com/postgres-ai/pgwatch2 — our fork of Cybertec's pgwatch2):
+      everything below the cap is summed into a single `'$other$'` row so
+      dashboard totals stay correct under the cap.
+      Reads pg_stat_all_indexes (not pg_stat_user_indexes) so pg_catalog,
+      pg_toast and _timescaledb_internal indexes stay visible: a hot
+      catalog index or a Timescale chunk index ranks in by activity, not
+      by schema membership. The `'$other$'` sentinel starts with `$`,
+      which is not legal as the first character of an unquoted Postgres
+      identifier, so it can collide only with a schema, table or index
+      deliberately named `"$other$"` via a quoted identifier. Compatible
+      with all Postgres versions.
     sqls:
       11: |
+        with ranked as (
+          select
+            row_number() over (
+              order by idx_scan desc nulls last,
+                       schemaname, relname, indexrelname
+            ) as rownum,
+            schemaname,
+            relname,
+            indexrelname,
+            idx_scan,
+            idx_tup_read,
+            idx_tup_fetch
+          from pg_stat_all_indexes
+        )
         select /* pgwatch_generated */
           current_database() as tag_datname,
           schemaname as tag_schemaname,
@@ -1562,17 +1590,79 @@ metrics:
           idx_scan,
           idx_tup_read,
           idx_tup_fetch
-        from pg_stat_all_indexes
-        order by idx_scan desc
-        limit 5000
+        from ranked
+        where rownum <= 100
+        union all
+        select
+          current_database() as tag_datname,
+          '$other$'::text as tag_schemaname,
+          '$other$'::text as tag_relname,
+          '$other$'::text as tag_indexrelname,
+          coalesce(sum(idx_scan), 0)::int8 as idx_scan,
+          coalesce(sum(idx_tup_read), 0)::int8 as idx_tup_read,
+          coalesce(sum(idx_tup_fetch), 0)::int8 as idx_tup_fetch
+        from ranked
+        where rownum > 100
+        group by ()
+        having count(*) > 0
     gauges:
       - idx_scan
       - idx_tup_read
       - idx_tup_fetch
     statement_timeout_seconds: 15
   pg_stat_all_tables:
+    description: >
+      Retrieves table-level activity counters from `pg_stat_all_tables`,
+      bounded to the top 100 per database. Adapts the top-N + `'$other$'`
+      bucket pattern from pgwatch2 PostgresAI edition
+      (gitlab.com/postgres-ai/pgwatch2): everything below the cap is summed
+      into a single `'$other$'` row so dashboard totals stay correct.
+      Reads pg_stat_all_tables (not pg_stat_user_tables) so pg_catalog,
+      pg_toast and _timescaledb_internal tables stay visible: a bloated
+      TOAST table or a huge Timescale chunk ranks in by size, not by
+      schema membership. Ranks by `pg_class.relpages` (the relation's own
+      page count in BLCKSZ blocks, 8 KiB by default; a catalog estimate
+      refreshed by VACUUM/ANALYZE that excludes indexes and TOAST) joined
+      on `relid` instead of calling `pg_total_relation_size(relid)` per
+      row: the catalog join is a cheap lookup and cannot raise on a
+      concurrently-dropped relation, whereas the function opens the
+      relation and toast index every call.
+      Large, infrequently-updated tables stay in scope. The `'$other$'`
+      sentinel starts with `$`, which is not legal as the first character
+      of an unquoted Postgres identifier, so it can collide only with a
+      schema or table deliberately named `"$other$"` via a quoted
+      identifier. `last_vacuum` / `last_analyze` on the
+      `'$other$'` row aggregate the most recent maintenance time across
+      the tail (not `0`), so dashboards don't render 1970-01-01 when the
+      tail has been vacuumed. Compatible with all Postgres versions.
     sqls:
       11: |
+        with ranked as (
+          select
+            row_number() over (
+              order by coalesce(c.relpages, 0) desc nulls last,
+                       s.schemaname, s.relname
+            ) as rownum,
+            s.schemaname,
+            s.relname,
+            s.seq_scan,
+            s.seq_tup_read,
+            s.idx_scan,
+            s.idx_tup_fetch,
+            s.n_tup_ins,
+            s.n_tup_upd,
+            s.n_tup_del,
+            s.n_tup_hot_upd,
+            s.n_live_tup,
+            s.n_dead_tup,
+            s.last_vacuum,
+            s.last_autovacuum,
+            s.last_analyze,
+            s.last_autoanalyze,
+            s.vacuum_count,
+            s.autovacuum_count,
+            s.analyze_count,
+            s.autoanalyze_count
+          from pg_stat_all_tables s
+          left join pg_class c on c.oid = s.relid
+        )
         select /* pgwatch_generated */
           current_database() as tag_datname,
           schemaname as tag_schemaname,
@@ -1592,10 +1682,31 @@ metrics:
           extract(epoch from greatest(last_autoanalyze, last_analyze, '1970-01-01Z'))::int8 as last_analyze,
           (vacuum_count + autovacuum_count) as vacuum_count,
           (analyze_count + autoanalyze_count) as analyze_count
-        from
-          pg_stat_all_tables
-        order by n_live_tup + n_dead_tup desc
-        limit 5000
+        from ranked
+        where rownum <= 100
+        union all
+        select
+          current_database() as tag_datname,
+          '$other$'::text as tag_schemaname,
+          '$other$'::text as tag_relname,
+          coalesce(sum(seq_scan), 0)::int8 as seq_scan,
+          coalesce(sum(seq_tup_read), 0)::int8 as seq_tup_read,
+          coalesce(sum(idx_scan), 0)::int8 as idx_scan,
+          coalesce(sum(idx_tup_fetch), 0)::int8 as idx_tup_fetch,
+          coalesce(sum(n_tup_ins), 0)::int8 as n_tup_ins,
+          coalesce(sum(n_tup_upd), 0)::int8 as n_tup_upd,
+          coalesce(sum(n_tup_del), 0)::int8 as n_tup_del,
+          coalesce(sum(n_tup_hot_upd), 0)::int8 as n_tup_hot_upd,
+          coalesce(sum(n_live_tup), 0)::int8 as n_live_tup,
+          coalesce(sum(n_dead_tup), 0)::int8 as n_dead_tup,
+          extract(epoch from greatest(max(last_autovacuum), max(last_vacuum), '1970-01-01Z'))::int8 as last_vacuum,
+          extract(epoch from greatest(max(last_autoanalyze), max(last_analyze), '1970-01-01Z'))::int8 as last_analyze,
+          coalesce(sum(vacuum_count + autovacuum_count), 0)::int8 as vacuum_count,
+          coalesce(sum(analyze_count + autoanalyze_count), 0)::int8 as analyze_count
+        from ranked
+        where rownum > 100
+        group by ()
+        having count(*) > 0
     gauges:
       - seq_scan
       - seq_tup_read
@@ -2881,60 +2992,133 @@ metrics:
     statement_timeout_seconds: 15
   pg_statio_all_tables:
     description: >
-      Retrieves table-level I/O statistics from the PostgreSQL `pg_statio_all_tables` view, providing insights into I/O operations for all tables.
-      It returns block-level read and hit statistics for heap, index, TOAST, and TOAST index operations broken down by schema and table.
-      Joined with pg_class for efficient ordering by table size.
-      This metric helps administrators monitor table-level I/O performance and identify which tables are generating the most I/O activity.
-      Compatible with all PostgreSQL versions.
+      Retrieves table-level I/O statistics from `pg_statio_all_tables`, returning
+      block-level read and hit counters for heap, index, TOAST and TOAST-index
+      pages. Adapts the top-N + `'$other$'` bucket pattern from pgwatch2
+      PostgresAI edition (gitlab.com/postgres-ai/pgwatch2): ranks tables by
+      heap_blks_read, keeps the top 100, and folds the tail into a single
+      `'$other$'` row so totals remain accurate while cardinality stays bounded.
+      Reads pg_statio_all_tables (not pg_statio_user_tables) so I/O on pg_catalog,
+      pg_toast and _timescaledb_internal stays visible — those tables enter the
+      top-N by activity, not by schema membership. The zero-counter row skip is
+      kept: rows whose counters are all zero carry no information, and the
+      filter is based on counter values, not on schema or table identity.
+      The `'$other$'` sentinel starts with `$`, which is not legal as the first
+      character of an unquoted Postgres identifier, so it can collide only with
+      a schema or table deliberately named `"$other$"` via a quoted identifier.
+      Compatible with all Postgres versions.
     sqls:
       11: |-
+        with ranked as (
+          select
+            row_number() over (
+              order by heap_blks_read desc nulls last,
+                       schemaname, relname
+            ) as rownum,
+            schemaname,
+            relname,
+            heap_blks_read,
+            heap_blks_hit,
+            idx_blks_read,
+            idx_blks_hit,
+            toast_blks_read,
+            toast_blks_hit,
+            tidx_blks_read,
+            tidx_blks_hit
+          from pg_statio_all_tables
+          where
+            heap_blks_read > 0 or heap_blks_hit > 0
+            or idx_blks_read > 0 or idx_blks_hit > 0
+            or toast_blks_read > 0 or toast_blks_hit > 0
+            or tidx_blks_read > 0 or tidx_blks_hit > 0
+        )
         select /* pgwatch_generated */
           (extract(epoch from now()) * 1e9)::int8 as epoch_ns,
           current_database() as tag_datname,
-          s.schemaname as tag_schemaname,
-          s.relname as tag_relname,
-          s.heap_blks_read,
-          s.heap_blks_hit,
-          s.idx_blks_read,
-          s.idx_blks_hit,
-          s.toast_blks_read,
-          s.toast_blks_hit,
-          s.tidx_blks_read,
-          s.tidx_blks_hit
-        from
-          pg_statio_all_tables as s
-          join pg_class as c on
-            s.relname = c.relname
-            and s.schemaname = c.relnamespace::regnamespace::name
-        order by c.relpages desc
-        limit 5000;
+          schemaname as tag_schemaname,
+          relname as tag_relname,
+          heap_blks_read,
+          heap_blks_hit,
+          idx_blks_read,
+          idx_blks_hit,
+          toast_blks_read,
+          toast_blks_hit,
+          tidx_blks_read,
+          tidx_blks_hit
+        from ranked
+        where rownum <= 100
+        union all
+        select
+          (extract(epoch from now()) * 1e9)::int8 as epoch_ns,
+          current_database() as tag_datname,
+          '$other$'::text as tag_schemaname,
+          '$other$'::text as tag_relname,
+          coalesce(sum(heap_blks_read), 0)::int8 as heap_blks_read,
+          coalesce(sum(heap_blks_hit), 0)::int8 as heap_blks_hit,
+          coalesce(sum(idx_blks_read), 0)::int8 as idx_blks_read,
+          coalesce(sum(idx_blks_hit), 0)::int8 as idx_blks_hit,
+          coalesce(sum(toast_blks_read), 0)::int8 as toast_blks_read,
+          coalesce(sum(toast_blks_hit), 0)::int8 as toast_blks_hit,
+          coalesce(sum(tidx_blks_read), 0)::int8 as tidx_blks_read,
+          coalesce(sum(tidx_blks_hit), 0)::int8 as tidx_blks_hit
+        from ranked
+        where rownum > 100
+        group by ()
+        having count(*) > 0;
     gauges:
       - '*'
     statement_timeout_seconds: 15
   pg_statio_all_indexes:
     description: >
-      Retrieves index-level I/O statistics from the PostgreSQL `pg_statio_all_indexes` view, providing insights into I/O operations for all indexes.
-      It returns block-level read and hit statistics for index operations broken down by schema, table, and index name.
-      Joined with pg_class for efficient ordering by index size.
-      This metric helps administrators monitor index-level I/O performance and identify which indexes are generating the most I/O activity.
-      Compatible with all PostgreSQL versions.
+      Retrieves index-level I/O statistics from `pg_statio_all_indexes`, returning
+      block-level read and hit counters per index. Adapts the top-N + `'$other$'`
+      bucket pattern from pgwatch2 PostgresAI edition
+      (gitlab.com/postgres-ai/pgwatch2): ranks indexes by idx_blks_read, keeps the
+      top 100, folds the tail into a single `'$other$'` row, and drops indexes
+      with no I/O activity (zero-counter rows carry no information).
+      Reads pg_statio_all_indexes (not pg_statio_user_indexes) so catalog,
+      pg_toast and _timescaledb_internal indexes stay visible: a hot catalog
+      index will rank into the top-N by activity, not be hidden by schema name.
+      The `'$other$'` sentinel starts with `$`, which is not legal as the first
+      character of an unquoted Postgres identifier, so it can collide only with
+      a schema, table or index deliberately named `"$other$"` via a quoted
+      identifier. Compatible with all Postgres versions.
     sqls:
       11: |-
+        with ranked as (
+          select
+            row_number() over (
+              order by idx_blks_read desc nulls last,
+                       schemaname, relname, indexrelname
+            ) as rownum,
+            schemaname,
+            relname,
+            indexrelname,
+            idx_blks_read,
+            idx_blks_hit
+          from pg_statio_all_indexes
+          where idx_blks_read > 0 or idx_blks_hit > 0
+        )
         select /* pgwatch_generated */
           (extract(epoch from now()) * 1e9)::int8 as epoch_ns,
           current_database() as tag_datname,
-          s.schemaname as tag_schemaname,
-          s.relname as tag_relname,
-          s.indexrelname as tag_indexrelname,
-          s.idx_blks_read,
-          s.idx_blks_hit
-        from
-          pg_statio_all_indexes as s
-          join pg_class as c on
-            s.indexrelname = c.relname
-            and s.schemaname = c.relnamespace::regnamespace::name
-        order by c.relpages desc
-        limit 5000;
+          schemaname as tag_schemaname,
+          relname as tag_relname,
+          indexrelname as tag_indexrelname,
+          idx_blks_read,
+          idx_blks_hit
+        from ranked
+        where rownum <= 100
+        union all
+        select
+          (extract(epoch from now()) * 1e9)::int8 as epoch_ns,
+          current_database() as tag_datname,
+          '$other$'::text as tag_schemaname,
+          '$other$'::text as tag_relname,
+          '$other$'::text as tag_indexrelname,
+          coalesce(sum(idx_blks_read), 0)::int8 as idx_blks_read,
+          coalesce(sum(idx_blks_hit), 0)::int8 as idx_blks_hit
+        from ranked
+        where rownum > 100
+        group by ()
+        having count(*) > 0;
     gauges:
       - '*'
     statement_timeout_seconds: 15