11# Patched pgwatch build
22#
3- # Fixes: "unexpected extension X version input: 0.0" error that kills all
4- # metric gathering when the monitored DB has extensions whose version
5- # parses to 0 (e.g. supabase-dbdev 0.0.4 → regex extracts "0.0"
6- # → VersionToInt returns 0 → pgwatch treats it as invalid and aborts).
3+ # Fixes:
4+ # 1. "unexpected extension X version input: 0.0" error that kills all metric
5+ # gathering when the monitored DB has extensions whose version parses to 0
6+ # (e.g. supabase-dbdev 0.0.4 → regex extracts "0.0" → VersionToInt returns
7+ # 0 → pgwatch treats it as invalid and aborts). The one-line fix: skip the
8+ # extension instead of returning a fatal error from FetchRuntimeInfo.
79#
8- # The one-line fix: skip the extension instead of returning a fatal
9- # error from FetchRuntimeInfo.
10+ # 2. Prometheus sink wipes its per-DB metric cache on every scrape (regression
11+ # introduced upstream in v3.6.0, commit fb7abf39 / PR #790). This turns the
12+ # /pgwatch endpoint into a transient drain: scrapes between collector polls
13+ # return zero pg metrics; scrapes after multiple polls return the union and
14+ # can exceed Prometheus sample_limit. Both halves are the same bug. Fix:
15+ # remove the 3-line wipe so the cache holds the latest sample per metric
16+ # until the next poll overwrites it. See gitlab.com/postgres-ai/postgresai#195.
1017#
1118# Based on: cybertec-postgresql/pgwatch v3.7.0
1219
@@ -45,7 +52,7 @@ ARG TARGETARCH
4552COPY --from=uibuilder /src /pgwatch
4653COPY --from=uibuilder /src/internal/webui/build /pgwatch/internal/webui/build
4754
48- # Apply the fix: skip extensions with unparseable versions instead of aborting.
55+ # Patch 1: skip extensions with unparseable versions instead of aborting.
4956# pgwatch's regex extracts only major.minor from extension versions. For
5057# extensions like supabase-dbdev (0.0.4), this yields "0.0" which
5158# VersionToInt() maps to 0 — treated as invalid, killing all metrics.
@@ -56,6 +63,39 @@ RUN grep -q 'return fmt.Errorf("unexpected extension %s version input: %s", ext,
5663RUN sed -i 's|return fmt.Errorf("unexpected extension %s version input: %s", ext, ver)|return nil /* skip unparseable extension version */|' \
5764 /pgwatch/internal/sources/conn.go
5865
66+ # Patch 2: keep the Prometheus sink's per-DB metric cache across scrapes.
67+ # Upstream commit fb7abf39 (v3.6.0, "improve Prometheus scrapping (#790)") added
68+ # a wipe of promAsyncMetricCache[dbname] at the end of every Collect(), turning
69+ # the cache into a transient drain. Effect: VM scrapes landing between collector
70+ # polls return zero pg metrics (empty Grafana); scrapes landing after multiple
71+ # poll cycles return the union of all of them at once and routinely exceed the
72+ # configured Prometheus sample_limit (rejected scrapes). Both halves are the
73+ # same bug.
74+ # Fix: remove the 3-line wipe so the cache holds the latest sample per metric
75+ # until overwritten by the next poll. Samples are emitted with their original
76+ # collection epoch via NewMetricWithTimestamp(), so VM deduplicates repeats at
77+ # storage time — re-emitting the same (metric, timestamp) across scrapes is a
78+ # no-op. The 10-min promScrapingStalenessHardDropLimit guard in
79+ # MetricStoreMessageToPromMetrics already covers the "collection stalled, stop
80+ # emitting" case the wipe was defending against.
81+ # Refs: gitlab.com/postgres-ai/postgresai#195
82+ RUN grep -Fq 'clear the cache for this db after metrics are collected' \
83+ /pgwatch/internal/sinks/prometheus.go \
84+ || (echo "ERROR: drain patch target not found in /pgwatch/internal/sinks/prometheus.go — upstream may have changed" ; exit 1)
85+ RUN grep -Fq 'for dbname, metricsMessages := range promAsyncMetricCache' \
86+ /pgwatch/internal/sinks/prometheus.go \
87+ || (echo "ERROR: drain patch loop header not found in /pgwatch/internal/sinks/prometheus.go — upstream may have changed" ; exit 1)
88+ # Remove the 3-line wipe (Lock + assignment-with-unique-comment + Unlock): on the Lock() line, append the next two lines (N;N) and delete all three only if they contain the wipe comment.
89+ RUN sed -i '/promAsyncMetricCacheLock\.Lock()$/{N;N;/clear the cache for this db after metrics are collected/d;}' \
90+ /pgwatch/internal/sinks/prometheus.go
91+ # Removing the wipe leaves the `dbname` loop variable unused — rename to `_`.
92+ RUN sed -i 's|for dbname, metricsMessages := range promAsyncMetricCache|for _, metricsMessages := range promAsyncMetricCache|' \
93+ /pgwatch/internal/sinks/prometheus.go
94+ RUN ! grep -Fq 'clear the cache for this db after metrics are collected' /pgwatch/internal/sinks/prometheus.go \
95+ || (echo "ERROR: drain patch applied but wipe comment still present in /pgwatch/internal/sinks/prometheus.go" ; exit 1)
96+ RUN ! grep -Fq 'for dbname, metricsMessages := range promAsyncMetricCache' /pgwatch/internal/sinks/prometheus.go \
97+ || (echo "ERROR: drain patch applied but dbname loop var still present in /pgwatch/internal/sinks/prometheus.go" ; exit 1)
98+
5999RUN cd /pgwatch && CGO_ENABLED=0 GOOS=$TARGETOS GOARCH=$TARGETARCH go build \
60100 -ldflags "-X 'main.version=3.7.0-patched'" \
61101 ./cmd/pgwatch
0 commit comments