922922# enough for read-only analytics, and the alternative is a permanently
923923# unrecoverable replica.
924924#
925+ # Every pg_resetwal -f invocation also touches /pgdata/needs-reindex-all:
926+ # pg_resetwal bypasses WAL replay, so any index update that was in flight
927+ # at snapshot time may have left torn pages ("unexpected zero page at
928+ # block N" surfaces in queries later, which is hard to debug after the
929+ # fact). The main container's startup hook picks up that flag and runs
930+ # REINDEX DATABASE on every user database before marking the replica
931+ # Ready.
932+ #
925933# Stage 1: if the first attempt fails with a WAL-recovery signature,
926934# short-circuit straight to pg_resetwal + retry. Retrying the same
927935# command won't help when recovery itself is the blocker.
@@ -946,6 +954,7 @@ postgres_single_or_resetwal() {{
946954 echo "WAL recovery failed (snapshot likely captured mid-online-backup) — running pg_resetwal -f and retrying" >&2
947955 rm -f "$logfile"
948956 pg_resetwal -f "$PGDATA"
957+ touch /pgdata/needs-reindex-all
949958 echo "$sql_input" | postgres --single -D "$PGDATA" postgres
950959 return $?
951960 fi
@@ -966,6 +975,7 @@ postgres_single_or_resetwal() {{
966975 echo "second attempt also failed — running pg_resetwal -f as a last resort and retrying" >&2
967976 rm -f "$logfile"
968977 pg_resetwal -f "$PGDATA"
978+ touch /pgdata/needs-reindex-all
969979 echo "$sql_input" | postgres --single -D "$PGDATA" postgres
970980}}
971981
@@ -1061,7 +1071,7 @@ ALTER ROLE ${{ANALYTICS_USERNAME}} WITH SUPERUSER;
10611071SQLEOF
10621072fi
10631073
1064- if [ -f /pgdata/needs-reindex ]; then
1074+ if [ -f /pgdata/needs-reindex ] || [ -f /pgdata/needs-reindex-all ] ; then
10651075 PGRO_STAGE=restored
10661076else
10671077 PGRO_STAGE=ready
@@ -1235,33 +1245,112 @@ echo "Auth setup complete"
12351245 image: Some ( pg_image) ,
12361246 command: Some ( vec![ "/bin/sh" . to_string( ) , "-c" . to_string( ) ] ) ,
12371247 args: Some ( vec![ r#"
1238- if [ -f /pgdata/needs-reindex ]; then
1248+ if [ -f /pgdata/needs-reindex ] || [ -f /pgdata/needs-reindex-all ] ; then
12391249 PG_MAJOR=$(cat /pgdata/pgdata/PG_VERSION)
12401250 (
12411251 while ! pg_isready -q -U postgres -d postgres; do sleep 2; done
12421252 psql -U postgres -d postgres -c "UPDATE _pgro.restore_info SET stage = 'reindexing', last_transition_time = now() WHERE id = 1;"
1243- for db in $(psql -U postgres -d postgres -At -c "SELECT datname FROM pg_database WHERE datallowconn AND datname <> 'template0'"); do
1244- INDEXES=$(psql -U postgres -d "$db" -At -c "
1245- SELECT DISTINCT indexrelid::regclass::text
1246- FROM pg_index i
1247- JOIN pg_attribute a ON a.attrelid = i.indexrelid
1248- WHERE a.attcollation <> 0 AND i.indisvalid;
1249- ")
1250- COUNT=$(echo "$INDEXES" | grep -c . || true)
1251- echo "Reindex after locale change: $db ($COUNT collation-dependent indexes)"
1252- N=0
1253- echo "$INDEXES" | while IFS= read -r idx; do
1254- [ -z "$idx" ] && continue
1255- N=$((N + 1))
1256- echo " [$N/$COUNT] $db: $idx"
1257- if [ "$PG_MAJOR" -ge 14 ]; then
1258- psql -U postgres -d "$db" -c "REINDEX INDEX CONCURRENTLY $idx;" 2>&1 || true
1259- else
1260- psql -U postgres -d "$db" -c "REINDEX INDEX $idx;" 2>&1 || true
1261- fi
1253+ # needs-reindex-all (pg_resetwal aftermath) can leave torn pages in
1254+ # ANY index, not just collation-dependent ones. A blind REINDEX
1255+ # DATABASE takes hours on the prod DBs; instead do a two-step
1256+ # smart pass:
1257+ #
1258+ # 1. Use the amcheck contrib extension to read each valid btree
1259+ # index and verify its structural invariants. Indexes that
1260+ # fail the check (including "unexpected zero page" corruption)
1261+ # get queued for REINDEX. Indexes that pass are left alone.
1262+ # For a healthy snapshot this finds nothing and reads
1263+ # ~index-size of disk instead of ~table-size to rewrite.
1264+ #
1265+ # 2. Blindly REINDEX every non-btree index in the DB (GIN, GiST,
1266+ # BRIN, hash). amcheck only covers btree; non-btree indexes
1267+ # are typically a small fraction of total index size so the
1268+ # cost is bounded, and skipping them risks the same
1269+ # post-resetwal corruption sneaking through.
1270+ #
1271+ # CONCURRENTLY when available (PG ≥ 14) so the work overlaps with
1272+ # whatever clients hit the pod after the readiness gate lifts.
1273+ if [ -f /pgdata/needs-reindex-all ]; then
1274+ for db in $(psql -U postgres -d postgres -At -c "SELECT datname FROM pg_database WHERE datallowconn AND datname <> 'template0'"); do
1275+ echo "Reindex after pg_resetwal: $db (smart pass via amcheck)"
1276+ psql -U postgres -d "$db" -c "CREATE EXTENSION IF NOT EXISTS amcheck;" 2>&1 || true
1277+
1278+ # Step 1: scan btree indexes; collect those that fail amcheck.
1279+ BTREE_INDEXES=$(psql -U postgres -d "$db" -At -c "
1280+ SELECT c.oid::regclass::text
1281+ FROM pg_class c
1282+ JOIN pg_am a ON a.oid = c.relam
1283+ JOIN pg_index i ON i.indexrelid = c.oid
1284+ WHERE c.relkind = 'i' AND a.amname = 'btree' AND i.indisvalid;
1285+ ")
1286+ BTREE_COUNT=$(echo "$BTREE_INDEXES" | grep -c . || true)
1287+ echo " amcheck scanning $BTREE_COUNT btree indexes in $db"
1288+ CORRUPT_BTREE=""
1289+ N=0
1290+ for idx in $BTREE_INDEXES; do
1291+ [ -z "$idx" ] && continue
1292+ N=$((N + 1))
1293+ # bt_index_check raises an error if the index is corrupt.
1294+ # Suppress its output and check exit code; an error → queue it.
1295+ if ! psql -U postgres -d "$db" -At -c "SELECT bt_index_check('$idx'::regclass);" > /dev/null 2>&1; then
1296+ echo " [$N/$BTREE_COUNT] CORRUPT: $db: $idx"
1297+ CORRUPT_BTREE="$CORRUPT_BTREE $idx"
1298+ fi
1299+ done
1300+
1301+ # Step 2: list non-btree indexes for blind reindex.
1302+ NONBTREE_INDEXES=$(psql -U postgres -d "$db" -At -c "
1303+ SELECT c.oid::regclass::text
1304+ FROM pg_class c
1305+ JOIN pg_am a ON a.oid = c.relam
1306+ JOIN pg_index i ON i.indexrelid = c.oid
1307+ WHERE c.relkind = 'i' AND a.amname <> 'btree' AND i.indisvalid;
1308+ ")
1309+ NONBTREE_COUNT=$(echo "$NONBTREE_INDEXES" | grep -c . || true)
1310+
1311+ TO_REINDEX="$CORRUPT_BTREE $NONBTREE_INDEXES"
1312+ TOTAL=$(echo "$TO_REINDEX" | tr ' ' '\n' | grep -c . || true)
1313+ echo " $db: $TOTAL indexes to REINDEX (corrupt btree + all non-btree=$NONBTREE_COUNT)"
1314+ N=0
1315+ for idx in $TO_REINDEX; do
1316+ [ -z "$idx" ] && continue
1317+ N=$((N + 1))
1318+ echo " [$N/$TOTAL] $db: REINDEX $idx"
1319+ if [ "$PG_MAJOR" -ge 14 ]; then
1320+ psql -U postgres -d "$db" -c "REINDEX INDEX CONCURRENTLY $idx;" 2>&1 || true
1321+ else
1322+ psql -U postgres -d "$db" -c "REINDEX INDEX $idx;" 2>&1 || true
1323+ fi
1324+ done
1325+ done
1326+ rm -f /pgdata/needs-reindex-all
1327+ # needs-reindex (collation-dependent only) is a strict subset of
1328+ # what we just did, so clear it too.
1329+ rm -f /pgdata/needs-reindex
1330+ elif [ -f /pgdata/needs-reindex ]; then
1331+ for db in $(psql -U postgres -d postgres -At -c "SELECT datname FROM pg_database WHERE datallowconn AND datname <> 'template0'"); do
1332+ INDEXES=$(psql -U postgres -d "$db" -At -c "
1333+ SELECT DISTINCT indexrelid::regclass::text
1334+ FROM pg_index i
1335+ JOIN pg_attribute a ON a.attrelid = i.indexrelid
1336+ WHERE a.attcollation <> 0 AND i.indisvalid;
1337+ ")
1338+ COUNT=$(echo "$INDEXES" | grep -c . || true)
1339+ echo "Reindex after locale change: $db ($COUNT collation-dependent indexes)"
1340+ N=0
1341+ echo "$INDEXES" | while IFS= read -r idx; do
1342+ [ -z "$idx" ] && continue
1343+ N=$((N + 1))
1344+ echo " [$N/$COUNT] $db: $idx"
1345+ if [ "$PG_MAJOR" -ge 14 ]; then
1346+ psql -U postgres -d "$db" -c "REINDEX INDEX CONCURRENTLY $idx;" 2>&1 || true
1347+ else
1348+ psql -U postgres -d "$db" -c "REINDEX INDEX $idx;" 2>&1 || true
1349+ fi
1350+ done
12621351 done
1263- done
1264- rm -f /pgdata/needs-reindex
1352+ rm -f /pgdata/needs-reindex
1353+ fi
12651354 psql -U postgres -d postgres -c "UPDATE _pgro.restore_info SET stage = 'ready', last_transition_time = now() WHERE id = 1;"
12661355 echo "Background reindex complete"
12671356 ) &
@@ -1308,7 +1397,7 @@ exec postgres -D /pgdata/pgdata ${PGRO_LOG_LEVEL:+-c log_min_messages=$PGRO_LOG_
13081397 command: Some ( vec![
13091398 "/bin/sh" . to_string( ) ,
13101399 "-c" . to_string( ) ,
1311- "pg_isready -U postgres -d postgres && [ ! -f /pgdata/needs-reindex ]" . to_string( ) ,
1400+ "pg_isready -U postgres -d postgres && [ ! -f /pgdata/needs-reindex ] && [ ! -f /pgdata/needs-reindex-all ] " . to_string( ) ,
13121401 ] ) ,
13131402 } ) ,
13141403 initial_delay_seconds: Some ( 5 ) ,
0 commit comments