From 7b9439fca0f9d444e33835a3410a7f228459f4ce Mon Sep 17 00:00:00 2001
From: Michal Kleczek <michal@kleczek.org>
Date: Wed, 15 Apr 2026 00:09:25 +0200
Subject: [PATCH] fix: Flush pool as late as possible during schema cache
 reloading (#4645)

retryingSchemaCacheLoad flushes the pool upon every retry before it starts reloading the schema. This is too early as schema reloading might take some time during which new connections might be acquired. The consequence is that:
* upon successful schema cache reload we might have some connections created with the old schema cache
* we close connections upon each retry and under load we will keep closing and re-opening connections until schema cache load succeeds

This change is to make sure we flush the pool only after successful schema cache querying but before loading (so that connections acquired during loading wait for it and do not interfere with timing the loading process).
---
 CHANGELOG.md                                      | 4 ++++
 src/PostgREST/AppState.hs                         | 6 ++++--
 test/io/test_io.py                                | 8 ++++----
 test/observability/Observation/SchemaCacheSpec.hs | 6 ++----
 4 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9269d97530..fd5316a5d1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,10 @@ All notable changes to this project will be documented in this file. From versio
 
 - Log when the pool is released during schema cache reload on `log-level=debug` by @mkleczek in #4668
 
+### Fixed
+
+- Fix unnecessary connection pool flushes during schema cache reloading by @mkleczek in #4645
+
 ## [14.9] - 2026-04-10
 
 ### Added
diff --git a/src/PostgREST/AppState.hs b/src/PostgREST/AppState.hs
index f265fa7e36..2ab26fdbd5 100644
--- a/src/PostgREST/AppState.hs
+++ b/src/PostgREST/AppState.hs
@@ -313,8 +313,6 @@ retryingSchemaCacheLoad appState@AppState{stateObserver=observer, stateMainThrea
       observer $ ConnectionRetryObs delay
       putNextListenerDelay appState delay
 
-    flushPool appState
-
     (,) <$> qPgVersion <*> (qInDbConfig *> qSchemaCache)
   )
   where
@@ -363,6 +361,10 @@ retryingSchemaCacheLoad appState@AppState{stateObserver=observer, stateMainThrea
           -- IORef on putSchemaCache. This is why SCacheStatus is put at SCPending here to signal the Admin server (using isPending) that we're on a recovery state.
           putSCacheStatus appState SCPending
           putSchemaCache appState $ Just sCache
+          -- Flush the pool after loading the schema cache to reset any stale session cache entries
+          -- We do it after successfully querying the schema cache (because this can fail and during retries we would flush the pool repeatedly unnecessarily)
+          -- and after marking sCacheStatus as pending,
+          flushPool appState
           observer $ SchemaCacheQueriedObs resultTime
           (t, _) <- timeItT $ observer $ SchemaCacheSummaryObs $ showSummary sCache
           observer $ SchemaCacheLoadedObs t
diff --git a/test/io/test_io.py b/test/io/test_io.py
index 16f29ba728..fca9237883 100644
--- a/test/io/test_io.py
+++ b/test/io/test_io.py
@@ -692,7 +692,7 @@ def test_log_level(level, defaultenv):
         response = postgrest.session.get("/")
         assert response.status_code == 200
 
-        output = postgrest.read_stdout(nlines=7)
+        output = postgrest.read_stdout(nlines=9)
 
         if level == "crit":
             assert len(output) == 0
@@ -730,7 +730,7 @@ def test_log_level(level, defaultenv):
                     r'- - postgrest_test_anonymous \[.+\] "GET / HTTP/1.1" 200 \d+ "" "python-requests/.+"',
                 ],
             )
-            assert len(output) == 7
+            assert len(output) == 9
             assert any("Connection" and "is available" in line for line in output)
             assert any("Connection" and "is used" in line for line in output)
 
@@ -1367,7 +1367,7 @@ def test_db_error_logging_to_stderr(level, defaultenv, metapostgrest):
         assert response.status_code == 500
 
         # ensure the message appears on the logs
-        output = postgrest.read_stdout(nlines=6)
+        output = postgrest.read_stdout(nlines=8)
 
         if level == "crit":
             assert len(output) == 0
@@ -1584,7 +1584,7 @@ def test_log_pool_req_observation(level, defaultenv):
 
         if level == "debug":
             output = postgrest.read_stdout(nlines=7)
-            assert len(output) == 6
+            assert len(output) == 7
             match_log(output, [pool_req, pool_req_fullfill])
         elif level == "info":
             output = postgrest.read_stdout(nlines=4)
diff --git a/test/observability/Observation/SchemaCacheSpec.hs b/test/observability/Observation/SchemaCacheSpec.hs
index e3260271b8..318c2dba6b 100644
--- a/test/observability/Observation/SchemaCacheSpec.hs
+++ b/test/observability/Observation/SchemaCacheSpec.hs
@@ -27,7 +27,7 @@ spec = describe "Server started with metrics enabled" $ do
       waitFor (1 * sec) "SchemaCacheLoadedObs" $ \x -> [ o | o@SchemaCacheLoadedObs{} <- pure x ]
 
 
-  it "Should flush pool multiple times when schema reloading retries" $ do
+  it "Should flush pool once when schema reloading retries" $ do
     SpecState{specAppState = appState, specObsChan} <- getState
     let waitFor = waitForObs specObsChan
 
@@ -36,16 +36,14 @@ spec = describe "Server started with metrics enabled" $ do
           AppState.putConfig appState $ cfg { configDbSchemas = pure "bad_schema" }
           AppState.schemaCacheLoader appState
 
-          waitFor (1 * sec) "PoolFlushed 1" $ \x -> [ o | o@PoolFlushed <- pure x ]
           waitFor (1 * sec) "SchemaCacheErrorObs" $ \x -> [ o | o@SchemaCacheErrorObs{} <- pure x ]
 
           -- Restore configuration
           AppState.putConfig appState cfg
 
         -- Wait for 2 seconds so that retry can happen
-        waitFor (2 * sec) "PoolFlushed 2" $ \x -> [ o | o@PoolFlushed <- pure x ]
+        waitFor (2 * sec) "PoolFlushed" $ \x -> [ o | o@PoolFlushed <- pure x ]
         waitFor (1 * sec) "SchemaCacheQueriedObs" $ \x -> [ o | o@SchemaCacheQueriedObs{} <- pure x ]
         waitFor (1 * sec) "SchemaCacheLoadedObs" $ \x -> [ o | o@SchemaCacheLoadedObs{} <- pure x ]
-
   where
     sec = 1000000