1- {-# LANGUAGE LambdaCase #-}
2- {-# LANGUAGE NamedFieldPuns #-}
3- {-# LANGUAGE RecordWildCards #-}
1+ {-# LANGUAGE LambdaCase #-}
2+ {-# LANGUAGE NamedFieldPuns #-}
3+ {-# LANGUAGE QuasiQuotes #-}
4+ {-# LANGUAGE RecordWildCards #-}
5+ {-# LANGUAGE TypeApplications #-}
46
57module PostgREST.AppState
68 ( AppState
@@ -32,7 +34,8 @@ import qualified Data.ByteString.Char8 as BS
3234import Data.Either.Combinators (whenLeft )
3335import qualified Hasql.Pool as SQL
3436import qualified Hasql.Pool.Config as SQL
35- import qualified Hasql.Session as SQL
37+ import qualified Hasql.Session as SQL hiding (statement )
38+ import qualified Hasql.Transaction as SQL hiding (sql )
3639import qualified Hasql.Transaction.Sessions as SQL
3740import qualified Network.HTTP.Types.Status as HTTP
3841import qualified PostgREST.Auth.JwtCache as JwtCache
@@ -62,11 +65,17 @@ import PostgREST.Config.Database (queryDbSettings,
6265 queryRoleSettings )
6366import PostgREST.Config.PgVersion (PgVersion (.. ),
6467 minimumPgVersion )
68+ import PostgREST.Metrics (MetricsState (connTrack ))
6569import PostgREST.SchemaCache (SchemaCache (.. ),
6670 querySchemaCache ,
6771 showSummary )
6872import PostgREST.SchemaCache.Identifiers (quoteQi )
6973
74+ import qualified Hasql.Decoders as HD
75+ import qualified Hasql.Encoders as HE
76+ import qualified Hasql.Statement as SQL
77+ import NeatInterpolation (trimming )
78+
7079import Protolude
7180
7281data AppState = AppState
@@ -299,7 +308,7 @@ getObserver = stateObserver
299308-- + Because connections cache the pg catalog(see #2620)
300309-- + For rapid recovery. Otherwise, the pool idle or lifetime timeout would have to be reached for new healthy connections to be acquired.
301310retryingSchemaCacheLoad :: AppState -> IO ()
302- retryingSchemaCacheLoad appState@ AppState {stateObserver= observer, stateMainThreadId= mainThreadId} =
311+ retryingSchemaCacheLoad appState@ AppState {stateObserver= observer, stateMainThreadId= mainThreadId, stateMetrics } =
303312 void $ retrying retryPolicy shouldRetry (\ RetryStatus {rsIterNumber, rsPreviousDelay} -> do
304313 when (rsIterNumber > 0 ) $ do
305314 let delay = fromMaybe 0 rsPreviousDelay `div` oneSecondInUs
@@ -340,9 +349,23 @@ retryingSchemaCacheLoad appState@AppState{stateObserver=observer, stateMainThrea
340349 qSchemaCache :: IO (Maybe SchemaCache )
341350 qSchemaCache = do
342351 conf@ AppConfig {.. } <- getConfig appState
352+ -- Throttle concurrent schema cache loads, guarded by advisory locks.
353+ -- This prevents the thundering herd problem on startup or when many PostgREST
354+ -- instances receive "reload schema" notifications at the same time.
355+ -- See get_lock_sql for details of the algorithm.
356+ -- Here we calculate the number of open connections passed to the query.
357+ Metrics. ConnStats connected inUse <- Metrics. connectionCounts $ connTrack stateMetrics
358+ -- Determine whether schema cache loading will create a new session
359+ let
360+ -- if all connections are in use but the pool is not full, schema cache loading will create a new session
361+ scLoadingSessions = if connected <= inUse && inUse < configDbPoolSize then 1 else 0
362+ withTxLock = SQL. statement
363+ (fromIntegral $ connected + scLoadingSessions)
364+ (SQL. Statement get_lock_sql get_lock_params HD. noResult configDbPreparedStatements)
365+
343366 (resultTime, result) <-
344367 let transaction = if configDbPreparedStatements then SQL. transaction else SQL. unpreparedTransaction in
345- timeItT $ usePool appState (transaction SQL. ReadCommitted SQL. Read $ querySchemaCache conf)
368+ timeItT $ usePool appState (transaction SQL. ReadCommitted SQL. Read $ withTxLock *> querySchemaCache conf)
346369 case result of
347370 Left e -> do
348371 markSchemaCachePending appState
@@ -359,6 +382,43 @@ retryingSchemaCacheLoad appState@AppState{stateObserver=observer, stateMainThrea
359382 observer . uncurry SchemaCacheLoadedObs =<< timeItT (evaluate $ showSummary sCache)
360383 markSchemaCacheLoaded appState
361384 return $ Just sCache
385+ where
386+ -- Recursive query that tries to acquire the advisory locks in order (lock numbers 1 .. locks_count)
387+ -- and, if no attempt succeeded, blocks waiting for one randomly selected lock.
388+ -- It has a single parameter: this node's open connection count.
389+ -- It is used to estimate the number of nodes
390+ -- by counting the sessions listed in pg_stat_activity for the current session_user
391+ -- and dividing that count by this node's open connections.
392+ -- Assuming load is uniform among cluster nodes, all should have
393+ -- statistically the same number of open connections.
394+ -- Once the number of nodes is known, the number of locks is computed as round(log(2, number_of_nodes));
395+ -- when that is 0 no lock is taken. NOTE(review): this comment previously said ceil(...) but the SQL uses round(...) - confirm which is intended.
396+ get_lock_sql = encodeUtf8 [trimming |
397+ WITH RECURSIVE attempts AS (
398+ SELECT 1 AS lock_number, pg_try_advisory_xact_lock(lock_id, 1) AS success FROM parameters
399+ UNION ALL
400+ SELECT next_lock_number AS lock_number, pg_try_advisory_xact_lock(lock_id, next_lock_number) AS success
401+ FROM
402+ parameters CROSS JOIN LATERAL (
403+ SELECT lock_number + 1 AS next_lock_number FROM attempts
404+ WHERE NOT success AND lock_number < locks_count
405+ ORDER BY lock_number DESC
406+ LIMIT 1
407+ ) AS previous_attempt
408+ ),
409+ counts AS (
410+ SELECT round(log(2, round(count(*)::double precision/$$1)::numeric))::int AS locks_count
411+ FROM
412+ pg_stat_activity WHERE usename = SESSION_USER
413+ ),
414+ parameters AS (
415+ SELECT locks_count, 50168275 AS lock_id FROM counts WHERE locks_count > 0
416+ )
417+ SELECT pg_advisory_xact_lock(lock_id, floor(random() * locks_count)::int + 1)
418+ FROM
419+ parameters WHERE NOT EXISTS (SELECT 1 FROM attempts WHERE success) |]
420+
421+ get_lock_params = HE. param (HE. nonNullable HE. int4) -- encoder for the lock query's single non-nullable int4 parameter
362422
363423 shouldRetry :: RetryStatus -> (Maybe PgVersion , Maybe SchemaCache ) -> IO Bool
364424 shouldRetry _ (pgVer, sCache) = do
0 commit comments