1- {-# LANGUAGE LambdaCase #-}
2- {-# LANGUAGE NamedFieldPuns #-}
3- {-# LANGUAGE RecordWildCards #-}
4- {-# LANGUAGE RecursiveDo #-}
1+ {-# LANGUAGE LambdaCase #-}
2+ {-# LANGUAGE NamedFieldPuns #-}
3+ {-# LANGUAGE QuasiQuotes #-}
4+ {-# LANGUAGE RecordWildCards #-}
5+ {-# LANGUAGE RecursiveDo #-}
6+ {-# LANGUAGE TypeApplications #-}
57
68module PostgREST.AppState
79 ( AppState
@@ -33,7 +35,8 @@ import qualified Data.ByteString.Char8 as BS
3335import Data.Either.Combinators (whenLeft )
3436import qualified Hasql.Pool as SQL
3537import qualified Hasql.Pool.Config as SQL
36- import qualified Hasql.Session as SQL
38+ import qualified Hasql.Session as SQL hiding (statement )
39+ import qualified Hasql.Transaction as SQL hiding (sql )
3740import qualified Hasql.Transaction.Sessions as SQL
3841import qualified Network.HTTP.Types.Status as HTTP
3942import qualified PostgREST.Auth.JwtCache as JwtCache
@@ -63,11 +66,17 @@ import PostgREST.Config.Database (queryDbSettings,
6366import PostgREST.Config.PgVersion (PgVersion (.. ),
6467 minimumPgVersion )
6568import PostgREST.Debounce (makeDebouncer )
69+ import PostgREST.Metrics (MetricsState (connTrack ))
6670import PostgREST.SchemaCache (SchemaCache (.. ),
6771 querySchemaCache ,
6872 showSummary )
6973import PostgREST.SchemaCache.Identifiers (quoteQi )
7074
75+ import qualified Hasql.Decoders as HD
76+ import qualified Hasql.Encoders as HE
77+ import qualified Hasql.Statement as SQL
78+ import NeatInterpolation (trimming )
79+
7180import Protolude
7281
7382data AppState = AppState
@@ -303,7 +312,7 @@ getObserver = stateObserver
303312-- + Because connections cache the pg catalog(see #2620)
304313-- + For rapid recovery. Otherwise, the pool idle or lifetime timeout would have to be reached for new healthy connections to be acquired.
305314retryingSchemaCacheLoad :: AppState -> IO ()
306- retryingSchemaCacheLoad appState@ AppState {stateObserver= observer, stateMainThreadId= mainThreadId} =
315+ retryingSchemaCacheLoad appState@ AppState {stateObserver= observer, stateMainThreadId= mainThreadId, stateMetrics } =
307316 void $ retrying retryPolicy shouldRetry (\ RetryStatus {rsIterNumber, rsPreviousDelay} -> do
308317 when (rsIterNumber > 0 ) $ do
309318 let delay = fromMaybe 0 rsPreviousDelay `div` oneSecondInUs
@@ -342,8 +351,22 @@ retryingSchemaCacheLoad appState@AppState{stateObserver=observer, stateMainThrea
342351 qSchemaCache :: IO (Maybe SchemaCache )
343352 qSchemaCache = do
344353 conf@ AppConfig {.. } <- getConfig appState
354+ -- Throttle concurrent schema cache loads using advisory locks.
355+ -- This prevents a thundering herd problem on startup or when many PostgREST
356+ -- instances receive "reload schema" notifications at the same time.
357+ -- See get_lock_sql for details of the algorithm.
358+ -- Here we compute the number of open connections that is passed to the query.
359+ Metrics. ConnStats connected inUse <- Metrics. connectionCounts $ connTrack stateMetrics
360+ -- Determine whether schema cache loading will create a new session
361+ let
362+ -- if all connections are in use but the pool is not full, schema cache loading will open a new session
363+ scLoadingSessions = if connected <= inUse && inUse < configDbPoolSize then 1 else 0
364+ withTxLock = SQL. statement
365+ (fromIntegral $ connected + scLoadingSessions)
366+ (SQL. Statement get_lock_sql get_lock_params HD. noResult configDbPreparedStatements)
367+
345368 (resultTime, result) <-
346- timeItT $ usePool appState (SQL. transactionNoRetry SQL. ReadCommitted SQL. Read $ querySchemaCache conf)
369+ timeItT $ usePool appState (SQL. transactionNoRetry SQL. ReadCommitted SQL. Read $ withTxLock *> querySchemaCache conf)
347370 case result of
348371 Left e -> do
349372 markSchemaCachePending appState
@@ -365,6 +388,43 @@ retryingSchemaCacheLoad appState@AppState{stateObserver=observer, stateMainThrea
365388 observer $ SchemaCacheLoadedObs loadTime summary
366389 markSchemaCacheLoaded appState
367390 return $ Just sCache
391+ where
392+ -- Recursive query that tries to acquire the advisory locks in order and,
393+ -- if no attempt succeeded, waits for a randomly selected lock.
394+ -- It has a single parameter: this node's open connection count.
395+ -- The number of nodes is estimated by counting the sessions of the current
396+ -- session_user in pg_stat_activity and dividing by that parameter (assuming
397+ -- uniform load, all nodes have statistically the same number of connections).
398+ -- The estimate and the divisor are clamped to at least 1: log(2, 0) and
399+ -- division by zero raise errors in PostgreSQL, which would fail the load when
400+ -- fewer sessions are visible than this node counts (e.g. stale pool metrics).
401+ -- The number of locks is then round(log(2, number_of_nodes)); 0 disables locking.
402+ get_lock_sql = encodeUtf8 [trimming|
403+ WITH RECURSIVE attempts AS (
404+ SELECT 1 AS lock_number, pg_try_advisory_xact_lock(lock_id, 1) AS success FROM parameters
405+ UNION ALL
406+ SELECT next_lock_number AS lock_number, pg_try_advisory_xact_lock(lock_id, next_lock_number) AS success
407+ FROM
408+ parameters CROSS JOIN LATERAL (
409+ SELECT lock_number + 1 AS next_lock_number FROM attempts
410+ WHERE NOT success AND lock_number < locks_count
411+ ORDER BY lock_number DESC
412+ LIMIT 1
413+ ) AS previous_attempt
414+ ),
415+ counts AS (
416+ SELECT round(log(2, greatest(round(count(*)::double precision / greatest($$1, 1)), 1)::numeric))::int AS locks_count
417+ FROM
418+ pg_stat_activity WHERE usename = SESSION_USER
419+ ),
420+ parameters AS (
421+ SELECT locks_count, 50168275 AS lock_id FROM counts WHERE locks_count > 0
422+ )
423+ SELECT pg_advisory_xact_lock(lock_id, floor(random() * locks_count)::int + 1)
424+ FROM
425+ parameters WHERE NOT EXISTS (SELECT 1 FROM attempts WHERE success) |]
426+
427+ get_lock_params = HE.param $ HE.nonNullable HE.int4 -- single non-null int4 parameter: this node's open connection count
368428
369429 shouldRetry :: RetryStatus -> (Maybe PgVersion , Maybe SchemaCache ) -> IO Bool
370430 shouldRetry _ (pgVer, sCache) = do
0 commit comments