1- {-# LANGUAGE LambdaCase #-}
2- {-# LANGUAGE MultiWayIf #-}
3- {-# LANGUAGE RecordWildCards #-}
1+ {-# LANGUAGE DeriveAnyClass #-}
2+ {-# LANGUAGE LambdaCase #-}
3+ {-# LANGUAGE MultiWayIf #-}
4+ {-# LANGUAGE RecordWildCards #-}
5+ {-# LANGUAGE ScopedTypeVariables #-}
6+ {-# LANGUAGE TypeApplications #-}
47
5- module PostgREST.Listener (runListener ) where
8+ module PostgREST.Listener (runListener , runListener' ) where
69
710import qualified Data.ByteString.Char8 as BS
811
@@ -18,28 +21,55 @@ import qualified PostgREST.Config as Config
1821import Control.Arrow ((&&&) )
1922import Data.Bitraversable (bisequence )
2023import Data.Either.Combinators (whenRight )
24+ import Data.IORef (IORef , newIORef ,
25+ readIORef , writeIORef )
2126import qualified Data.Text as T
27+ import Data.Time (UTCTime , diffUTCTime ,
28+ nominalDiffTimeToSeconds )
2229import qualified Database.PostgreSQL.LibPQ as LibPQ
2330import qualified Hasql.Session as SQL
2431import PostgREST.Config.Database (queryPgVersion )
2532import PostgREST.Config.PgVersion (pgvFullName )
2633import Protolude
34+ import System.IO.Error (isResourceVanishedError )
2735
2836-- | Starts the Listener in a thread
29- runListener :: AppState -> IO ()
30- runListener appState = do
37+ -- | Returns IO action to stop the listener thread.
38+ runListener :: AppState -> IO (IO () )
39+ runListener appState = runListener' appState (15 * minute) (30 * minute)
40+ where
41+ minute = 60
42+
43+ data ListenerStopped = ListenerStopped deriving (Show , Exception )
44+
45+ runListener' :: AppState -> Int -> Int -> IO (IO () )
46+ runListener' appState initialTcpKeepAlivesIdleSec maxTcpKeepAlivesIdleSec = do
3147 AppConfig {.. } <- getConfig appState
32- when configDbChannelEnabled $
33- void . forkIO . void $ retryingListen appState
48+ if configDbChannelEnabled then do
49+ started <- newIORef Nothing
50+ listenerThreadId <- forkIO . void $ retryingListen started initialTcpKeepAlivesIdleSec maxTcpKeepAlivesIdleSec False appState
51+ pure $ throwTo listenerThreadId ListenerStopped
52+ else
53+ mempty
3454
3555-- | Starts a LISTEN connection and handles notifications. It recovers with exponential backoff with a cap of 32 seconds, if the LISTEN connection is lost.
3656-- | This function never returns (but can throw) and return type enforces that.
37- retryingListen :: AppState -> IO Void
38- retryingListen appState = do
57+ retryingListen :: IORef ( Maybe UTCTime ) -> Int -> Int -> Bool -> AppState -> IO ()
58+ retryingListen lastActivity currentKeepalivesIdle maxKeepalivesIdle retryingOnIdleTimeout appState = do
3959 cfg@ AppConfig {.. } <- AppState. getConfig appState
4060 let
4161 dbChannel = toS configDbChannel
4262 onError err = do
63+ -- ResourceVanished should be reported when reading from socket fails
64+ -- as long as hasql-notifications does not wrap IOException in something else...
65+ let resourceVanished = maybe False isResourceVanishedError (fromException @ IOException err)
66+ (newTcpIdle, newMaxKeepalivesIdle) <-
67+ if resourceVanished then do
68+ readIORef lastActivity >>=
69+ maybe (pure (currentKeepalivesIdle, maxKeepalivesIdle)) adjustTcpIdle
70+ else
71+ pure (currentKeepalivesIdle, maxKeepalivesIdle)
72+ writeIORef lastActivity Nothing
4373 AppState. putIsListenerOn appState False
4474 observer $ DBListenFail dbChannel (Right err)
4575 when (isDbListenerBug err) $
@@ -54,15 +84,15 @@ retryingListen appState = do
5484 unless (delay == maxDelay) $
5585 AppState. putNextListenerDelay appState (delay * 2 )
5686 -- loop running the listener
57- retryingListen appState
87+ retryingListen lastActivity newTcpIdle newMaxKeepalivesIdle resourceVanished appState
5888
5989 -- Execute the listener with with error handling
60- handle onError $ do
90+ handle onError $ handle ( \ ListenerStopped -> pure () ) $ do
6191 -- Make sure we don't leak connections on errors
6292 bracket
6393 -- acquire connection
6494 (SQL. acquire $
65- Config. toConnectionSettings Config. addTargetSessionAttrs cfg)
95+ Config. toConnectionSettings (addKeepalivesOptions . Config. addTargetSessionAttrs) cfg)
6696 -- release connection
6797 (`whenRight` releaseConnection) $
6898 -- use connection
@@ -82,6 +112,7 @@ retryingListen appState = do
82112 AppState. putNextListenerDelay appState 1
83113
84114 observer $ DBListenStart pqHost pqPort pgFullName dbChannel
115+ saveLastActivityTime
85116
86117 -- wait for notifications
87118 -- this will never return, in case of an error it will throw and be caught by onError
@@ -96,15 +127,59 @@ retryingListen appState = do
96127 oneSecondInMicro = 1000000
97128 maxDelay = 32
98129
99- handleNotification channel msg =
130+ handleNotification channel msg = do
100131 if | BS. null msg -> observer (DBListenerGotSCacheMsg channel) >> cacheReloader
101132 | msg == " reload schema" -> observer (DBListenerGotSCacheMsg channel) >> cacheReloader
102133 | msg == " reload config" -> observer (DBListenerGotConfigMsg channel) >> AppState. readInDbConfig False appState
103134 | otherwise -> pure () -- Do nothing if anything else than an empty message is sent
135+ saveLastActivityTime
136+
137+ saveLastActivityTime = AppState. getTime appState >>= writeIORef lastActivity . Just
104138
105139 cacheReloader =
106140 AppState. schemaCacheLoader appState
107141
108142 releaseConnection = void . forkIO . handle (observer . DBListenerConnectionCleanupFail ) . SQL. release
109143
110144 isDbListenerBug e = " could not access status of transaction" `T.isInfixOf` show e
145+
146+ -- adjust the next keepalive timeout
147+ -- This is a simple discovery mechanism that
148+ -- should converge to optimum keepalive timeout
149+ -- we calculate the time T between connection failure and last activity
150+ -- if T is <= than current timeout
151+ -- it means timeout is too long
152+ -- so we set next timeout to T/2 and max timeout to T
153+ -- (max cannot be longer because we lost connection earlier)
154+ -- if T is longer than current timeout
155+ -- we set timeout in between current timeout and current max
156+ adjustTcpIdle lastActiveTime = do
157+ currentIdleSeconds <- AppState. getTime appState <&> round . nominalDiffTimeToSeconds . (`diffUTCTime` lastActiveTime)
158+ let currentIdleTimeout = currentKeepalivesIdle + keepalivesInterval * keepalivesCount
159+ -- if our idle time == current idle timeout setting it means
160+ -- we have to make it shorter
161+ if currentIdleSeconds `div` currentIdleTimeout <= 1 then
162+ -- only adjust if this is the second idle timeout failure
163+ -- this is to eliminate spurious adjustments (TODO rethink if it is really needed)
164+ if retryingOnIdleTimeout then
165+ -- try with 1/2 of current keepalive idle
166+ -- remember that it is the new maximum we can try later
167+ pure (max 1 $ currentKeepalivesIdle `div` 2 , currentKeepalivesIdle)
168+ else
169+ pure (currentKeepalivesIdle, maxKeepalivesIdle)
170+ else
171+ -- we can try to make it longer
172+ -- but not longer than previously calculated maximum
173+ pure (currentKeepalivesIdle + (maxKeepalivesIdle - currentKeepalivesIdle) `div` 2 , maxKeepalivesIdle)
174+
175+ keepalivesInterval = max 1 $ currentKeepalivesIdle `div` (5 * keepalivesCount)
176+ keepalivesCount = 5
177+
178+ -- (Config.addConnStringOption opt val) is an endomorphism
179+ -- so it is a Monoid under function composition
180+ -- Haskell is awesome
181+ addKeepalivesOptions = appEndo $ foldMap (Endo . uncurry Config. addConnStringOption . fmap show ) [
182+ (" keepalives_count" , keepalivesCount)
183+ , (" keepalives_interval" , keepalivesInterval)
184+ , (" keepalives_idle" , currentKeepalivesIdle)
185+ ]
0 commit comments