1- {-# LANGUAGE LambdaCase #-}
2- {-# LANGUAGE MultiWayIf #-}
3- {-# LANGUAGE RecordWildCards #-}
1+ {-# LANGUAGE DeriveAnyClass #-}
2+ {-# LANGUAGE LambdaCase #-}
3+ {-# LANGUAGE MultiWayIf #-}
4+ {-# LANGUAGE RecordWildCards #-}
5+ {-# LANGUAGE ScopedTypeVariables #-}
6+ {-# LANGUAGE TypeApplications #-}
47
5- module PostgREST.Listener (runListener ) where
8+ module PostgREST.Listener (runListener , runListener' ) where
69
710import qualified Data.ByteString.Char8 as BS
811
@@ -19,28 +22,55 @@ import qualified PostgREST.Config as Config
1922import Control.Arrow ((&&&) )
2023import Data.Bitraversable (bisequence )
2124import Data.Either.Combinators (whenRight )
25+ import Data.IORef (IORef , newIORef ,
26+ readIORef , writeIORef )
2227import qualified Data.Text as T
28+ import Data.Time (UTCTime , diffUTCTime ,
29+ nominalDiffTimeToSeconds )
2330import qualified Database.PostgreSQL.LibPQ as LibPQ
2431import qualified Hasql.Session as SQL
2532import PostgREST.Config.Database (queryPgVersion )
2633import PostgREST.Config.PgVersion (pgvFullName )
2734import Protolude
35+ import System.IO.Error (isResourceVanishedError )
2836
2937-- | Starts the Listener in a thread
30- runListener :: AppState -> IO ()
31- runListener appState = do
38+ -- | Returns IO action to stop the listener thread.
39+ runListener :: AppState -> IO (IO () )
40+ runListener appState = runListener' appState (15 * minute) (30 * minute)
41+ where
42+ minute = 60
43+
44+ data ListenerStopped = ListenerStopped deriving (Show , Exception )
45+
46+ runListener' :: AppState -> Int -> Int -> IO (IO () )
47+ runListener' appState initialTcpKeepAlivesIdleSec maxTcpKeepAlivesIdleSec = do
3248 AppConfig {.. } <- getConfig appState
33- when configDbChannelEnabled $
34- void . forkIO . void $ retryingListen appState
49+ if configDbChannelEnabled then do
50+ started <- newIORef Nothing
51+ listenerThreadId <- forkIO . void $ retryingListen started initialTcpKeepAlivesIdleSec maxTcpKeepAlivesIdleSec False appState
52+ pure $ throwTo listenerThreadId ListenerStopped
53+ else
54+ mempty
3555
3656-- | Starts a LISTEN connection and handles notifications. It recovers with exponential backoff with a cap of 32 seconds, if the LISTEN connection is lost.
3757-- | This function never returns (but can throw) and return type enforces that.
38- retryingListen :: AppState -> IO Void
39- retryingListen appState = do
58+ retryingListen :: IORef ( Maybe UTCTime ) -> Int -> Int -> Bool -> AppState -> IO ()
59+ retryingListen lastActivity currentKeepalivesIdle maxKeepalivesIdle retryingOnIdleTimeout appState = do
4060 AppConfig {.. } <- AppState. getConfig appState
4161 let
4262 dbChannel = toS configDbChannel
4363 onError err = do
64+ -- ResourceVanished should be reported when reading from socket fails
65+ -- as long as hasql-notifications does not wrap IOException in something else...
66+ let resourceVanished = maybe False isResourceVanishedError (fromException @ IOException err)
67+ (newTcpIdle, newMaxKeepalivesIdle) <-
68+ if resourceVanished then do
69+ readIORef lastActivity >>=
70+ maybe (pure (currentKeepalivesIdle, maxKeepalivesIdle)) adjustTcpIdle
71+ else
72+ pure (currentKeepalivesIdle, maxKeepalivesIdle)
73+ writeIORef lastActivity Nothing
4474 AppState. putIsListenerOn appState False
4575 observer $ DBListenFail dbChannel (Right err)
4676 when (isDbListenerBug err) $
@@ -55,14 +85,14 @@ retryingListen appState = do
5585 unless (delay == maxDelay) $
5686 AppState. putNextListenerDelay appState (delay * 2 )
5787 -- loop running the listener
58- retryingListen appState
88+ retryingListen lastActivity newTcpIdle newMaxKeepalivesIdle resourceVanished appState
5989
6090 -- Execute the listener with with error handling
61- handle onError $ do
91+ handle onError $ handle ( \ ListenerStopped -> pure () ) $ do
6292 -- Make sure we don't leak connections on errors
6393 bracket
6494 -- acquire connection
65- (SQL. acquire $ toUtf8 (Config. addTargetSessionAttrs $ Config. addFallbackAppName prettyVersion configDbUri))
95+ (SQL. acquire $ toUtf8 (addKeepalivesOptions $ Config. addTargetSessionAttrs $ Config. addFallbackAppName prettyVersion configDbUri))
6696 -- release connection
6797 (`whenRight` releaseConnection) $
6898 -- use connection
@@ -82,6 +112,7 @@ retryingListen appState = do
82112 AppState. putNextListenerDelay appState 1
83113
84114 observer $ DBListenStart pqHost pqPort pgFullName dbChannel
115+ saveLastActivityTime
85116
86117 -- wait for notifications
87118 -- this will never return, in case of an error it will throw and be caught by onError
@@ -96,15 +127,59 @@ retryingListen appState = do
96127 oneSecondInMicro = 1000000
97128 maxDelay = 32
98129
99- handleNotification channel msg =
130+ handleNotification channel msg = do
100131 if | BS. null msg -> observer (DBListenerGotSCacheMsg channel) >> cacheReloader
101132 | msg == " reload schema" -> observer (DBListenerGotSCacheMsg channel) >> cacheReloader
102133 | msg == " reload config" -> observer (DBListenerGotConfigMsg channel) >> AppState. readInDbConfig False appState
103134 | otherwise -> pure () -- Do nothing if anything else than an empty message is sent
135+ saveLastActivityTime
136+
137+ saveLastActivityTime = AppState. getTime appState >>= writeIORef lastActivity . Just
104138
105139 cacheReloader =
106140 AppState. schemaCacheLoader appState
107141
108142 releaseConnection = void . forkIO . handle (observer . DBListenerConnectionCleanupFail ) . SQL. release
109143
110144 isDbListenerBug e = " could not access status of transaction" `T.isInfixOf` show e
145+
146+ -- adjust the next keepalive timeout
147+ -- This is a simple discovery mechanism that
148+ -- should converge to optimum keepalive timeout
149+ -- we calculate the time T between connection failure and last activity
150+ -- if T is <= than current timeout
151+ -- it means timeout is too long
152+ -- so we set next timeout to T/2 and max timeout to T
153+ -- max cannot be longer because we lost connection earlier
154+ -- if T is longer than current timeout
155+ -- we set timeout in between current timeout and current max
156+ adjustTcpIdle lastActiveTime = do
157+ currentIdleSeconds <- AppState. getTime appState <&> round . nominalDiffTimeToSeconds . (`diffUTCTime` lastActiveTime)
158+ let currentIdleTimeout = currentKeepalivesIdle + keepalivesInterval * keepalivesCount
159+ -- if our idle time == current idle timeout setting it means
160+ -- we have to make it shorter
161+ if currentIdleSeconds `div` currentIdleTimeout <= 1 then
162+ -- only adjust if this is the second idle timeout failure
163+ -- this is to eliminate spurious adjustments (TODO rethink if it is really needed)
164+ if retryingOnIdleTimeout then
165+ -- try with 1/2 of current keepalive idle
166+ -- remember that it is the new maximum we can try later
167+ pure (max 1 $ currentKeepalivesIdle `div` 2 , currentKeepalivesIdle)
168+ else
169+ pure (currentKeepalivesIdle, maxKeepalivesIdle)
170+ else
171+ -- we can try to make it longer
172+ -- but not longer than previously calculated maximum
173+ pure (currentKeepalivesIdle + (maxKeepalivesIdle - currentKeepalivesIdle) `div` 2 , maxKeepalivesIdle)
174+
175+ keepalivesInterval = max 1 $ currentKeepalivesIdle `div` (5 * keepalivesCount)
176+ keepalivesCount = 5
177+
178+ -- (Config.addConnStringOption opt val) is an endomorphism
179+ -- so it is a Monoid under function composition
180+ -- Haskell is awesome
181+ addKeepalivesOptions = appEndo $ foldMap (Endo . uncurry Config. addConnStringOption . fmap show ) [
182+ (" keepalives_count" , keepalivesCount)
183+ , (" keepalives_interval" , keepalivesInterval)
184+ , (" keepalives_idle" , currentKeepalivesIdle)
185+ ]
0 commit comments