Skip to content

Commit 298cf98

Browse files
soulomoonfendor
andauthored
Prevent broken pipes and shutdown hangs during LSP shutdown (#4701)
Coordinate LanguageServer shutdown with worker queue teardown. The reactor now requests shutdown, waits for confirmation, logs timeout and exit outcomes, and stops server-side work before late background tasks can outlive the server. Worker threads also stop dequeuing after shutdown is requested, preventing broken-pipe failures and shutdown hangs in tests. Bump the lsp lower bound to 2.8 to match the shutdown flow used here. Co-authored-by: fendor <fendor@users.noreply.github.com>
1 parent 7d02159 commit 298cf98

10 files changed

Lines changed: 295 additions & 128 deletions

File tree

cabal.project

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ constraints:
4040
-- We want to be able to benefit from the performance optimisations
4141
-- in the future, thus: TODO: remove this flag.
4242
bitvec -simd,
43-
monad-control >=1.0.3,
43+
monad-control >=1.0.3,
4444

4545

4646
-- Some of the formatters need the latest Cabal-syntax version,

ghcide-test/exe/InitializeResponseTests.hs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,4 +96,3 @@ tests = withResource acquire release tests where
9696

9797
release :: TResponseMessage Method_Initialize -> IO ()
9898
release = mempty
99-

ghcide/session-loader/Development/IDE/Session.hs

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -84,15 +84,14 @@ import Data.Void
8484

8585
import Control.Concurrent.STM.Stats (atomically, modifyTVar',
8686
readTVar, writeTVar)
87-
import Control.Concurrent.STM.TQueue
8887
import Control.Monad.Trans.Cont (ContT (ContT, runContT))
8988
import Data.Foldable (for_)
9089
import Data.HashMap.Strict (HashMap)
9190
import Data.HashSet (HashSet)
9291
import qualified Data.HashSet as Set
9392
import Database.SQLite.Simple
9493
import Development.IDE.Core.Tracing (withTrace)
95-
import Development.IDE.Core.WorkerThread (withWorkerQueue)
94+
import Development.IDE.Core.WorkerThread
9695
import Development.IDE.Session.Dependency
9796
import Development.IDE.Session.Diagnostics (renderCradleError)
9897
import Development.IDE.Session.Ghc hiding (Log)
@@ -130,6 +129,7 @@ data Log
130129
| LogNoneCradleFound FilePath
131130
| LogHieBios HieBios.Log
132131
| LogSessionLoadingChanged
132+
| LogSessionWorkerThread LogWorkerThread
133133
| LogSessionNewLoadedFiles ![FilePath]
134134
| LogSessionReloadOnError FilePath ![FilePath]
135135
| LogGetOptionsLoop !FilePath
@@ -140,6 +140,7 @@ deriving instance Show Log
140140

141141
instance Pretty Log where
142142
pretty = \case
143+
LogSessionWorkerThread msg -> pretty msg
143144
LogTime s -> "Time:" <+> pretty s
144145
LogLookupSessionCache path -> "Looking up session cache for" <+> pretty path
145146
LogGetOptionsLoop fp -> "Loop: getOptions for" <+> pretty fp
@@ -365,8 +366,8 @@ runWithDb recorder fp = ContT $ \k -> do
365366
_ <- withWriteDbRetryable deleteMissingRealFiles
366367
_ <- withWriteDbRetryable garbageCollectTypeNames
367368

368-
runContT (withWorkerQueue (writer withWriteDbRetryable)) $ \chan ->
369-
withHieDb fp (\readDb -> k (WithHieDbShield $ makeWithHieDbRetryable recorder rng readDb, chan))
369+
runContT (withWorkerQueue (cmapWithPrio LogSessionWorkerThread recorder) "hiedb thread" (writer withWriteDbRetryable))
370+
$ \chan -> withHieDb fp (\readDb -> k (WithHieDbShield $ makeWithHieDbRetryable recorder rng readDb, chan))
370371
where
371372
writer withHieDbRetryable l = do
372373
-- TODO: probably should let exceptions be caught/logged/handled by top level handler
@@ -632,7 +633,7 @@ newSessionState = do
632633
-- components mapping to the same hie.yaml file are mapped to the same
633634
-- HscEnv which is updated as new components are discovered.
634635

635-
loadSessionWithOptions :: Recorder (WithPriority Log) -> SessionLoadingOptions -> FilePath -> TQueue (IO ()) -> IO (Action IdeGhcSession)
636+
loadSessionWithOptions :: Recorder (WithPriority Log) -> SessionLoadingOptions -> FilePath -> TaskQueue (IO ()) -> IO (Action IdeGhcSession)
636637
loadSessionWithOptions recorder SessionLoadingOptions{..} rootDir que = do
637638
let toAbsolutePath = toAbsolute rootDir -- see Note [Root Directory]
638639

@@ -662,7 +663,7 @@ loadSessionWithOptions recorder SessionLoadingOptions{..} rootDir que = do
662663
-- see Note [Serializing runs in separate thread]
663664
-- Start the 'getOptionsLoop' if the queue is empty
664665
liftIO $ atomically $
665-
Extra.whenM (isEmptyTQueue que) $ do
666+
Extra.whenM (isEmptyTaskQueue que) $ do
666667
let newSessionLoadingOptions = SessionLoadingOptions
667668
{ findCradle = cradleLoc
668669
, ..
@@ -682,7 +683,7 @@ loadSessionWithOptions recorder SessionLoadingOptions{..} rootDir que = do
682683
, sessionLoadingOptions = newSessionLoadingOptions
683684
}
684685

685-
writeTQueue que (runReaderT (getOptionsLoop recorder sessionShake sessionState knownTargetsVar) sessionEnv)
686+
writeTaskQueue que (runReaderT (getOptionsLoop recorder sessionShake sessionState knownTargetsVar) sessionEnv)
686687

687688
-- Each one of deps will be registered as a FileSystemWatcher in the GhcSession action
688689
-- so that we can get a workspace/didChangeWatchedFiles notification when a dep changes.
@@ -701,11 +702,15 @@ loadSessionWithOptions recorder SessionLoadingOptions{..} rootDir que = do
701702
lookupOrWaitCache :: Recorder (WithPriority Log) -> SessionState -> FilePath -> IO (IdeResult HscEnvEq, DependencyInfo)
702703
lookupOrWaitCache recorder sessionState absFile = do
703704
let ncfp = toNormalizedFilePath' absFile
704-
cacheResult <- maybe (return Nothing) (guardedA (checkDependencyInfo . snd)) =<< (atomically $ do
705-
-- wait until target file is not in pendingFiles
706-
Extra.whenM (S.lookup absFile (pendingFiles sessionState)) STM.retry
707-
-- check if in the cache
708-
checkInCache sessionState ncfp)
705+
cacheResult <- maybeM
706+
(return Nothing)
707+
(guardedA (checkDependencyInfo . snd))
708+
(atomically $ do
709+
-- wait until target file is not in pendingFiles
710+
Extra.whenM (S.lookup absFile (pendingFiles sessionState)) STM.retry
711+
-- check if in the cache
712+
checkInCache sessionState ncfp)
713+
709714

710715
logWith recorder Debug $ LogLookupSessionCache absFile
711716

ghcide/src/Development/IDE/Core/Compile.hs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ import Development.IDE.Core.Preprocessor
7575
import Development.IDE.Core.ProgressReporting (progressUpdate)
7676
import Development.IDE.Core.RuleTypes
7777
import Development.IDE.Core.Shake
78+
import Development.IDE.Core.WorkerThread (writeTaskQueue)
7879
import Development.IDE.Core.Tracing (withTrace)
7980
import qualified Development.IDE.GHC.Compat as Compat
8081
import qualified Development.IDE.GHC.Compat as GHC
@@ -941,7 +942,7 @@ indexHieFile se mod_summary srcPath !hash hf = do
941942
-- hiedb doesn't use the Haskell src, so we clear it to avoid unnecessarily keeping it around
942943
let !hf' = hf{hie_hs_src = mempty}
943944
modifyTVar' indexPending $ HashMap.insert srcPath hash
944-
writeTQueue indexQueue $ \withHieDb -> do
945+
writeTaskQueue indexQueue $ \withHieDb -> do
945946
-- We are now in the worker thread
946947
-- Check if a newer index of this file has been scheduled, and if so skip this one
947948
newerScheduled <- atomically $ do

ghcide/src/Development/IDE/Core/FileStore.hs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ import Development.IDE.Core.IdeConfiguration (isWorkspaceFile)
4545
import Development.IDE.Core.RuleTypes
4646
import Development.IDE.Core.Shake hiding (Log)
4747
import qualified Development.IDE.Core.Shake as Shake
48+
import Development.IDE.Core.WorkerThread
4849
import Development.IDE.GHC.Orphans ()
4950
import Development.IDE.Graph
5051
import Development.IDE.Import.DependencyInformation
@@ -304,7 +305,7 @@ typecheckParentsAction recorder nfp = do
304305
setSomethingModified :: VFSModified -> IdeState -> String -> IO [Key] -> IO ()
305306
setSomethingModified vfs state reason actionBetweenSession = do
306307
-- Update database to remove any files that might have been renamed/deleted
307-
atomically $ writeTQueue (indexQueue $ hiedbWriter $ shakeExtras state) (\withHieDb -> withHieDb deleteMissingRealFiles)
308+
atomically $ writeTaskQueue (indexQueue $ hiedbWriter $ shakeExtras state) (\withHieDb -> withHieDb deleteMissingRealFiles)
308309
void $ restartShakeSession (shakeExtras state) vfs reason [] actionBetweenSession
309310

310311
registerFileWatches :: [String] -> LSP.LspT Config IO Bool

ghcide/src/Development/IDE/Core/Shake.hs

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ module Development.IDE.Core.Shake(
2525
IdeState, shakeSessionInit, shakeExtras, shakeDb, rootDir,
2626
ShakeExtras(..), getShakeExtras, getShakeExtrasRules,
2727
KnownTargets(..), Target(..), toKnownFiles, unionKnownTargets, mkKnownTargets,
28-
IdeRule, IdeResult,
28+
IdeRule, IdeResult, RestartQueue,
2929
GetModificationTime(GetModificationTime, GetModificationTime_, missingFileDiagnostics),
3030
shakeOpen, shakeShut,
3131
shakeEnqueue,
@@ -262,12 +262,15 @@ data HieDbWriter
262262
-- | Actions to queue up on the index worker thread
263263
-- The inner `(HieDb -> IO ()) -> IO ()` wraps `HieDb -> IO ()`
264264
-- with (currently) retry functionality
265-
type IndexQueue = TQueue (((HieDb -> IO ()) -> IO ()) -> IO ())
265+
type IndexQueue = TaskQueue (((HieDb -> IO ()) -> IO ()) -> IO ())
266+
type RestartQueue = TaskQueue (IO ())
267+
type LoaderQueue = TaskQueue (IO ())
268+
266269

267270
data ThreadQueue = ThreadQueue {
268271
tIndexQueue :: IndexQueue
269-
, tRestartQueue :: TQueue (IO ())
270-
, tLoaderQueue :: TQueue (IO ())
272+
, tRestartQueue :: RestartQueue
273+
, tLoaderQueue :: LoaderQueue
271274
}
272275

273276
-- Note [Semantic Tokens Cache Location]
@@ -338,9 +341,9 @@ data ShakeExtras = ShakeExtras
338341
-- ^ Default HLS config, only relevant if the client does not provide any Config
339342
, dirtyKeys :: TVar KeySet
340343
-- ^ Set of dirty rule keys since the last Shake run
341-
, restartQueue :: TQueue (IO ())
344+
, restartQueue :: RestartQueue
342345
-- ^ Queue of restart actions to be run.
343-
, loaderQueue :: TQueue (IO ())
346+
, loaderQueue :: LoaderQueue
344347
-- ^ Queue of loader actions to be run.
345348
}
346349

ghcide/src/Development/IDE/Core/WorkerThread.hs

Lines changed: 94 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,39 @@ Description : This module provides an API for managing worker threads in the IDE
77
see Note [Serializing runs in separate thread]
88
-}
99
module Development.IDE.Core.WorkerThread
10-
(withWorkerQueue, awaitRunInThread)
11-
where
10+
( LogWorkerThread (..),
11+
withWorkerQueue,
12+
awaitRunInThread,
13+
TaskQueue,
14+
isEmptyTaskQueue,
15+
writeTaskQueue,
16+
withWorkerQueueSimple
17+
)
18+
where
1219

1320
import Control.Concurrent.Async (withAsync)
1421
import Control.Concurrent.STM
1522
import Control.Concurrent.Strict (newBarrier, signalBarrier,
1623
waitBarrier)
17-
import Control.Exception.Safe (SomeException, throwIO, try)
18-
import Control.Monad (forever)
24+
import Control.Exception.Safe (SomeException, finally, throwIO,
25+
try)
1926
import Control.Monad.Cont (ContT (ContT))
27+
import qualified Data.Text as T
28+
import Ide.Logger
29+
30+
data LogWorkerThread
31+
= LogThreadEnding !T.Text
32+
| LogThreadEnded !T.Text
33+
| LogSingleWorkStarting !T.Text
34+
| LogSingleWorkEnded !T.Text
35+
deriving (Show)
36+
37+
instance Pretty LogWorkerThread where
38+
pretty = \case
39+
LogThreadEnding t -> "Worker thread ending:" <+> pretty t
40+
LogThreadEnded t -> "Worker thread ended:" <+> pretty t
41+
LogSingleWorkStarting t -> "Worker starting a unit of work: " <+> pretty t
42+
LogSingleWorkEnded t -> "Worker ended a unit of work: " <+> pretty t
2043

2144
{-
2245
Note [Serializing runs in separate thread]
@@ -28,30 +51,78 @@ Like the db writes, session loading in session loader, shake session restarts.
2851
Originally we used various ways to implement this, but it was hard to maintain and error prone.
2952
Moreover, we can not stop these threads uniformly when we are shutting down the server.
3053
-}
54+
data TaskQueue a = TaskQueue (TQueue a)
55+
56+
data ExitOrTask t = Exit | Task t
57+
58+
newTaskQueueIO :: IO (TaskQueue a)
59+
newTaskQueueIO = TaskQueue <$> newTQueueIO
60+
61+
-- | 'withWorkerQueueSimple' is a simplified version of 'withWorkerQueue'
62+
-- for the common case where the worker function is just 'id'.
63+
withWorkerQueueSimple :: Recorder (WithPriority LogWorkerThread) -> T.Text -> ContT () IO (TaskQueue (IO ()))
64+
withWorkerQueueSimple recorder title = withWorkerQueue recorder title id
3165

3266
-- | 'withWorkerQueue' creates a new 'TQueue', and launches a worker
3367
-- thread which polls the queue for requests and runs the given worker
3468
-- function on them.
35-
withWorkerQueue :: (t -> IO a) -> ContT () IO (TQueue t)
36-
withWorkerQueue workerAction = ContT $ \mainAction -> do
37-
q <- newTQueueIO
38-
withAsync (writerThread q) $ \_ -> mainAction q
39-
where
40-
writerThread q =
41-
forever $ do
42-
l <- atomically $ readTQueue q
43-
workerAction l
69+
withWorkerQueue :: Recorder (WithPriority LogWorkerThread) -> T.Text -> (t -> IO ()) -> ContT () IO (TaskQueue t)
70+
withWorkerQueue recorder title workerAction = ContT $ \mainAction -> do
71+
q <- newTaskQueueIO
72+
-- Use a TMVar as a stop flag to coordinate graceful shutdown.
73+
-- The worker thread checks this flag before dequeuing each job; if set, it exits immediately,
74+
-- ensuring that no new work is started after shutdown is requested.
75+
-- This mechanism is necessary because some downstream code may swallow async exceptions,
76+
-- making 'cancel' unreliable for stopping the thread in all cases.
77+
-- If 'cancel' does interrupt the thread (e.g., while blocked in STM or in a cooperative job),
78+
-- the thread exits immediately and never checks the TMVar; in such cases, the stop flag is redundant.
79+
b <- newEmptyTMVarIO
80+
withAsync (writerThread q b) $ \_ -> do
81+
mainAction q
82+
-- if we want to debug the exact location the worker swallows an async exception, we can
83+
-- temporarily comment out the `finally` clause.
84+
`finally` atomically (putTMVar b ())
85+
logWith recorder Debug (LogThreadEnding title)
86+
logWith recorder Debug (LogThreadEnded title)
87+
where
88+
writerThread q b =
89+
-- See above: check stop flag before dequeuing, exit if set, otherwise run next job.
90+
do
91+
task <- atomically $ do
92+
task <- tryReadTaskQueue q
93+
isEm <- isEmptyTMVar b
94+
case (isEm, task) of
95+
(False, _) -> return Exit -- stop flag set, exit
96+
(_, Just t) -> return $ Task t -- got a task, run it
97+
(_, Nothing) -> retry -- no task, wait
98+
case task of
99+
Exit -> return ()
100+
Task t -> do
101+
logWith recorder Debug $ LogSingleWorkStarting title
102+
workerAction t
103+
logWith recorder Debug $ LogSingleWorkEnded title
104+
writerThread q b
105+
44106

45107
-- | 'awaitRunInThread' queues up an 'IO' action to be run by a worker thread,
46108
-- and then blocks until the result is computed. If the action throws an
47109
-- non-async exception, it is rethrown in the calling thread.
48-
awaitRunInThread :: TQueue (IO ()) -> IO result -> IO result
49-
awaitRunInThread q act = do
50-
-- Take an action from TQueue, run it and
51-
-- use barrier to wait for the result
52-
barrier <- newBarrier
53-
atomically $ writeTQueue q $ try act >>= signalBarrier barrier
54-
resultOrException <- waitBarrier barrier
55-
case resultOrException of
56-
Left e -> throwIO (e :: SomeException)
57-
Right r -> return r
110+
awaitRunInThread :: TaskQueue (IO ()) -> IO result -> IO result
111+
awaitRunInThread (TaskQueue q) act = do
112+
-- Take an action from TQueue, run it and
113+
-- use barrier to wait for the result
114+
barrier <- newBarrier
115+
atomically $ writeTQueue q (try act >>= signalBarrier barrier)
116+
resultOrException <- waitBarrier barrier
117+
case resultOrException of
118+
Left e -> throwIO (e :: SomeException)
119+
Right r -> return r
120+
121+
writeTaskQueue :: TaskQueue a -> a -> STM ()
122+
writeTaskQueue (TaskQueue q) = writeTQueue q
123+
124+
isEmptyTaskQueue :: TaskQueue a -> STM Bool
125+
isEmptyTaskQueue (TaskQueue q) = isEmptyTQueue q
126+
127+
tryReadTaskQueue :: TaskQueue a -> STM (Maybe a)
128+
tryReadTaskQueue (TaskQueue q) = tryReadTQueue q

0 commit comments

Comments
 (0)