Skip to content

Commit 187fd23

Browse files
debug deadlock
1 parent 906da42 commit 187fd23

3 files changed

Lines changed: 55 additions & 8 deletions

File tree

src/Simplex/Messaging/Agent.hs

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,10 +145,12 @@ module Simplex.Messaging.Agent
145145
where
146146

147147
import Control.Applicative ((<|>))
148+
import Control.Concurrent (myThreadId)
148149
import Control.Concurrent.STM (retry)
149150
import Control.Logger.Simple
150151
import Control.Monad
151152
import Control.Monad.Except
153+
import Debug.Trace (traceIO)
152154
import Control.Monad.Reader
153155
import Control.Monad.Trans.Except
154156
import Crypto.Random (ChaChaDRG)
@@ -1841,6 +1843,9 @@ sendMessagesB_ c reqs connIds = withConnLocks c connIds "sendMessages" $ do
18411843

18421844
enqueueCommand :: AgentClient -> ACorrId -> ConnId -> Maybe SMPServer -> AgentCommand -> AM ()
18431845
enqueueCommand c corrId connId server aCommand = do
1846+
liftIO $ do
1847+
tid <- myThreadId
1848+
traceIO $ "ENQUEUE_CMD connId=" <> show connId <> " cmd=" <> show (agentCommandTag aCommand) <> " tid=" <> show tid
18441849
withStore c $ \db -> createCommand db corrId connId server aCommand
18451850
lift . void $ getAsyncCmdWorker True c connId server
18461851

@@ -1869,7 +1874,13 @@ runCommandProcessing c connId server_ Worker {doWork} = do
18691874
ri <- asks $ messageRetryInterval . config -- different retry interval?
18701875
forever $ do
18711876
endAgentOp c AOSndNetwork
1877+
liftIO $ do
1878+
tid <- myThreadId
1879+
traceIO $ "ASYNC_CMD wait connId=" <> show connId <> " tid=" <> show tid
18721880
lift $ waitForWork doWork
1881+
liftIO $ do
1882+
tid <- myThreadId
1883+
traceIO $ "ASYNC_CMD woke connId=" <> show connId <> " tid=" <> show tid
18731884
liftIO $ throwWhenInactive c
18741885
atomically $ beginAgentOperation c AOSndNetwork
18751886
withWork c doWork (\db -> getPendingServerCommand db connId server_) $ runProcessCmd (riFast ri)
@@ -2161,7 +2172,10 @@ getDeliveryWorker hasWork c sq =
21612172
pure (w, retryLock)
21622173

21632174
submitPendingMsg :: AgentClient -> SndQueue -> AM' ()
2164-
submitPendingMsg c sq = do
2175+
submitPendingMsg c sq@SndQueue {connId = sqConnId} = do
2176+
liftIO $ do
2177+
tid <- myThreadId
2178+
traceIO $ "SUBMIT_PENDING connId=" <> show sqConnId <> " tid=" <> show tid
21652179
atomically $ modifyTVar' (msgDeliveryOp c) $ \s -> s {opsInProgress = opsInProgress s + 1}
21662180
void $ getDeliveryWorker True c sq
21672181

@@ -2170,12 +2184,21 @@ runSmpQueueMsgDelivery c sq@SndQueue {userId, connId, server, queueMode} (Worker
21702184
AgentConfig {messageRetryInterval = ri, messageTimeout, helloTimeout, quotaExceededTimeout} <- asks config
21712185
forever $ do
21722186
endAgentOp c AOSndNetwork
2187+
liftIO $ do
2188+
tid <- myThreadId
2189+
traceIO $ "DELIVERY wait connId=" <> show connId <> " tid=" <> show tid
21732190
lift $ waitForWork doWork
2191+
liftIO $ do
2192+
tid <- myThreadId
2193+
traceIO $ "DELIVERY woke connId=" <> show connId <> " tid=" <> show tid
21742194
liftIO $ throwWhenInactive c
21752195
liftIO $ throwWhenNoDelivery c sq
21762196
atomically $ beginAgentOperation c AOSndNetwork
21772197
withWork c doWork (\db -> getPendingQueueMsg db connId sq) $
21782198
\(rq_, PendingMsgData {msgId, msgType, msgBody, pqEncryption, msgFlags, msgRetryState, internalTs, internalSndId, prevMsgHash, pendingMsgPrepData_}) -> do
2199+
liftIO $ do
2200+
tid <- myThreadId
2201+
traceIO $ "DELIVERY send connId=" <> show connId <> " msgType=" <> show msgType <> " tid=" <> show tid
21792202
endAgentOp c AOMsgDelivery -- this operation begins in submitPendingMsg
21802203
let mId = unId msgId
21812204
ri' = maybe id updateRetryInterval2 msgRetryState ri

src/Simplex/Messaging/Agent/Client.hs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,8 @@ module Simplex.Messaging.Agent.Client
195195
where
196196

197197
import Control.Applicative ((<|>))
198-
import Control.Concurrent (ThreadId, killThread)
198+
import Control.Concurrent (ThreadId, killThread, myThreadId)
199+
import Debug.Trace (traceIO)
199200
import Control.Concurrent.Async (Async, uninterruptibleCancel)
200201
import Control.Concurrent.STM (retry)
201202
import Control.Exception (AsyncException (..), BlockedIndefinitelyOnSTM (..))
@@ -1077,9 +1078,11 @@ nonBlockingNotifyEvent :: AgentClient -> ATransmission -> IO ()
10771078
nonBlockingNotifyEvent = notifyEvent_ nonBlockingWriteTBQueue
10781079

10791080
notifyEvent_ :: (TBQueue ATransmission -> ATransmission -> IO ()) -> AgentClient -> ATransmission -> IO ()
1080-
notifyEvent_ write c t@(_, connId, _)
1081+
notifyEvent_ write c t@(_, connId, AEvt _ evt)
10811082
| B.null connId = write (generalQ c) t
10821083
| otherwise = do
1084+
tid <- myThreadId
1085+
traceIO $ "NOTIFY connId=" <> show connId <> " evt=" <> show (aEventTag evt) <> " tid=" <> show tid
10831086
q <- getOrCreateConnWorker c connId
10841087
write q t
10851088

@@ -1096,9 +1099,12 @@ getOrCreateConnWorker c@AgentClient {connWorkers, connWorkerSeq} connId = do
10961099

10971100
connWorkerLoop :: AgentClient -> TBQueue ATransmission -> IO ()
10981101
connWorkerLoop AgentClient {processEvent} q = forever $ do
1099-
t <- atomically $ readTBQueue q
1102+
t@(_, connId, _) <- atomically $ readTBQueue q
1103+
tid <- myThreadId
1104+
traceIO $ "CALLBACK fire connId=" <> show connId <> " tid=" <> show tid
11001105
processEvent t `E.catchAny` \e ->
11011106
logError $ "connWorkerLoop error: " <> tshow e
1107+
traceIO $ "CALLBACK done connId=" <> show connId <> " tid=" <> show tid
11021108

11031109
withInvLock :: AgentClient -> ByteString -> Text -> AM a -> AM a
11041110
withInvLock c key name = ExceptT . withInvLock' c key name . runExceptT

src/Simplex/Messaging/Agent/Lock.hs

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,16 @@ module Simplex.Messaging.Agent.Lock
1010
)
1111
where
1212

13+
import Control.Concurrent (myThreadId)
1314
import Control.Monad (void)
1415
import Control.Monad.Except (ExceptT (..), runExceptT)
1516
import Control.Monad.IO.Unlift
1617
import Data.Functor (($>))
1718
import Data.Set (Set)
1819
import qualified Data.Set as S
1920
import Data.Text (Text)
21+
import qualified Data.Text as T
22+
import Debug.Trace (traceIO)
2023
import UnliftIO.Async (forConcurrently)
2124
import qualified UnliftIO.Exception as E
2225
import UnliftIO.STM
@@ -44,15 +47,30 @@ withLock' lock name =
4447
withGetLock :: MonadUnliftIO m => (k -> STM Lock) -> k -> Text -> m a -> m a
4548
withGetLock getLock key name a =
4649
E.bracket
47-
(atomically $ getPutLock getLock key name)
48-
(atomically . takeTMVar)
50+
(do liftIO $ do
51+
tid <- myThreadId
52+
traceIO $ "LOCK acquire " <> T.unpack name <> " tid=" <> show tid
53+
atomically $ getPutLock getLock key name)
54+
(\l -> do
55+
atomically $ takeTMVar l
56+
liftIO $ do
57+
tid <- myThreadId
58+
traceIO $ "LOCK release " <> T.unpack name <> " tid=" <> show tid)
4959
(const a)
5060

5161
withGetLocks :: MonadUnliftIO m => (k -> STM Lock) -> Set k -> Text -> m a -> m a
5262
withGetLocks getLock keys name = E.bracket holdLocks releaseLocks . const
5363
where
54-
holdLocks = forConcurrently (S.toList keys) $ \key -> atomically $ getPutLock getLock key name
55-
releaseLocks = mapM_ (atomically . takeTMVar)
64+
holdLocks = do
65+
liftIO $ do
66+
tid <- myThreadId
67+
traceIO $ "LOCKS acquire " <> T.unpack name <> " (" <> show (S.size keys) <> " keys) tid=" <> show tid
68+
forConcurrently (S.toList keys) $ \key -> atomically $ getPutLock getLock key name
69+
releaseLocks ls = do
70+
mapM_ (atomically . takeTMVar) ls
71+
liftIO $ do
72+
tid <- myThreadId
73+
traceIO $ "LOCKS release " <> T.unpack name <> " (" <> show (S.size keys) <> " keys) tid=" <> show tid
5674

5775
-- getLock and putTMVar can be in one transaction on the assumption that getLock doesn't write in case the lock already exists,
5876
-- and in case it is created and added to some shared resource (we use TMap) it also helps avoid contention for the newly created lock.

0 commit comments

Comments
 (0)