Skip to content

Commit e1ec7e3

Browse files
authored
Merge pull request #2104 from IntersectMBO/kderme/integrate-node-10.7-lsm
Integrate lsm
2 parents cf5a2b4 + 546c2e5 commit e1ec7e3

22 files changed

Lines changed: 935 additions & 829 deletions

File tree

cabal.project

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -39,11 +39,6 @@ package cardano-db-tool
3939
package cardano-smash-server
4040
ghc-options: -Wall -Werror -Wredundant-constraints -Wincomplete-uni-patterns -Wincomplete-record-updates -Wpartial-fields -Wunused-imports -Wunused-packages
4141

42-
package blockio
43-
-- Use serial block IO to avoid requiring liburing (not available in devx CI).
44-
-- TODO: revert when CI provides liburing or switch to io_uring for better LSM perf.
45-
flags: +serialblockio
46-
4742
package cardano-node
4843
-- We are using cardano-node as a library and we never use the systemd scribe, so there
4944
-- is no benefit to linking against it

cardano-chain-gen/test/Test/Cardano/Db/Mock/Config.hs

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -281,8 +281,15 @@ withConfig staticDir mutableDir cmdLineArgs config action = do
281281
{-# ANN withConfig ("HLint: ignore Redundant pure" :: String) #-}
282282

283283
mkSyncNodeConfig :: FilePath -> CommandLineArgs -> IO SyncNodeConfig
284-
mkSyncNodeConfig configFilePath cmdLineArgs =
285-
readSyncNodeConfig $ mkConfigFile configDir configFilename
284+
mkSyncNodeConfig configFilePath cmdLineArgs = do
285+
cfg <- readSyncNodeConfig $ mkConfigFile configDir configFilename
286+
-- Allow env-var override of the ledger backend so CI can run the
287+
-- full suite against both "inmemory" and "lsm" without per-test config changes.
288+
mBackend <- lookupEnv "DB_SYNC_TEST_LEDGER_BACKEND"
289+
pure $ case mBackend of
290+
Just "lsm" -> cfg {dncLedgerBackend = LedgerBackendLSM Nothing}
291+
Just "inmemory" -> cfg {dncLedgerBackend = LedgerBackendInMemory}
292+
_ -> cfg
286293
where
287294
configFilename = claConfigFilename cmdLineArgs
288295
configDir = mkConfigDir configFilePath

cardano-db-sync/app/cardano-db-sync.hs

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -67,9 +67,10 @@ dbSyncMain = do
6767

6868
stateDirErrorMsg :: [Char]
6969
stateDirErrorMsg =
70-
"Error: If not using --state-dir then make sure to have ledger disabled. "
71-
<> "For more details view https://github.com/IntersectMBO/cardano-db-sync/blob"
72-
<> "/master/doc/syncing-and-rollbacks.md#ledger-state"
70+
"Error: --state-dir is required when ledger is enabled. "
71+
<> "Either provide --state-dir or set \"ledger\": \"disable\" in the config. "
72+
<> "See https://github.com/IntersectMBO/cardano-db-sync/blob"
73+
<> "/master/doc/configuration.md#ledger"
7374

7475
---------------------------------------------------------------------------------------------------
7576
-- Command Line Configurations

cardano-db-sync/cardano-db-sync.cabal

Lines changed: 21 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -108,6 +108,7 @@ library
108108
Cardano.DbSync.Era.Util
109109

110110
Cardano.DbSync.Ledger.Event
111+
Cardano.DbSync.Ledger.Snapshot
111112
Cardano.DbSync.Ledger.State
112113
Cardano.DbSync.Ledger.Types
113114

@@ -190,6 +191,8 @@ library
190191
, either
191192
, extra
192193
, filepath
194+
, fs-api
195+
, lsm-tree
193196
, groups
194197
, hasql
195198
, http-client
@@ -204,13 +207,16 @@ library
204207
, ouroboros-consensus
205208
, ouroboros-consensus:cardano
206209
, ouroboros-consensus:diffusion
210+
, ouroboros-consensus:lsm
211+
, resource-registry
207212
, ouroboros-consensus:protocol
208213
, ouroboros-network:api
209214
, ouroboros-network:framework
210215
, ouroboros-network:protocols
211216
, plutus-ledger-api
212217
, prometheus
213218
, psqueues
219+
, random
214220
, random-shuffle
215221
, scientific
216222
, serialise
@@ -250,7 +256,11 @@ executable cardano-db-sync
250256
-Wno-unsafe
251257
-threaded
252258
-rtsopts
253-
"-with-rtsopts=-A16m -N3 --disable-delayed-os-memory-return"
259+
260+
if arch(arm)
261+
ghc-options: "-with-rtsopts=-T -I0 -A16m -N1 --disable-delayed-os-memory-return"
262+
else
263+
ghc-options: "-with-rtsopts=-T -I0 -A16m -qg1 -qb1 -N2 --disable-delayed-os-memory-return"
254264

255265
autogen-modules: Paths_cardano_db_sync
256266
MigrationValidations
@@ -289,7 +299,11 @@ executable http-get-json-metadata
289299
-Wno-unsafe
290300
-threaded
291301
-rtsopts
292-
"-with-rtsopts=-A16m -N3 --disable-delayed-os-memory-return"
302+
303+
if arch(arm)
304+
ghc-options: "-with-rtsopts=-T -I0 -A16m -N1 --disable-delayed-os-memory-return"
305+
else
306+
ghc-options: "-with-rtsopts=-T -I0 -A16m -qg1 -qb1 -N2 --disable-delayed-os-memory-return"
293307

294308
build-depends: base
295309
, ansi-terminal
@@ -318,7 +332,11 @@ executable test-http-get-json-metadata
318332
-Wno-unsafe
319333
-threaded
320334
-rtsopts
321-
"-with-rtsopts=-A16m -N3 --disable-delayed-os-memory-return"
335+
336+
if arch(arm)
337+
ghc-options: "-with-rtsopts=-T -I0 -A16m -N1 --disable-delayed-os-memory-return"
338+
else
339+
ghc-options: "-with-rtsopts=-T -I0 -A16m -qg1 -qb1 -N2 --disable-delayed-os-memory-return"
322340

323341
build-depends: base
324342
, bytestring

cardano-db-sync/src/Cardano/DbSync.hs

Lines changed: 18 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ import Cardano.Slotting.Slot (EpochNo (..))
4545

4646
import qualified Cardano.Db as DB
4747
import Cardano.DbSync.Api
48-
import Cardano.DbSync.Api.Types (InsertOptions (..), RunMigration, SyncEnv (..), SyncOptions (..), envLedgerEnv)
48+
import Cardano.DbSync.Api.Types (InsertOptions (..), LedgerEnv (..), RunMigration, SyncEnv (..), SyncOptions (..), envLedgerEnv)
4949
import Cardano.DbSync.Config (configureLogging)
5050
import Cardano.DbSync.Config.Cardano
5151
import Cardano.DbSync.Config.Types
@@ -54,6 +54,7 @@ import Cardano.DbSync.DbEvent
5454
import Cardano.DbSync.Era
5555
import Cardano.DbSync.Error
5656
import Cardano.DbSync.Ledger.State
57+
import Cardano.DbSync.Ledger.Types (HasLedgerEnv (..))
5758
import Cardano.DbSync.OffChain (runFetchOffChainPoolThread, runFetchOffChainVoteThread)
5859
import Cardano.DbSync.Rollback (handlePostRollbackSnapshots, unsafeRollback)
5960
import Cardano.DbSync.Sync (runSyncNodeClient)
@@ -248,6 +249,10 @@ runSyncNode metricsSetters trce iomgr dbConnSetting runNearTipMigrationFnc syncN
248249

249250
-- communication channel between datalayer thread and chainsync-client thread
250251
threadChannels <- liftIO newThreadChannels
252+
-- 'finally' on the worker pool ensures the LSM session (and any other
253+
-- backend resources) are closed even when db-sync is cancelled or
254+
-- crashes — important for tests that restart db-sync in the same
255+
-- process and need the OS file lock to be released.
251256
liftIO $
252257
mapConcurrently_
253258
id
@@ -257,6 +262,7 @@ runSyncNode metricsSetters trce iomgr dbConnSetting runNearTipMigrationFnc syncN
257262
, runFetchOffChainVoteThread syncEnv
258263
, runLedgerStateWriteThread (getTrace syncEnv) (envLedgerEnv syncEnv)
259264
]
265+
`finally` closeLedgerEnv syncEnv
260266
)
261267
where
262268
useShelleyInit :: SyncNodeConfig -> Bool
@@ -353,3 +359,14 @@ txOutConfigToTableType config = case config of
353359
TxOutConsumed _ (UseTxOutAddress flag) -> if flag then DB.TxOutVariantAddress else DB.TxOutVariantCore
354360
TxOutConsumedPrune _ (UseTxOutAddress flag) -> if flag then DB.TxOutVariantAddress else DB.TxOutVariantCore
355361
TxOutConsumedBootstrap _ (UseTxOutAddress flag) -> if flag then DB.TxOutVariantAddress else DB.TxOutVariantCore
362+
363+
-- | Release backend resources held by the ledger environment.
364+
-- Currently this closes the LSM session (no-op for InMemory and NoLedger).
365+
closeLedgerEnv :: SyncEnv -> IO ()
366+
closeLedgerEnv syncEnv = case envLedgerEnv syncEnv of
367+
HasLedger le -> do
368+
let trce = leTrace le
369+
logInfo trce "closeLedgerEnv: closing LSM session..."
370+
leClose le
371+
logInfo trce "closeLedgerEnv: closed."
372+
NoLedger _ -> pure ()

cardano-db-sync/src/Cardano/DbSync/Api.hs

Lines changed: 17 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -85,14 +85,14 @@ import Cardano.DbSync.Error
8585
import Cardano.DbSync.Ledger.Event (LedgerEvent (..))
8686
import Cardano.DbSync.Ledger.State (
8787
getHeaderHash,
88-
hashToAnnotation,
8988
listKnownSnapshots,
9089
mkHasLedgerEnv,
9190
)
92-
import Cardano.DbSync.Ledger.Types (HasLedgerEnv (..), LedgerStateFile (..), SnapshotPoint (..))
91+
import Cardano.DbSync.Ledger.Types (HasLedgerEnv (..), SnapshotPoint (..))
9392
import Cardano.DbSync.LocalStateQuery
9493
import Cardano.DbSync.Types
9594
import Cardano.DbSync.Util
95+
import Ouroboros.Consensus.Storage.LedgerDB.Snapshots (DiskSnapshot (..))
9696

9797
setConsistentLevel :: SyncEnv -> ConsistentLevel -> IO ()
9898
setConsistentLevel env cst = do
@@ -341,7 +341,15 @@ mkSyncEnv metricSetters trce dbEnv syncOptions protoInfo nw maxLovelaceSupply nw
341341
else pure useNoCache
342342
consistentLevelVar <- newTVarIO Unchecked
343343
indexesVar <- newTVarIO $ enpForceIndexes syncNP
344-
bts <- getBootstrapInProgress trce (isTxOutConsumedBootstrap' syncNodeConfigFromFile) dbEnv
344+
let bootstrapFlag = isTxOutConsumedBootstrap' syncNodeConfigFromFile
345+
case (bootstrapFlag, dncLedgerBackend syncNodeConfigFromFile) of
346+
(True, LedgerBackendLSM _) ->
347+
DB.logAndThrowIO trce $
348+
"bootstrap-tx-out is not supported with ledger_backend=lsm. "
349+
<> "The bootstrap path reads the full UTxO from the in-memory ledger state, "
350+
<> "which is empty under LSM. Use ledger_backend=inmemory or disable bootstrap."
351+
_ -> pure ()
352+
bts <- getBootstrapInProgress trce bootstrapFlag dbEnv
345353
bootstrapVar <- newTVarIO bts
346354
-- Offline Pool + Anchor queues
347355
opwq <- newTBQueueIO 1000
@@ -363,6 +371,7 @@ mkSyncEnv metricSetters trce dbEnv syncOptions protoInfo nw maxLovelaceSupply nw
363371
maxLovelaceSupply
364372
systemStart
365373
syncOptions
374+
(dncLedgerBackend syncNodeConfigFromFile)
366375
(Nothing, False) -> NoLedger <$> mkNoLedgerEnv trce protoInfo nw systemStart
367376
(Just _, False) -> do
368377
logWarning trce $
@@ -472,11 +481,11 @@ verifySnapshotPoint env snapPoints =
472481
catMaybes <$> mapM validLedgerFileToPoint snapPoints
473482
where
474483
validLedgerFileToPoint :: SnapshotPoint -> IO (Maybe (CardanoPoint, Bool))
475-
validLedgerFileToPoint (OnDisk lsf) = do
476-
hashes <- getSlotHash (envDbEnv env) (lsfSlotNo lsf)
477-
let valid = find (\(_, h) -> lsfHash lsf == hashToAnnotation h) hashes
478-
case valid of
479-
Just (slot, hash) | slot == lsfSlotNo lsf -> pure $ convertToDiskPoint slot hash
484+
validLedgerFileToPoint (OnDisk ds) = do
485+
let slot = SlotNo (dsNumber ds)
486+
hashes <- getSlotHash (envDbEnv env) slot
487+
case hashes of
488+
[(s, _h)] | s == slot -> pure $ convertToDiskPoint slot _h
480489
_ -> pure Nothing
481490
validLedgerFileToPoint (InMemory pnt) = do
482491
case pnt of

cardano-db-sync/src/Cardano/DbSync/Config.hs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -87,6 +87,7 @@ coalesceConfig pcfg ncfg adjustGenesisPath = do
8787
, dncInsertOptions = extractInsertOptions pcfg
8888
, dncIpfsGateway = endsInSlash <$> pcIpfsGateway pcfg
8989
, dncSnapshotInterval = pcSnapshotInterval pcfg
90+
, dncLedgerBackend = pcLedgerBackend pcfg
9091
}
9192

9293
mkAdjustPath :: SyncPreConfig -> (FilePath -> FilePath)

cardano-db-sync/src/Cardano/DbSync/Config/Types.hs

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@ module Cardano.DbSync.Config.Types (
3030
TxOutConfig (..),
3131
UseTxOutAddress (..),
3232
ForceTxIn (..),
33+
LedgerBackend (..),
3334
LedgerInsertConfig (..),
3435
ShelleyInsertConfig (..),
3536
RewardsConfig (..),
@@ -150,6 +151,7 @@ data SyncNodeConfig = SyncNodeConfig
150151
, dncInsertOptions :: !SyncInsertOptions
151152
, dncIpfsGateway :: [Text]
152153
, dncSnapshotInterval :: !SnapshotIntervalConfig
154+
, dncLedgerBackend :: !LedgerBackend
153155
}
154156

155157
data SyncPreConfig = SyncPreConfig
@@ -163,6 +165,7 @@ data SyncPreConfig = SyncPreConfig
163165
, pcInsertConfig :: !SyncInsertConfig
164166
, pcIpfsGateway :: ![Text]
165167
, pcSnapshotInterval :: !SnapshotIntervalConfig
168+
, pcLedgerBackend :: !LedgerBackend
166169
}
167170
deriving (Show)
168171

@@ -224,6 +227,20 @@ newtype UseTxOutAddress = UseTxOutAddress {unUseTxOutAddress :: Bool}
224227
deriving (Eq, Show)
225228
deriving newtype (ToJSON, FromJSON)
226229

230+
-- | Choose the backend for storing ledger tables (UTxO set).
231+
-- 'LedgerBackendInMemory' keeps everything in RAM (current default).
232+
-- 'LedgerBackendLSM' uses LSM trees on disk for lower memory usage.
233+
data LedgerBackend
234+
= LedgerBackendInMemory
235+
| LedgerBackendLSM (Maybe FilePath)
236+
deriving (Eq, Show)
237+
238+
instance FromJSON LedgerBackend where
239+
parseJSON = Aeson.withText "LedgerBackend" $ \case
240+
"inmemory" -> pure LedgerBackendInMemory
241+
"lsm" -> pure (LedgerBackendLSM Nothing)
242+
other -> fail $ "unexpected ledger_backend: " <> show other <> ". Expected \"inmemory\" or \"lsm\"."
243+
227244
data LedgerInsertConfig
228245
= LedgerEnable
229246
| LedgerDisable
@@ -423,6 +440,7 @@ parseGenSyncNodeConfig o =
423440
<*> o .:? "insert_options" .!= def
424441
<*> o .:? "ipfs_gateway" .!= ["https://ipfs.io/ipfs"]
425442
<*> o .:? "snapshot_interval" .!= def
443+
<*> o .:? "ledger_backend" .!= LedgerBackendInMemory
426444

427445
instance FromJSON SyncProtocol where
428446
parseJSON o =

0 commit comments

Comments
 (0)