Skip to content

Commit c9b2597

Browse files
amesgengeo2a
authored and committed
[wip] Integrate Predictable Ledger State Snapshots
1 parent 5ca5ed1 commit c9b2597

6 files changed

Lines changed: 91 additions & 22 deletions

File tree

cardano-node/src/Cardano/Node/Configuration/LedgerDB.hs

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -73,8 +73,7 @@ noDeprecatedOptions = DeprecatedOptions []
7373

7474
data LedgerDbConfiguration =
7575
LedgerDbConfiguration
76-
NumOfDiskSnapshots
77-
SnapshotInterval
76+
SnapshotPolicyArgs
7877
QueryBatchSize
7978
LedgerDbSelectorFlag
8079
DeprecatedOptions

cardano-node/src/Cardano/Node/Configuration/POM.hs

Lines changed: 46 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@ module Cardano.Node.Configuration.POM
2828
where
2929

3030
import Cardano.Crypto (RequiresNetworkMagic (..))
31+
import Cardano.Ledger.BaseTypes
3132
import Cardano.Logging.Types
3233
import Cardano.Network.ConsensusMode (ConsensusMode (..), defaultConsensusMode)
3334
import qualified Cardano.Network.Diffusion.Configuration as Cardano
@@ -47,8 +48,9 @@ import Ouroboros.Consensus.Node (NodeDatabasePaths (..))
4748
import Ouroboros.Consensus.Node.Genesis (GenesisConfig, GenesisConfigFlags,
4849
defaultGenesisConfigFlags, mkGenesisConfig)
4950
import Ouroboros.Consensus.Storage.LedgerDB.Args (QueryBatchSize (..))
50-
import Ouroboros.Consensus.Storage.LedgerDB.Snapshots (NumOfDiskSnapshots (..),
51-
SnapshotInterval (..))
51+
import Ouroboros.Consensus.Storage.LedgerDB.Snapshots (OverrideOrDefault (..),
52+
SnapshotDelayRange (..), SnapshotFrequency (..), SnapshotFrequencyArgs (..),
53+
SnapshotPolicyArgs (..), defaultSnapshotPolicyArgs)
5254
import Ouroboros.Consensus.Storage.LedgerDB.V1.Args (FlushFrequency (..))
5355
import Ouroboros.Network.Diffusion.Configuration as Configuration
5456
import qualified Ouroboros.Network.Diffusion.Configuration as Ouroboros
@@ -510,8 +512,14 @@ instance FromJSON PartialNodeConfiguration where
510512
Nothing -> return Nothing
511513

512514
parseLedgerDbConfig v = do
513-
let snapInterval x = fmap (RequestedSnapshotInterval . secondsToDiffTime) <$> x .:? "SnapshotInterval"
514-
snapNum x = fmap RequestedNumOfDiskSnapshots <$> x .:? "NumOfDiskSnapshots"
515+
-- TODO maybe don't silently convert old format (which was in seconds)
516+
-- to new format (which is in slots), despite these being the same on
517+
-- mainnet?
518+
let snapInterval x = do
519+
si <- x .:? "SnapshotInterval"
520+
when (any (<= 0) si) $ fail $ "Non-positive SnapshotInterval: " <> show si
521+
pure $ Override . SlotNo <$> si
522+
snapNum x = fmap Override <$> x .:? "NumOfDiskSnapshots"
515523

516524
mTopLevelSnapInterval <- snapInterval v
517525
mTopLevelSnapNum <- snapNum v
@@ -525,12 +533,32 @@ instance FromJSON PartialNodeConfiguration where
525533
mLedgerDB <- v .:? "LedgerDB"
526534
case mLedgerDB of
527535
Nothing -> do
528-
let si = fromMaybe DefaultSnapshotInterval mTopLevelSnapInterval
529-
sn = fromMaybe DefaultNumOfDiskSnapshots mTopLevelSnapNum
530-
return $ Just $ LedgerDbConfiguration sn si DefaultQueryBatchSize V2InMemory deprecatedOpts
536+
let si = fromMaybe UseDefault mTopLevelSnapInterval
537+
sn = fromMaybe UseDefault mTopLevelSnapNum
538+
sf = SnapshotFrequencyArgs {
539+
sfaInterval = unsafeNonZero . unSlotNo <$> si
540+
, sfaOffset = UseDefault
541+
, sfaRateLimit = UseDefault
542+
, sfaDelaySnapshotRange = UseDefault
543+
}
544+
spArgs = SnapshotPolicyArgs (SnapshotFrequency sf) sn
545+
return $ Just $ LedgerDbConfiguration spArgs DefaultQueryBatchSize V2InMemory deprecatedOpts
531546
Just ledgerDB -> flip (withObject "LedgerDB") ledgerDB $ \o -> do
532-
ldbSnapInterval <- (getLast . (Last mTopLevelSnapInterval <>) . Last <$> snapInterval o) .!= DefaultSnapshotInterval
533-
ldbSnapNum <- (getLast . (Last mTopLevelSnapNum <>) . Last <$> snapNum o) .!= DefaultNumOfDiskSnapshots
547+
ldbSnapInterval <- (getLast . (Last mTopLevelSnapInterval <>) . Last <$> snapInterval o) .!= UseDefault
548+
ldbSnapNum <- (getLast . (Last mTopLevelSnapNum <>) . Last <$> snapNum o) .!= UseDefault
549+
ldbSnapOffset <- (fmap Override <$> o .:? "SlotOffset") .!= UseDefault
550+
ldbSnapRateLimit<- (fmap (Override . secondsToDiffTime) <$> o .:? "RateLimit") .!= UseDefault
551+
ldbSnapMinDelay <- o .:? "MinDelay"
552+
ldbSnapMaxDelay <- o .:? "MaxDelay"
553+
ldbSnapDelayRange <-
554+
case (ldbSnapMinDelay, ldbSnapMaxDelay) of
555+
(Just minDelay, Just maxDelay) ->
556+
if minDelay <= maxDelay then
557+
pure (Override (SnapshotDelayRange (secondsToDiffTime minDelay) (secondsToDiffTime maxDelay)))
558+
else fail $ "Invalid ledger snapshot delay range, MinDelay > MaxDelay: "
559+
<> show minDelay <> " > " <> show maxDelay
560+
-- use the default delay range if either min or max is unspecified
561+
_ -> pure UseDefault
534562
qsize <- (fmap RequestedQueryBatchSize <$> o .:? "QueryBatchSize") .!= DefaultQueryBatchSize
535563
backend <- o .:? "Backend" .!= "V2InMemory"
536564
selector <- case backend of
@@ -545,7 +573,14 @@ instance FromJSON PartialNodeConfiguration where
545573
lsmPath :: Maybe FilePath <- o .:? "LSMDatabasePath"
546574
pure $ V2LSM lsmPath
547575
_ -> fail $ "Malformed LedgerDB Backend: " <> backend
548-
pure $ Just $ LedgerDbConfiguration ldbSnapNum ldbSnapInterval qsize selector deprecatedOpts
576+
let sf = SnapshotFrequencyArgs {
577+
sfaInterval = unsafeNonZero . unSlotNo <$> ldbSnapInterval
578+
, sfaOffset = ldbSnapOffset
579+
, sfaRateLimit = ldbSnapRateLimit
580+
, sfaDelaySnapshotRange = ldbSnapDelayRange
581+
}
582+
spArgs = SnapshotPolicyArgs (SnapshotFrequency sf) ldbSnapNum
583+
pure $ Just $ LedgerDbConfiguration spArgs qsize selector deprecatedOpts
549584

550585
parseByronProtocol v = do
551586
primary <- v .:? "ByronGenesisFile"
@@ -712,8 +747,7 @@ defaultPartialNodeConfiguration =
712747
, pncLedgerDbConfig =
713748
Last $ Just $
714749
LedgerDbConfiguration
715-
DefaultNumOfDiskSnapshots
716-
DefaultSnapshotInterval
750+
defaultSnapshotPolicyArgs
717751
DefaultQueryBatchSize
718752
V2InMemory
719753
noDeprecatedOptions

cardano-node/src/Cardano/Node/Run.hs

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -656,15 +656,11 @@ handleSimpleNode blockType runP tracers nc networkMagic onKernel = do
656656
Just version_ -> Map.takeWhileAntitone (<= version_)
657657

658658
LedgerDbConfiguration
659-
snapInterval
660-
numSnaps
659+
snapshotPolicyArgs
661660
queryBatchSize
662661
ldbBackend
663662
deprecatedOpts = ncLedgerDbConfig nc
664663

665-
snapshotPolicyArgs :: SnapshotPolicyArgs
666-
snapshotPolicyArgs = SnapshotPolicyArgs numSnaps snapInterval
667-
668664
--------------------------------------------------------------------------------
669665
-- SIGHUP Handlers
670666
--------------------------------------------------------------------------------

cardano-node/src/Cardano/Node/Tracing/Tracers/ChainDB.hs

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,7 @@ import Ouroboros.Network.Block (MaxSlotNo (..))
5959
import Data.Aeson (Object, Value (String), object, toJSON, (.=))
6060
import qualified Data.ByteString.Base16 as B16
6161
import Data.Int (Int64)
62+
import qualified Data.List.NonEmpty as NonEmpty
6263
import Data.SOP (All, K (..), hcmap, hcollapse)
6364
import Data.Text (Text)
6465
import qualified Data.Text as Text
@@ -1749,6 +1750,14 @@ instance ( StandardHash blk
17491750
LedgerDB.MetadataBackendMismatch ->
17501751
" Snapshot was created for a different backend. Convert it with `snapshot-converter`."
17511752
_ -> ""
1753+
forHuman (LedgerDB.SnapshotRequestDelayed _snapshotRequestTime delayBeforeSnapshotting slots) =
1754+
Text.unwords ["Scheduling to take ledger state snapshots at slots "
1755+
, showT (NonEmpty.toList slots)
1756+
, ", with a randomised delay of"
1757+
, showT delayBeforeSnapshotting
1758+
]
1759+
forHuman (LedgerDB.SnapshotRequestCompleted) = "Completed taking a ledger state snapshot"
1760+
17521761

17531762
forMachine dtals (LedgerDB.TookSnapshot snap pt enclosedTiming) =
17541763
mconcat [ "kind" .= String "TookSnapshot"
@@ -1763,11 +1772,23 @@ instance ( StandardHash blk
17631772
mconcat [ "kind" .= String "InvalidSnapshot"
17641773
, "snapshot" .= forMachine dtals snap
17651774
, "failure" .= show failure ]
1775+
forMachine _dtals (LedgerDB.SnapshotRequestDelayed snapshotRequestTime delayBeforeSnapshotting slots) =
1776+
mconcat [ "kind" .= String "TraceLedgerDBEvent.LedgerDBSnapshotEvent.SnapshotRequestDelayed"
1777+
, "requestTime" .= show snapshotRequestTime
1778+
, "delayBeforeSnapshotting " .= show delayBeforeSnapshotting
1779+
, "slots" .= show slots
1780+
]
1781+
forMachine _dtals (LedgerDB.SnapshotRequestCompleted) =
1782+
mconcat [ "kind" .= String "TraceLedgerDBEvent.LedgerDBSnapshotEvent.SnapshotRequestCompleted"
1783+
]
1784+
17661785

17671786
instance MetaTrace (LedgerDB.TraceSnapshotEvent blk) where
17681787
namespaceFor LedgerDB.TookSnapshot {} = Namespace [] ["TookSnapshot"]
17691788
namespaceFor LedgerDB.DeletedSnapshot {} = Namespace [] ["DeletedSnapshot"]
17701789
namespaceFor LedgerDB.InvalidSnapshot {} = Namespace [] ["InvalidSnapshot"]
1790+
namespaceFor LedgerDB.SnapshotRequestDelayed {} = Namespace [] ["SnapshotRequestDelayed"]
1791+
namespaceFor LedgerDB.SnapshotRequestCompleted {} = Namespace [] ["SnapshotRequestCompleted"]
17711792

17721793
severityFor (Namespace _ ["TookSnapshot"]) _ = Just Info
17731794
severityFor (Namespace _ ["DeletedSnapshot"]) _ = Just Debug
@@ -1786,6 +1807,10 @@ instance MetaTrace (LedgerDB.TraceSnapshotEvent blk) where
17861807
, " seems to be from an old node or different backend, it will"
17871808
, " be deleted"
17881809
]
1810+
documentFor (Namespace _ ["SnapshotRequestDelayed"]) = Just
1811+
"A delayed snapshot requested was issued. The snapshot will be initiated at the specified timestamp, with the specified delay and for the specified slots"
1812+
documentFor (Namespace _ ["SnapshotRequestCompleted"]) = Just
1813+
"The delayed snapshot request was completed"
17891814
documentFor _ = Nothing
17901815

17911816
allNamespaces =

cardano-node/src/Cardano/Tracing/OrphanInstances/Consensus.hs

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -91,6 +91,7 @@ import Data.Aeson (Value (..))
9191
import qualified Data.Aeson as Aeson
9292
import Data.Foldable (Foldable (..))
9393
import Data.Function (on)
94+
import qualified Data.List.NonEmpty as NonEmpty
9495
import Data.Proxy
9596
import Data.Text (Text, pack)
9697
import qualified Data.Text as Text
@@ -185,6 +186,8 @@ instance HasSeverityAnnotation (ChainDB.TraceEvent blk) where
185186
LedgerDB.InitFailureRead (LedgerDB.ReadMetadataError _ LedgerDB.MetadataBackendMismatch) -> Warning
186187
LedgerDB.InitFailureRead (LedgerDB.ReadMetadataError _ LedgerDB.MetadataFileDoesNotExist) -> Warning
187188
_ -> Error
189+
LedgerDB.SnapshotRequestDelayed {} -> Info
190+
LedgerDB.SnapshotRequestCompleted -> Info
188191
LedgerDB.LedgerReplayEvent {} -> Info
189192
LedgerDB.LedgerDBForkerEvent {} -> Debug
190193
LedgerDB.LedgerDBFlavorImplEvent {} -> Debug
@@ -628,6 +631,11 @@ instance ( ConvertRawHash blk
628631
", duration: " <> showT t
629632
LedgerDB.DeletedSnapshot snap ->
630633
"Deleted old snapshot " <> showT snap
634+
LedgerDB.SnapshotRequestDelayed _snapshotRequestTime delayBeforeSnapshotting slots ->
635+
"Scheduling to take ledger state snapshots at slots " <> showT (NonEmpty.toList slots)
636+
<> ", with randomised delay of"
637+
<> showT delayBeforeSnapshotting
638+
LedgerDB.SnapshotRequestCompleted -> "Completed taking a ledger state snapshot"
631639
LedgerDB.LedgerReplayEvent ev' -> case ev' of
632640
LedgerDB.TraceReplayStartEvent ev'' -> case ev'' of
633641
LedgerDB.ReplayFromGenesis ->
@@ -1104,6 +1112,14 @@ instance ( ConvertRawHash blk
11041112
mconcat [ "kind" .= String "TraceLedgerDBEvent.LedgerDBSnapshotEvent.InvalidSnapshot"
11051113
, "snapshot" .= toObject verb snap
11061114
, "failure" .= show failure ]
1115+
LedgerDB.SnapshotRequestDelayed snapshotRequestTime delayBeforeSnapshotting slots ->
1116+
mconcat [ "kind" .= String "TraceLedgerDBEvent.LedgerDBSnapshotEvent.SnapshotRequestDelayed"
1117+
, "requestTime" .= show snapshotRequestTime
1118+
, "delayBeforeSnapshotting " .= show delayBeforeSnapshotting
1119+
, "slots" .= show slots]
1120+
LedgerDB.SnapshotRequestCompleted ->
1121+
mconcat [ "kind" .= String "TraceLedgerDBEvent.LedgerDBSnapshotEvent.SnapshotRequestCompleted"
1122+
]
11071123
LedgerDB.LedgerReplayEvent ev' -> case ev' of
11081124
LedgerDB.TraceReplayStartEvent ev'' -> case ev'' of
11091125
LedgerDB.ReplayFromGenesis ->

cardano-node/test/Test/Cardano/Node/POM.hs

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -24,8 +24,7 @@ import Cardano.Tracing.Config (PartialTraceOptions (..), defaultPartia
2424
import Ouroboros.Consensus.Node (NodeDatabasePaths (..))
2525
import Ouroboros.Consensus.Node.Genesis (disableGenesisConfig)
2626
import Ouroboros.Consensus.Storage.LedgerDB.Args
27-
import Ouroboros.Consensus.Storage.LedgerDB.Snapshots (NumOfDiskSnapshots (..),
28-
SnapshotInterval (..))
27+
import Ouroboros.Consensus.Storage.LedgerDB.Snapshots (defaultSnapshotPolicyArgs)
2928
import Ouroboros.Network.Block (SlotNo (..))
3029
import Ouroboros.Network.PeerSelection.PeerSharing (PeerSharing (..))
3130
import Ouroboros.Network.TxSubmission.Inbound.V2.Types
@@ -297,7 +296,7 @@ eExpectedConfig = do
297296
, ncConsensusMode = PraosMode
298297
, ncGenesisConfig = disableGenesisConfig
299298
, ncResponderCoreAffinityPolicy = NoResponderCoreAffinity
300-
, ncLedgerDbConfig = LedgerDbConfiguration DefaultNumOfDiskSnapshots DefaultSnapshotInterval DefaultQueryBatchSize V2InMemory noDeprecatedOptions
299+
, ncLedgerDbConfig = LedgerDbConfiguration defaultSnapshotPolicyArgs DefaultQueryBatchSize V2InMemory noDeprecatedOptions
301300
, ncRpcConfig
302301
, ncTxSubmissionLogicVersion = TxSubmissionLogicV1
303302
, ncTxSubmissionInitDelay = defaultTxSubmissionInitDelay

0 commit comments

Comments
 (0)