@@ -48,43 +48,57 @@ func (sc *checkpointCreateImportImpl) Run(ctx context.Context, childEnv *env.Env
4848 return err
4949 }
5050
51+ src := sc .Net .ComputeWorkers ()[0 ]
52+ srcCtrl , err := oasis .NewController (src .SocketPath ())
53+ if err != nil {
54+ return fmt .Errorf ("failed to create controller for the source node: %w" , err )
55+ }
56+
5157 // Use height - 3 so that blocks at h, h+1, h+2 all exist in the block store.
52- blk , err := sc . Net . Controller () .Consensus .GetBlock (ctx , consensus .HeightLatest )
58+ blk , err := srcCtrl .Consensus .GetBlock (ctx , consensus .HeightLatest )
5359 if err != nil {
5460 return fmt .Errorf ("failed to get latest consensus block: %w" , err )
5561 }
56- height := blk .Height - 3
57-
58- rtBlk , err := sc .Net .ClientController ().Roothash .GetLatestBlock (ctx , & roothash.RuntimeRequest {
62+ candidateHeight := blk .Height - 3
63+ rtState , err := srcCtrl .Roothash .GetRuntimeState (ctx , & roothash.RuntimeRequest {
5964 RuntimeID : KeyValueRuntimeID ,
60- Height : height ,
65+ Height : candidateHeight ,
6166 })
6267 if err != nil {
63- return fmt .Errorf ("failed to get runtime block at height %d: %w" , height , err )
68+ return fmt .Errorf ("failed to get runtime state for height %d: %w" , candidateHeight , err )
6469 }
65- round := rtBlk .Header .Round
6670
67- sc . Logger . Info ( "creating checkpoints" ,
68- "height" , height ,
69- "round" , round ,
70- "runtime_id" , KeyValueRuntimeID ,
71- )
71+ // Use the runtime state's LastBlockHeight as the consensus checkpoint height; otherwise
72+ // the runtime light history indexer might miss the authoritative light block for the
73+ // corresponding runtime round.
74+ cpRound := rtState . LastBlock . Header . Round
75+ cpHeight := rtState . LastBlockHeight
7276
73- cpDir := filepath .Join (childEnv .Dir (), "checkpoint" )
77+ // Ensure runtime round is synced before stopping the node and creating a checkpoint for it.
78+ if err := srcCtrl .WaitRuntimeRound (ctx , KeyValueRuntimeID , cpRound ); err != nil {
79+ return fmt .Errorf ("waiting runtime round %d: %w" , cpRound , err )
80+ }
7481
7582 // Stop compute worker 0 (source node).
76- source := sc .Net .ComputeWorkers ()[0 ]
77- if err := source .StopGracefully (); err != nil {
83+ if err := src .StopGracefully (); err != nil {
7884 return fmt .Errorf ("failed to stop source compute worker: %w" , err )
7985 }
8086
8187 // Create checkpoints from the source node's data.
88+ cpDir := filepath .Join (childEnv .Dir (), "checkpoint" )
89+
90+ sc .Logger .Info ("creating checkpoints" ,
91+ "height" , cpHeight ,
92+ "round" , cpRound ,
93+ "runtime_id" , KeyValueRuntimeID ,
94+ )
95+
8296 args := []string {
8397 "storage" , "checkpoint" , "create" ,
84- "--config" , source .ConfigFile (),
85- "--height" , fmt .Sprintf ("%d" , height ),
98+ "--config" , src .ConfigFile (),
99+ "--height" , fmt .Sprintf ("%d" , cpHeight ),
86100 "--runtime" , KeyValueRuntimeID .Hex (),
87- "--round" , fmt .Sprintf ("%d" , round ),
101+ "--round" , fmt .Sprintf ("%d" , cpRound ),
88102 "--output-dir" , cpDir ,
89103 "--debug.dont_blame_oasis" ,
90104 "--debug.allow_test_keys" ,
@@ -96,7 +110,7 @@ func (sc *checkpointCreateImportImpl) Run(ctx context.Context, childEnv *env.Env
96110 sc .Logger .Info ("checkpoints created successfully" )
97111
98112 // Start the source compute worker again.
99- if err := source .Start (); err != nil {
113+ if err := src .Start (); err != nil {
100114 return fmt .Errorf ("failed to restart source compute worker: %w" , err )
101115 }
102116
@@ -135,15 +149,48 @@ func (sc *checkpointCreateImportImpl) Run(ctx context.Context, childEnv *env.Env
135149 return fmt .Errorf ("failed to start target node: %w" , err )
136150 }
137151
138- // Wait for the target node to sync.
139- sc .Logger .Info ("waiting for target node to sync" )
140- ctrl , err := oasis .NewController (target .SocketPath ())
152+ targetCtrl , err := oasis .NewController (target .SocketPath ())
141153 if err != nil {
142154 return fmt .Errorf ("failed to create controller for target node: %w" , err )
143155 }
144- if err := ctrl .WaitReady (ctx ); err != nil {
156+
157+ // Ensure target node syncs to the tip of the chain from the imported checkpoints.
158+ sc .Logger .Info ("waiting for target node to sync" )
159+ if err := targetCtrl .WaitReady (ctx ); err != nil {
145160 return fmt .Errorf ("target node failed to sync: %w" , err )
146161 }
162+ sc .Logger .Info ("target node is ready" )
163+
164+ // Manually ensure that runtime state was synced up to the latest round as
165+ // WaitReady only guarantees consensus sync.
166+ latestBlk , err := sc .Net .ClientController ().Roothash .GetLatestBlock (ctx , & roothash.RuntimeRequest {
167+ RuntimeID : KeyValueRuntimeID ,
168+ Height : consensus .HeightLatest ,
169+ })
170+ if err != nil {
171+ return fmt .Errorf ("failed to get latest runtime block: %w" , err )
172+ }
173+ latestRound := latestBlk .Header .Round
174+ sc .Logger .Info ("waiting the target node to have runtime state synced" )
175+ if err := targetCtrl .WaitRuntimeRound (ctx , KeyValueRuntimeID , latestRound ); err != nil {
176+ return fmt .Errorf ("waiting synced runtime round %d: %w" , latestRound , err )
177+ }
178+ sc .Logger .Info ("target node has runtime state synced" )
179+
180+ // Ensure target synced from the imported checkpoint and not from the genesis.
181+ status , err := targetCtrl .NodeController .GetStatus (ctx )
182+ if err != nil {
183+ return fmt .Errorf ("failed to get target node status: %w" , err )
184+ }
185+ if lastRetainedHeight := status .Consensus .LastRetainedHeight ; lastRetainedHeight != cpHeight {
186+ sc .Logger .Info ("last retained height is not equal to the imported checkpoint height" ,
187+ "cp_height" , cpHeight ,
188+ "last_retained_height" , lastRetainedHeight )
189+ return fmt .Errorf ("failed to ensure consensus synced from the imported checkpoint" )
190+ }
191+ // No need to assert target node didn't sync runtime state from the genesis,
192+ // since runtime genesis sync cannot succeed with a missing runtime light history
193+ // (the case when consensus is synced using an imported checkpoint, asserted above).
147194
148195 return nil
149196}
0 commit comments