@@ -40,6 +40,21 @@ const (
4040 freezerBatchLimit = 30000
4141)
4242
43+ // freezerCleanupMargin is the number of most recently frozen blocks that are
44+ // kept in the key-value database after being written to the ancient store.
45+ // It acts as a safety margin: after an unclean shutdown, repair() truncates
46+ // unflushed freezer writes. With the margin in place that data still exists
47+ // in the key-value database and can be re-frozen. Without it, a crash could
48+ // leave blocks missing from both stores, making the node unable to start
49+ // (especially for L2 nodes without peers).
50+ //
51+ // It is set to freezerBatchLimit because each freeze cycle writes at most
52+ // freezerBatchLimit blocks to the ancient store. If the node crashes before
53+ // these writes are fsynced (SyncAncient), repair() may truncate the unflushed
54+ // entries on restart; keeping this many blocks in the key-value database
55+ // ensures the truncated data can be re-frozen.
56+ var freezerCleanupMargin uint64 = freezerBatchLimit
57+
4358// chainFreezer is a wrapper of chain ancient store with additional chain freezing
4459// feature. The background thread will keep moving ancient chain segments from
4560// key-value database to flat files for saving space on live database.
@@ -220,13 +235,37 @@ func (f *chainFreezer) freeze(db ethdb.KeyValueStore) {
220235 if err := f .SyncAncient (); err != nil {
221236 log .Crit ("Failed to flush frozen tables" , "err" , err )
222237 }
223- // Wipe out all data from the active database
238+ // Delete blocks from LevelDB that are safely behind the freeze point.
239+ // Recently-frozen blocks are kept in both stores so that after an
240+ // unclean shutdown, repair()-truncated entries can be re-frozen.
241+ frozen , _ = f .Ancients () // no error will occur, safe to ignore; reload after freezeRange
242+ cleanupStart := uint64 (1 ) // always keep genesis
243+ if prev , ok := ReadFreezerCleanupTail (db ); ok && prev > 1 {
244+ cleanupStart = prev
245+ } else if frozen > freezerCleanupMargin {
246+ // First run with safety margin: prior code already deleted frozen
247+ // blocks from LevelDB immediately, so skip ahead to avoid pointless
248+ // reads of already-deleted blocks.
249+ cleanupStart = frozen - freezerCleanupMargin
250+ WriteFreezerCleanupTail (db , cleanupStart )
251+ }
252+ cleanupLimit := uint64 (0 )
253+ if frozen > freezerCleanupMargin {
254+ cleanupLimit = frozen - freezerCleanupMargin
255+ }
256+ // Cap per-cycle work to avoid stalling when cleanup has a large backlog
257+ // (e.g., first run after upgrade). During catch-up the node keeps more
258+ // blocks in LevelDB than ultimately needed (extra disk, not extra latency).
259+ if cleanupLimit > cleanupStart + freezerBatchLimit {
260+ cleanupLimit = cleanupStart + freezerBatchLimit
261+ }
262+ // Wipe out canonical data from the active database.
224263 batch := db .NewBatch ()
225- for i := 0 ; i < len ( ancients ); i ++ {
226- // Always keep the genesis block in active database
227- if first + uint64 ( i ) != 0 {
228- DeleteBlockWithoutNumber (batch , ancients [ i ], first + uint64 ( i ) )
229- DeleteCanonicalHash (batch , first + uint64 ( i ) )
264+ for number := cleanupStart ; number < cleanupLimit ; number ++ {
265+ hash := ReadCanonicalHash ( nfdb , number )
266+ if hash != (common. Hash {}) {
267+ DeleteBlockWithoutNumber (batch , hash , number )
268+ DeleteCanonicalHash (batch , number )
230269 }
231270 }
232271 if err := batch .Write (); err != nil {
@@ -236,15 +275,11 @@ func (f *chainFreezer) freeze(db ethdb.KeyValueStore) {
236275
237276 // Wipe out side chains also and track dangling side chains
238277 var dangling []common.Hash
239- frozen , _ = f .Ancients () // Needs reload after during freezeRange
240- for number := first ; number < frozen ; number ++ {
241- // Always keep the genesis block in active database
242- if number != 0 {
243- dangling = ReadAllHashes (db , number )
244- for _ , hash := range dangling {
245- log .Trace ("Deleting side chain" , "number" , number , "hash" , hash )
246- DeleteBlock (batch , hash , number )
247- }
278+ for number := cleanupStart ; number < cleanupLimit ; number ++ {
279+ dangling = ReadAllHashes (db , number )
280+ for _ , hash := range dangling {
281+ log .Trace ("Deleting side chain" , "number" , number , "hash" , hash )
282+ DeleteBlock (batch , hash , number )
248283 }
249284 }
250285 if err := batch .Write (); err != nil {
@@ -253,37 +288,38 @@ func (f *chainFreezer) freeze(db ethdb.KeyValueStore) {
253288 batch .Reset ()
254289
255290 // Step into the future and delete any dangling side chains
256- if frozen > 0 {
257- tip := frozen
258- for len (dangling ) > 0 {
259- drop := make (map [common.Hash ]struct {})
260- for _ , hash := range dangling {
261- log .Debug ("Dangling parent from Freezer" , "number" , tip - 1 , "hash" , hash )
262- drop [hash ] = struct {}{}
291+ tip := cleanupLimit
292+ for len (dangling ) > 0 {
293+ drop := make (map [common.Hash ]struct {})
294+ for _ , hash := range dangling {
295+ log .Debug ("Dangling parent from Freezer" , "number" , tip - 1 , "hash" , hash )
296+ drop [hash ] = struct {}{}
297+ }
298+ children := ReadAllHashes (db , tip )
299+ for i := 0 ; i < len (children ); i ++ {
300+ // Dig up the child and ensure it's dangling
301+ child := ReadHeader (nfdb , children [i ], tip )
302+ if child == nil {
303+ log .Error ("Missing dangling header" , "number" , tip , "hash" , children [i ])
304+ continue
263305 }
264- children := ReadAllHashes (db , tip )
265- for i := 0 ; i < len (children ); i ++ {
266- // Dig up the child and ensure it's dangling
267- child := ReadHeader (nfdb , children [i ], tip )
268- if child == nil {
269- log .Error ("Missing dangling header" , "number" , tip , "hash" , children [i ])
270- continue
271- }
272- if _ , ok := drop [child .ParentHash ]; ! ok {
273- children = append (children [:i ], children [i + 1 :]... )
274- i --
275- continue
276- }
277- // Delete all block data associated with the child
278- log .Debug ("Deleting dangling block" , "number" , tip , "hash" , children [i ], "parent" , child .ParentHash )
279- DeleteBlock (batch , children [i ], tip )
306+ if _ , ok := drop [child .ParentHash ]; ! ok {
307+ children = append (children [:i ], children [i + 1 :]... )
308+ i --
309+ continue
280310 }
281- dangling = children
282- tip ++
283- }
284- if err := batch .Write (); err != nil {
285- log .Crit ("Failed to delete dangling side blocks" , "err" , err )
311+ // Delete all block data associated with the child
312+ log .Debug ("Deleting dangling block" , "number" , tip , "hash" , children [i ], "parent" , child .ParentHash )
313+ DeleteBlock (batch , children [i ], tip )
286314 }
315+ dangling = children
316+ tip ++
317+ }
318+ if err := batch .Write (); err != nil {
319+ log .Crit ("Failed to delete dangling side blocks" , "err" , err )
320+ }
321+ if cleanupStart < cleanupLimit {
322+ WriteFreezerCleanupTail (db , cleanupLimit )
287323 }
288324
289325 // Log something friendly for the user
0 commit comments