@@ -35,6 +35,11 @@ const (
3535 // chunkWalkMaxReported truncates per-category result lists so a large file
3636 // with many missing chunks does not flood the log.
3737 chunkWalkMaxReported = 50
38+ // chunkWalkMaxBytes caps the file size for which the local split + chunk
39+ // walk runs. Larger files skip the walk: at ~200 MB the walk takes tens
40+ // of seconds on a healthy cluster and minutes on a slow one; beyond that
41+ // it stops being a useful per-failure diagnostic.
42+ chunkWalkMaxBytes int64 = 200 * 1024 * 1024
3843)
3944
4045// Options represents smoke test options
@@ -201,13 +206,20 @@ func (c *Check) run(ctx context.Context, cluster orchestration.Cluster, o Option
201206
202207 // Pre-compute the chunk address tree locally so we can pin-point a
203208 // missing chunk if download later fails. Deterministic for the same
204- // (data, rLevel) input — matches what bee would produce.
205- splitRoot , allChunks , splitErr := topohealth .SplitChunkAddresses (ctx , txData , rLevel )
206- if splitErr != nil {
207- c .logger .Errorf ("local chunk split failed for size %d: %v" , contentSize , splitErr )
208- allChunks = nil // fall back to root-only diagnostics
209+ // (data, rLevel) input — matches what bee would produce. Skipped
210+ // for files above chunkWalkMaxBytes, since the walk's cost grows
211+ // linearly with chunk count and it stops being a useful diagnostic.
212+ var allChunks []topohealth.ChunkInfo
213+ if contentSize <= chunkWalkMaxBytes {
214+ splitRoot , chunks , splitErr := topohealth .SplitChunkAddresses (ctx , txData , rLevel )
215+ if splitErr != nil {
216+ c .logger .Errorf ("local chunk split failed for size %d: %v" , contentSize , splitErr )
217+ } else {
218+ allChunks = chunks
219+ c .logger .Infof ("local split produced %d chunks (root=%s)" , len (allChunks ), splitRoot )
220+ }
209221 } else {
210- c .logger .Infof ("local split produced %d chunks (root=%s) " , len ( allChunks ), splitRoot )
222+ c .logger .Infof ("file size %d > %d (chunkWalkMaxBytes); skipping local split and on-failure chunk walk " , contentSize , chunkWalkMaxBytes )
211223 }
212224
213225 if c .probe (ctx , topohealth .PhasePreUpload , uploader , thresholds ) == topohealth .StatusUnhealthy {
@@ -400,7 +412,8 @@ func (c *Check) onFailureDump(ctx context.Context, cluster orchestration.Cluster
400412 go func () {
401413 defer wg .Done ()
402414 if len (allChunks ) == 0 {
403- c .logger .Warningf ("on_failure: no pre-computed chunk list (split failed earlier); skipping chunk walk" )
415+ // allChunks is empty either because the file size exceeded
416+ // chunkWalkMaxBytes or because the local split errored; skip the walk.
404417 return
405418 }
406419 c .walkChunksOnFailure (ctx , cluster , root , allChunks )
0 commit comments