Skip to content

Commit c683a14

Browse files
feat(smoke): cap local chunk split
1 parent b52e93a commit c683a14

1 file changed

Lines changed: 20 additions & 7 deletions

File tree

pkg/check/smoke/smoke.go

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,11 @@ const (
3535
// chunkWalkMaxReported truncates per-category result lists so a large file
3636
// with many missing chunks does not flood the log.
3737
chunkWalkMaxReported = 50
38+
// chunkWalkMaxBytes caps the file size for which the local split + chunk
39+
// walk runs. Larger files skip the walk: at ~200 MB the walk takes tens
40+
// of seconds on a healthy cluster and minutes on a slow one; beyond that
41+
// it stops being a useful per-failure diagnostic.
42+
chunkWalkMaxBytes int64 = 200 * 1024 * 1024
3843
)
3944

4045
// Options represents smoke test options
@@ -201,13 +206,20 @@ func (c *Check) run(ctx context.Context, cluster orchestration.Cluster, o Option
201206

202207
// Pre-compute the chunk address tree locally so we can pinpoint a
203208
// missing chunk if download later fails. Deterministic for the same
204-
// (data, rLevel) input — matches what bee would produce.
205-
splitRoot, allChunks, splitErr := topohealth.SplitChunkAddresses(ctx, txData, rLevel)
206-
if splitErr != nil {
207-
c.logger.Errorf("local chunk split failed for size %d: %v", contentSize, splitErr)
208-
allChunks = nil // fall back to root-only diagnostics
209+
// (data, rLevel) input — matches what bee would produce. Skipped
210+
// for files above chunkWalkMaxBytes since the walk grows linearly
211+
// with chunk count and stops being a useful per-failure tool.
212+
var allChunks []topohealth.ChunkInfo
213+
if contentSize <= chunkWalkMaxBytes {
214+
splitRoot, chunks, splitErr := topohealth.SplitChunkAddresses(ctx, txData, rLevel)
215+
if splitErr != nil {
216+
c.logger.Errorf("local chunk split failed for size %d: %v", contentSize, splitErr)
217+
} else {
218+
allChunks = chunks
219+
c.logger.Infof("local split produced %d chunks (root=%s)", len(allChunks), splitRoot)
220+
}
209221
} else {
210-
c.logger.Infof("local split produced %d chunks (root=%s)", len(allChunks), splitRoot)
222+
c.logger.Infof("file size %d > %d (chunkWalkMaxBytes); skipping local split and on-failure chunk walk", contentSize, chunkWalkMaxBytes)
211223
}
212224

213225
if c.probe(ctx, topohealth.PhasePreUpload, uploader, thresholds) == topohealth.StatusUnhealthy {
@@ -400,7 +412,8 @@ func (c *Check) onFailureDump(ctx context.Context, cluster orchestration.Cluster
400412
go func() {
401413
defer wg.Done()
402414
if len(allChunks) == 0 {
403-
c.logger.Warningf("on_failure: no pre-computed chunk list (split failed earlier); skipping chunk walk")
415+
// allChunks is empty either because the file size exceeded
416+
// chunkWalkMaxBytes or because the local split errored; both are already logged where the split is attempted.
404417
return
405418
}
406419
c.walkChunksOnFailure(ctx, cluster, root, allChunks)

0 commit comments

Comments
 (0)