@@ -11,6 +11,7 @@ import (
1111 "github.com/harmony-one/harmony/core"
1212 "github.com/harmony-one/harmony/core/types"
1313 "github.com/harmony-one/harmony/internal/utils"
14+ "github.com/harmony-one/harmony/p2p/stream/common/requestmanager"
1415 syncProto "github.com/harmony-one/harmony/p2p/stream/protocols/sync"
1516 sttypes "github.com/harmony-one/harmony/p2p/stream/types"
1617 "github.com/ledgerwatch/erigon-lib/kv"
@@ -183,20 +184,24 @@ func (b *StageBodies) Exec(ctx context.Context, firstCycle bool, invalidBlockRev
183184 return nil
184185}
185186
186- // IdentifySyncedStreams roughly find the synced streams.
187+ // identifySyncedStreams queries every available stream not listed in excludeIDs
188+ // for its current block number and returns those at or above targetHeight.
189+ // Results map: streamID → error (nil = synced, non-nil = failure reason).
190+ // Streams below target or with context errors are not recorded.
191+ // Failed streams are only punished when synced streams exist; otherwise the
192+ // stream pool is preserved to avoid cascading removal during systemic issues.
187193func (b * StageBodies ) identifySyncedStreams (ctx context.Context , s * StageState , targetHeight uint64 , excludeIDs []sttypes.StreamID ) (streams []sttypes.StreamID , err error ) {
194+ results := sttypes .NewSafeMap [sttypes.StreamID , error ]()
188195 var (
189- synced = make ( map [sttypes. StreamID ] uint64 )
190- lock sync. Mutex
191- wg sync. WaitGroup
196+ wg sync. WaitGroup
197+ syncedCount int32
198+ failedCount int32
192199 )
193200
194201 numStreams := b .configs .protocol .NumStreams ()
195202 streamIDs := b .configs .protocol .GetStreamIDs ()
196203
197- // ask all streams for height
198204 for i := 0 ; i < numStreams ; i ++ {
199- // skip excluded streams
200205 excluded := false
201206 if len (excludeIDs ) > 0 {
202207 for _ , excludedStreamID := range excludeIDs {
@@ -222,44 +227,70 @@ func (b *StageBodies) identifySyncedStreams(ctx context.Context, s *StageState,
222227 bn , _ , err = b .configs .protocol .GetCurrentBlockNumber (ctx , syncProto .WithWhitelist ([]sttypes.StreamID {stid }))
223228 }
224229 if err != nil {
225- b .configs .logger .Err (err ).Str ("streamID" , string (stid )).
226- Msg (WrapStagedSyncMsg ("[identifySyncedStreams] getCurrentNumber request failed" ))
227-
228230 if errors .Is (err , context .Canceled ) || errors .Is (err , context .DeadlineExceeded ) {
229- // Do not remove stream when failure is due to context cancelation or deadline; only mark as failed.
230- b .configs .protocol .StreamFailed (stid , "getCurrentNumber request failed" )
231- } else {
232- b .configs .protocol .RemoveStream (stid , "getCurrentNumber request failed" )
231+ return
233232 }
233+ results .Set (stid , err )
234+ atomic .AddInt32 (& failedCount , 1 )
234235 return
235236 }
236237
237- if bn < targetHeight {
238- return
238+ if bn >= targetHeight {
239+ results .Set (stid , nil )
240+ atomic .AddInt32 (& syncedCount , 1 )
239241 }
240-
241- lock .Lock ()
242- synced [stid ] = bn
243- lock .Unlock ()
244242 }(stID , targetHeight )
245243 }
246244
247- // Wait for all goroutines to finish
248245 wg .Wait ()
249246
250- // If no valid block number results were received, return an error
251- if len (synced ) == 0 {
247+ if syncedCount == 0 {
248+ if failedCount > 0 {
249+ b .configs .logger .Warn ().
250+ Int32 ("failedStreams" , failedCount ).
251+ Msg (WrapStagedSyncMsg ("[identifySyncedStreams] no synced streams found; skipping punishment to preserve stream pool" ))
252+ }
252253 return nil , ErrZeroBlockResponse
253254 }
254255
255- // Compute synced streams array
256- for st := range synced {
257- streams = append (streams , st )
258- }
256+ streams = make ([]sttypes.StreamID , 0 , syncedCount )
257+ results .Iterate (func (stid sttypes.StreamID , err error ) {
258+ if err == nil {
259+ streams = append (streams , stid )
260+ } else {
261+ b .handleIdentifyStreamFailure (s , stid , err )
262+ }
263+ })
259264
260265 return streams , nil
261266}
262267
268+ func (b * StageBodies ) handleIdentifyStreamFailure (s * StageState , stid sttypes.StreamID , err error ) {
269+ severity := requestmanager .ClassifyRequestError (err )
270+
271+ switch severity {
272+ case requestmanager .RequestErrorSkip :
273+ b .configs .logger .Debug ().Err (err ).Str ("streamID" , string (stid )).
274+ Msg (WrapStagedSyncMsg ("[identifySyncedStreams] skipping non-stream error" ))
275+
276+ case requestmanager .RequestErrorCritical :
277+ b .configs .logger .Warn ().Err (err ).Str ("streamID" , string (stid )).
278+ Msg (WrapStagedSyncMsg ("[identifySyncedStreams] removing stream due to critical error" ))
279+ if s .state .bnCache != nil {
280+ s .state .bnCache .RemoveStream (stid )
281+ }
282+ b .configs .protocol .RemoveStream (stid , "identifySyncedStreams: critical protocol error" )
283+
284+ default :
285+ b .configs .logger .Info ().Err (err ).Str ("streamID" , string (stid )).
286+ Msg (WrapStagedSyncMsg ("[identifySyncedStreams] marking stream as failed" ))
287+ if s .state .bnCache != nil {
288+ s .state .bnCache .InvalidateStream (stid )
289+ }
290+ b .configs .protocol .StreamFailed (stid , "identifySyncedStreams: request failed" )
291+ }
292+ }
293+
263294func (b * StageBodies ) runDownloadLoop (ctx context.Context , tx kv.RwTx , gbm * downloadManager , s * StageState , wl []sttypes.StreamID , startBlockNumber uint64 , startTime time.Time ) {
264295 currentBlock := startBlockNumber
265296 concurrency := s .state .config .Concurrency
0 commit comments