11import { AdaptiveTimeout , Queue } from '@libp2p/utils'
22import drain from 'it-drain'
33import { TypedEventEmitter , setMaxListeners } from 'main-event'
4- import { PROVIDERS_VALIDITY , REPROVIDE_CONCURRENCY , REPROVIDE_INTERVAL , REPROVIDE_MAX_QUEUE_SIZE , REPROVIDE_THRESHOLD , REPROVIDE_TIMEOUT } from './constants.ts'
4+ import { PROVIDERS_VALIDITY , REPROVIDE_CONCURRENCY , REPROVIDE_INTERVAL , REPROVIDE_MAX_QUEUE_SIZE , REPROVIDE_SORT_BATCH_SIZE , REPROVIDE_THRESHOLD , REPROVIDE_TIMEOUT } from './constants.ts'
55import { convertBuffer , parseProviderKey , readProviderTime , timeOperationMethod } from './utils.ts'
66import type { ContentRouting } from './content-routing/index.ts'
77import type { OperationMetrics } from './kad-dht.ts'
@@ -27,6 +27,7 @@ export interface ReproviderInit {
2727 operationMetrics : OperationMetrics
2828 concurrency ?: number
2929 maxQueueSize ?: number
30+ sortBatchSize ?: number
3031 threshold ?: number
3132 validity ?: number
3233 interval ?: number
@@ -52,6 +53,7 @@ export class Reprovider extends TypedEventEmitter<ReprovideEvents> {
5253 private running : boolean
5354 private shutdownController ?: AbortController
5455 private readonly reprovideThreshold : number
56+ private readonly sortBatchSize : number
5557 private readonly contentRouting : ContentRouting
5658 private readonly datastorePrefix : string
5759 private readonly addressManager : AddressManager
@@ -78,6 +80,7 @@ export class Reprovider extends TypedEventEmitter<ReprovideEvents> {
7880 this . addressManager = components . addressManager
7981 this . datastorePrefix = `${ init . datastorePrefix } /provider`
8082 this . reprovideThreshold = init . threshold ?? REPROVIDE_THRESHOLD
83+ this . sortBatchSize = init . sortBatchSize ?? REPROVIDE_SORT_BATCH_SIZE
8184 this . maxQueueSize = init . maxQueueSize ?? REPROVIDE_MAX_QUEUE_SIZE
8285 this . validity = init . validity ?? PROVIDERS_VALIDITY
8386 this . interval = init . interval ?? REPROVIDE_INTERVAL
@@ -121,14 +124,18 @@ export class Reprovider extends TypedEventEmitter<ReprovideEvents> {
121124 * reprovided consecutively. Since nearby CIDs in the keyspace share the
122125 * same K closest peers, connections opened for one CID are likely to be
123126 * reused for the next, reducing the number of new dials per reprovide run.
127+ *
128+ * To avoid unbounded memory growth, CIDs are sorted and queued in batches
129+ * of at most `sortBatchSize` entries.
124130 */
125131 private async processRecords ( options ?: AbortOptions ) : Promise < void > {
126132 try {
127133 this . safeDispatchEvent ( 'reprovide:start' )
128134 this . log ( 'starting reprovide/cleanup' )
129135
130- // collect CIDs that need reproviding so we can sort them before queueing
131- const toReprovide : CID [ ] = [ ]
136+ // Accumulate CIDs for batched Kademlia-key sorting. Flushed every
137+ // sortBatchSize entries so the array never grows without bound.
138+ const batch : CID [ ] = [ ]
132139
133140 // Get all provider entries from the datastore
134141 for await ( const entry of this . datastore . query ( {
@@ -155,40 +162,19 @@ export class Reprovider extends TypedEventEmitter<ReprovideEvents> {
155162 // collect for reproviding
156163 if ( this . shouldReprovide ( isSelf , expires ) ) {
157164 this . log ( 'scheduling reprovide of %c' , cid )
158- toReprovide . push ( cid )
165+ batch . push ( cid )
166+
167+ if ( batch . length >= this . sortBatchSize ) {
168+ await this . sortAndQueueBatch ( batch . splice ( 0 ) , options )
169+ }
159170 }
160171 } catch ( err : any ) {
161172 this . log . error ( 'error processing datastore key %s - %s' , entry . key , err . message )
162173 }
163174 }
164175
165- // sort collected CIDs by their Kademlia key so XOR-adjacent CIDs are
166- // queued consecutively — peers responsible for one CID are likely to
167- // also be responsible for adjacent CIDs, so connections can be reused
168- if ( toReprovide . length > 1 ) {
169- const kadKeys = await Promise . all (
170- toReprovide . map ( cid => convertBuffer ( cid . multihash . bytes , options ) )
171- )
172-
173- const sortable = toReprovide . map ( ( cid , i ) => ( { cid, kadKey : kadKeys [ i ] } ) )
174- sortable . sort ( ( a , b ) => {
175- for ( let i = 0 ; i < a . kadKey . length ; i ++ ) {
176- if ( a . kadKey [ i ] !== b . kadKey [ i ] ) {
177- return a . kadKey [ i ] - b . kadKey [ i ]
178- }
179- }
180- return 0
181- } )
182-
183- toReprovide . splice ( 0 , toReprovide . length , ...sortable . map ( ( { cid } ) => cid ) )
184- }
185-
186- // queue reprovides in Kademlia key order
187- for ( const cid of toReprovide ) {
188- this . queueReprovide ( cid )
189- . catch ( err => {
190- this . log . error ( 'could not reprovide %c - %e' , cid , err )
191- } )
176+ if ( batch . length > 0 ) {
177+ await this . sortAndQueueBatch ( batch , options )
192178 }
193179
194180 this . log ( 'reprovide/cleanup successful' )
@@ -208,6 +194,33 @@ export class Reprovider extends TypedEventEmitter<ReprovideEvents> {
208194 }
209195 }
210196
197+ private async sortAndQueueBatch ( batch : CID [ ] , options ?: AbortOptions ) : Promise < void > {
198+ if ( batch . length > 1 ) {
199+ const kadKeys = await Promise . all (
200+ batch . map ( cid => convertBuffer ( cid . multihash . bytes , options ) )
201+ )
202+
203+ const sortable = batch . map ( ( cid , i ) => ( { cid, kadKey : kadKeys [ i ] } ) )
204+ sortable . sort ( ( a , b ) => {
205+ for ( let i = 0 ; i < a . kadKey . length ; i ++ ) {
206+ if ( a . kadKey [ i ] !== b . kadKey [ i ] ) {
207+ return a . kadKey [ i ] - b . kadKey [ i ]
208+ }
209+ }
210+ return 0
211+ } )
212+
213+ batch = sortable . map ( ( { cid } ) => cid )
214+ }
215+
216+ for ( const cid of batch ) {
217+ this . queueReprovide ( cid )
218+ . catch ( err => {
219+ this . log . error ( 'could not reprovide %c - %e' , cid , err )
220+ } )
221+ }
222+ }
223+
211224 /**
212225 * Determines if a record should be reprovided
213226 */
0 commit comments