@@ -2,6 +2,7 @@ package datadog.trace.common.sampling
22
33import datadog.trace.api.DDTags
44import datadog.trace.api.sampling.PrioritySampling
5+ import datadog.trace.api.time.ControllableTimeSource
56import datadog.trace.common.writer.ListWriter
67import datadog.trace.common.writer.LoggingWriter
78import datadog.trace.common.writer.ddagent.DDAgentApi
@@ -222,6 +223,171 @@ class RateByServiceTraceSamplerTest extends DDCoreSpecification {
222223 ' manual.keep' | true | PrioritySampling . USER_KEEP
223224 }
224225
/**
 * Checks that {@code RateByServiceTraceSampler.shouldCap(oldRate, newRate)} reports that no cap
 * is needed when the new rate is lower than, equal to, or exactly twice the old rate.
 */
def "shouldCap returns false when rate decreases or stays same"() {
  expect:
  !RateByServiceTraceSampler.shouldCap(0.8, 0.4) // rate decreases
  !RateByServiceTraceSampler.shouldCap(0.5, 0.5) // rate unchanged
  !RateByServiceTraceSampler.shouldCap(0.5, 1.0) // 1.0 <= 0.5 * 2, no cap needed
}
232+
/**
 * Checks that {@code shouldCap} never asks for a cap when the old rate is 0.0,
 * whatever the new rate is.
 */
def "shouldCap returns false when old rate is zero"() {
  expect:
  !RateByServiceTraceSampler.shouldCap(0.0, 0.5)
  !RateByServiceTraceSampler.shouldCap(0.0, 1.0)
}
238+
/**
 * Checks that {@code shouldCap} requests a cap whenever the new rate is more than
 * twice the (non-zero) old rate.
 */
def "shouldCap returns true when new rate exceeds 2x old rate"() {
  expect:
  RateByServiceTraceSampler.shouldCap(0.1, 1.0) // 10x
  RateByServiceTraceSampler.shouldCap(0.2, 0.8) // 4x
  RateByServiceTraceSampler.shouldCap(0.1, 0.3) // 3x
}
245+
/**
 * Checks that {@code cappedRate(oldRate)} yields twice the old rate for the sampled inputs.
 */
def "cappedRate returns 2x old rate"() {
  expect:
  RateByServiceTraceSampler.cappedRate(0.1) == 0.2
  RateByServiceTraceSampler.cappedRate(0.2) == 0.4
  RateByServiceTraceSampler.cappedRate(0.4) == 0.8
}
252+
/**
 * Drives the sampler from a rate of 0.1 towards an agent-reported rate of 1.0 and expects the
 * applied rate to at most double each time {@code RAMP_UP_INTERVAL_NANOS} elapses:
 * 0.1 -> 0.2 -> 0.4 -> 0.8 -> 1.0. Both the per-service sampler and the fallback sampler
 * are checked at every step.
 */
def "ramp-up caps rate increases at 2x per interval"() {
  setup:
  // Controllable clock so the test decides when a ramp-up interval has elapsed.
  def time = new ControllableTimeSource()
  time.set(1_000_000_000L)
  RateByServiceTraceSampler serviceSampler = new RateByServiceTraceSampler(time)
  def tolerance = 0.01

  // Set initial rate to 0.1
  String response = '{"rate_by_service": {"service:foo,env:bar":0.1, "service:,env:":0.1}}'
  serviceSampler.onResponse("traces", serializer.fromJson(response))

  expect:
  Math.abs(serviceSampler.serviceRates.getSampler("bar", "foo").sampleRate - 0.1) < tolerance

  when: "agent restart sends rate 1.0, first interval"
  time.advance(RateByServiceTraceSampler.RAMP_UP_INTERVAL_NANOS)
  response = '{"rate_by_service": {"service:foo,env:bar":1.0, "service:,env:":1.0}}'
  serviceSampler.onResponse("traces", serializer.fromJson(response))

  then: "rate is capped at 2x = 0.2"
  Math.abs(serviceSampler.serviceRates.getSampler("bar", "foo").sampleRate - 0.2) < tolerance
  Math.abs(serviceSampler.serviceRates.getFallbackSampler().sampleRate - 0.2) < tolerance

  when: "second interval"
  time.advance(RateByServiceTraceSampler.RAMP_UP_INTERVAL_NANOS)
  serviceSampler.onResponse("traces", serializer.fromJson(response))

  then: "rate doubles to 0.4"
  Math.abs(serviceSampler.serviceRates.getSampler("bar", "foo").sampleRate - 0.4) < tolerance
  Math.abs(serviceSampler.serviceRates.getFallbackSampler().sampleRate - 0.4) < tolerance

  when: "third interval"
  time.advance(RateByServiceTraceSampler.RAMP_UP_INTERVAL_NANOS)
  serviceSampler.onResponse("traces", serializer.fromJson(response))

  then: "rate doubles to 0.8"
  Math.abs(serviceSampler.serviceRates.getSampler("bar", "foo").sampleRate - 0.8) < tolerance
  Math.abs(serviceSampler.serviceRates.getFallbackSampler().sampleRate - 0.8) < tolerance

  when: "fourth interval"
  time.advance(RateByServiceTraceSampler.RAMP_UP_INTERVAL_NANOS)
  serviceSampler.onResponse("traces", serializer.fromJson(response))

  then: "rate reaches target 1.0 (2x=1.6 > 1.0)"
  Math.abs(serviceSampler.serviceRates.getSampler("bar", "foo").sampleRate - 1.0) < tolerance
  Math.abs(serviceSampler.serviceRates.getFallbackSampler().sampleRate - 1.0) < tolerance
}
300+
/**
 * Expects a rate decrease (0.8 -> 0.2) to be applied on the very next agent response.
 * The clock is deliberately not advanced between the two responses.
 */
def "ramp-down applies immediately"() {
  setup:
  def time = new ControllableTimeSource()
  time.set(1_000_000_000L)
  RateByServiceTraceSampler serviceSampler = new RateByServiceTraceSampler(time)
  def tolerance = 0.01

  // Set initial rate to 0.8
  String response = '{"rate_by_service": {"service:foo,env:bar":0.8, "service:,env:":0.8}}'
  serviceSampler.onResponse("traces", serializer.fromJson(response))

  when: "rate decreases to 0.2"
  response = '{"rate_by_service": {"service:foo,env:bar":0.2, "service:,env:":0.2}}'
  serviceSampler.onResponse("traces", serializer.fromJson(response))

  then: "decrease is applied immediately"
  Math.abs(serviceSampler.serviceRates.getSampler("bar", "foo").sampleRate - 0.2) < tolerance
  Math.abs(serviceSampler.serviceRates.getFallbackSampler().sampleRate - 0.2) < tolerance
}
320+
/**
 * Expects a second rate increase to be held back until another
 * {@code RAMP_UP_INTERVAL_NANOS} has elapsed since the previous capped increase:
 * 0.1 -> 0.2 (capped), unchanged when retried with no time elapsed, then 0.4 after the interval.
 */
def "rate increase blocked during cooldown"() {
  setup:
  def time = new ControllableTimeSource()
  time.set(1_000_000_000L)
  RateByServiceTraceSampler serviceSampler = new RateByServiceTraceSampler(time)
  def tolerance = 0.01

  // Set initial rate to 0.1
  String response = '{"rate_by_service": {"service:foo,env:bar":0.1}}'
  serviceSampler.onResponse("traces", serializer.fromJson(response))

  when: "rate jumps, first capped increase"
  time.advance(RateByServiceTraceSampler.RAMP_UP_INTERVAL_NANOS)
  response = '{"rate_by_service": {"service:foo,env:bar":1.0}}'
  serviceSampler.onResponse("traces", serializer.fromJson(response))

  then: "capped to 0.2"
  Math.abs(serviceSampler.serviceRates.getSampler("bar", "foo").sampleRate - 0.2) < tolerance

  when: "try again immediately (within cooldown)"
  // No time.advance here: the clock still reads the instant of the previous capped increase.
  serviceSampler.onResponse("traces", serializer.fromJson(response))

  then: "rate stays at 0.2 because cooldown hasn't elapsed"
  Math.abs(serviceSampler.serviceRates.getSampler("bar", "foo").sampleRate - 0.2) < tolerance

  when: "after cooldown elapsed"
  time.advance(RateByServiceTraceSampler.RAMP_UP_INTERVAL_NANOS)
  serviceSampler.onResponse("traces", serializer.fromJson(response))

  then: "rate doubles to 0.4"
  Math.abs(serviceSampler.serviceRates.getSampler("bar", "foo").sampleRate - 0.4) < tolerance
}
353+
/**
 * Expects a blocked increase (one arriving before the cooldown has elapsed) to leave the
 * cooldown start untouched: after 0.01 -> 0.02, a response half an interval later is held at
 * 0.02, and a response at the full interval since the original cap doubles the rate to 0.04.
 */
def "cooldown not reset by blocked increase"() {
  setup:
  def time = new ControllableTimeSource()
  time.set(1_000_000_000L)
  RateByServiceTraceSampler serviceSampler = new RateByServiceTraceSampler(time)
  // The rates told apart here (0.01, 0.02, 0.04) are only 0.01 apart, so the tolerance must be
  // well below that gap for the checks to distinguish one ramp-up step from the next.
  def tolerance = 0.001

  // Split the interval into two parts that always sum to the full interval, even if the
  // constant is odd (Groovy '/' on integers yields a BigDecimal, and the cast truncates).
  long firstHalf = (long) (RateByServiceTraceSampler.RAMP_UP_INTERVAL_NANOS / 2)
  long secondHalf = RateByServiceTraceSampler.RAMP_UP_INTERVAL_NANOS - firstHalf

  // Set initial low rate
  String response = '{"rate_by_service": {"service:foo,env:bar":0.01}}'
  serviceSampler.onResponse("traces", serializer.fromJson(response))

  expect:
  Math.abs(serviceSampler.serviceRates.getSampler("bar", "foo").sampleRate - 0.01) < tolerance

  when: "wait for cooldown, apply increase: 0.01 -> 0.02"
  time.advance(RateByServiceTraceSampler.RAMP_UP_INTERVAL_NANOS)
  response = '{"rate_by_service": {"service:foo,env:bar":1.0}}'
  serviceSampler.onResponse("traces", serializer.fromJson(response))

  then: "rate is capped at 2x = 0.02"
  Math.abs(serviceSampler.serviceRates.getSampler("bar", "foo").sampleRate - 0.02) < tolerance

  when: "before cooldown elapses, send another increase - rate should be held and lastCapped NOT reset"
  time.advance(firstHalf)
  serviceSampler.onResponse("traces", serializer.fromJson(response))

  then: "rate stays at 0.02 (cooldown)"
  Math.abs(serviceSampler.serviceRates.getSampler("bar", "foo").sampleRate - 0.02) < tolerance

  when: "wait remaining half of cooldown from the original cap - should allow next ramp-up"
  time.advance(secondHalf)
  serviceSampler.onResponse("traces", serializer.fromJson(response))

  then: "rate doubles to 0.04 because lastCapped was NOT reset by the blocked increase"
  Math.abs(serviceSampler.serviceRates.getSampler("bar", "foo").sampleRate - 0.04) < tolerance
}
390+
225391 def " not setting forced tracing via tag or setting it wrong value not causing exception" () {
226392 setup :
227393 def sampler = new RateByServiceTraceSampler ()
0 commit comments