@@ -5,8 +5,6 @@ package exemplar // import "go.opentelemetry.io/otel/sdk/metric/exemplar"
55
66import (
77 "context"
8- "math"
9- "math/rand/v2"
108 "sync"
119 "time"
1210
@@ -26,7 +24,10 @@ func FixedSizeReservoirProvider(k int) ReservoirProvider {
2624// sample each one. If there are more than k, the Reservoir will then randomly
2725// sample all additional measurements with a decreasing probability.
2826func NewFixedSizeReservoir (k int ) * FixedSizeReservoir {
29- return newFixedSizeReservoir (newStorage (k ))
27+ return & FixedSizeReservoir {
28+ nt : newNextTracker (k ), // tracker is built from the same k used to size storage below
29+ storage : make ([]measurement , k ), // k pre-allocated measurement slots
30+ }
3031}
3132
3233var _ Reservoir = & FixedSizeReservoir {}
@@ -37,41 +38,9 @@ var _ Reservoir = &FixedSizeReservoir{}
3738// additional measurement with a decreasing probability.
3839type FixedSizeReservoir struct {
3940 reservoir.ConcurrentSafe
40- * storage
41- mu sync.Mutex
42-
43- // count is the number of measurement seen.
44- count int64
45- // next is the next count that will store a measurement at a random index
46- // once the reservoir has been filled.
47- next int64
48- // w is the largest random number in a distribution that is used to compute
49- // the next next.
50- w float64
51- }
52-
53- func newFixedSizeReservoir (s * storage ) * FixedSizeReservoir {
54- r := & FixedSizeReservoir {
55- storage : s ,
56- }
57- r .reset ()
58- return r
59- }
60-
61- // randomFloat64 returns, as a float64, a uniform pseudo-random number in the
62- // open interval (0.0,1.0).
63- func (* FixedSizeReservoir ) randomFloat64 () float64 {
64- // TODO: Use an algorithm that avoids rejection sampling. For example:
65- //
66- // const precision = 1 << 53 // 2^53
67- // // Generate an integer in [1, 2^53 - 1]
68- // v := rand.Uint64() % (precision - 1) + 1
69- // return float64(v) / float64(precision)
70- f := rand .Float64 ()
71- for f == 0 {
72- f = rand .Float64 ()
73- }
74- return f
41+ mu sync.Mutex // mu is held by Offer and Collect while they access storage and nt.
42+ storage []measurement // storage holds the sampled measurements; NewFixedSizeReservoir allocates it with length k.
43+ nt * nextTracker // nt reports, for each offered measurement, whether it is sampled and at which storage index.
7544}
7645
7746// Offer accepts the parameters associated with a measurement. The
@@ -86,108 +55,12 @@ func (*FixedSizeReservoir) randomFloat64() float64 {
8655// parameters are the value and dropped (filtered) attributes of the
8756// measurement respectively.
8857func (r * FixedSizeReservoir ) Offer (ctx context.Context , t time.Time , n Value , a []attribute.KeyValue ) {
89- // The following algorithm is "Algorithm L" from Li, Kim-Hung (4 December
90- // 1994). "Reservoir-Sampling Algorithms of Time Complexity
91- // O(n(1+log(N/n)))". ACM Transactions on Mathematical Software. 20 (4):
92- // 481–493 (https://dl.acm.org/doi/10.1145/198429.198435).
93- //
94- // A high-level overview of "Algorithm L":
95- // 0) Pre-calculate the random count greater than the storage size when
96- // an exemplar will be replaced.
97- // 1) Accept all measurements offered until the configured storage size is
98- // reached.
99- // 2) Loop:
100- // a) When the pre-calculate count is reached, replace a random
101- // existing exemplar with the offered measurement.
102- // b) Calculate the next random count greater than the existing one
103- // which will replace another exemplars
104- //
105- // The way a "replacement" count is computed is by looking at `n` number of
106- // independent random numbers each corresponding to an offered measurement.
107- // Of these numbers the smallest `k` (the same size as the storage
108- // capacity) of them are kept as a subset. The maximum value in this
109- // subset, called `w` is used to weight another random number generation
110- // for the next count that will be considered.
111- //
112- // By weighting the next count computation like described, it is able to
113- // perform a uniformly-weighted sampling algorithm based on the number of
114- // samples the reservoir has seen so far. The sampling will "slow down" as
115- // more and more samples are offered so as to reduce a bias towards those
116- // offered just prior to the end of the collection.
117- //
118- // This algorithm is preferred because of its balance of simplicity and
119- // performance. It will compute three random numbers (the bulk of
120- // computation time) for each item that becomes part of the reservoir, but
121- // it does not spend any time on items that do not. In particular it has an
122- // asymptotic runtime of O(k(1 + log(n/k)) where n is the number of
123- // measurements offered and k is the reservoir size.
124- //
125- // See https://en.wikipedia.org/wiki/Reservoir_sampling for an overview of
126- // this and other reservoir sampling algorithms. See
127- // https://github.com/MrAlias/reservoir-sampling for a performance
128- // comparison of reservoir sampling algorithms.
129-
13058 r .mu .Lock ()
13159 defer r .mu .Unlock ()
132- if int (r .count ) < cap (r .measurements ) {
133- r .store (ctx , int (r .count ), t , n , a )
134- } else if r .count == r .next {
135- // Overwrite a random existing measurement with the one offered.
136- idx := int (rand .Int64N (int64 (cap (r .measurements ))))
137- r .store (ctx , idx , t , n , a )
138- r .advance ()
60+ sampled , idx := r .nt .shouldSample () // ask the tracker whether to keep this measurement and, if so, in which slot
61+ if sampled {
62+ r .storage [idx ].store (ctx , t , n , a ) // record the offered measurement in slot idx
13963 }
140- r .count ++
141- }
142-
143- // reset resets r to the initial state.
144- func (r * FixedSizeReservoir ) reset () {
145- // This resets the number of exemplars known.
146- r .count = 0
147- // Random index inserts should only happen after the storage is full.
148- r .next = int64 (cap (r .measurements ))
149-
150- // Initial random number in the series used to generate r.next.
151- //
152- // This is set before r.advance to reset or initialize the random number
153- // series. Without doing so it would always be 0 or never restart a new
154- // random number series.
155- //
156- // This maps the uniform random number in (0,1) to a geometric distribution
157- // over the same interval. The mean of the distribution is inversely
158- // proportional to the storage capacity.
159- r .w = math .Exp (math .Log (r .randomFloat64 ()) / float64 (cap (r .measurements )))
160-
161- r .advance ()
162- }
163-
164- // advance updates the count at which the offered measurement will overwrite an
165- // existing exemplar.
166- func (r * FixedSizeReservoir ) advance () {
167- // Calculate the next value in the random number series.
168- //
169- // The current value of r.w is based on the max of a distribution of random
170- // numbers (i.e. `w = max(u_1,u_2,...,u_k)` for `k` equal to the capacity
171- // of the storage and each `u` in the interval (0,w)). To calculate the
172- // next r.w we use the fact that when the next exemplar is selected to be
173- // included in the storage an existing one will be dropped, and the
174- // corresponding random number in the set used to calculate r.w will also
175- // be replaced. The replacement random number will also be within (0,w),
176- // therefore the next r.w will be based on the same distribution (i.e.
177- // `max(u_1,u_2,...,u_k)`). Therefore, we can sample the next r.w by
178- // computing the next random number `u` and take r.w as `w * u^(1/k)`.
179- r .w *= math .Exp (math .Log (r .randomFloat64 ()) / float64 (cap (r .measurements )))
180- // Use the new random number in the series to calculate the count of the
181- // next measurement that will be stored.
182- //
183- // Given 0 < r.w < 1, each iteration will result in subsequent r.w being
184- // smaller. This translates here into the next next being selected against
185- // a distribution with a higher mean (i.e. the expected value will increase
186- // and replacements become less likely)
187- //
188- // Important to note, the new r.next will always be at least 1 more than
189- // the last r.next.
190- r .next += int64 (math .Log (r .randomFloat64 ())/ math .Log (1 - r .w )) + 1
19164}
19265
19366// Collect returns all the held exemplars.
@@ -196,10 +69,17 @@ func (r *FixedSizeReservoir) advance() {
19669func (r * FixedSizeReservoir ) Collect (dest * []Exemplar ) {
19770 r .mu .Lock ()
19871 defer r .mu .Unlock ()
199- r .storage .Collect (dest )
72+ * dest = reset (* dest , len (r .storage ), len (r .storage )) // presumably resizes dest to one entry per storage slot; confirm against the reset helper
73+ var n int
74+ for i := range r .storage {
75+ if r .storage [i ].exemplar (& (* dest )[n ]) { // n only advances when slot i reports an exemplar was written
76+ n ++
77+ }
78+ }
79+ * dest = (* dest )[:n ] // keep only the n entries that were populated
20080 // Call r.nt.reset here even though it restarts the sampling state (presumably
20181 // the seen count and random number series, formerly r.count and r.w; TODO confirm).
20282 // This will persist any old exemplars as long as no new measurements are offered, but
20383 // it will also prioritize new measurements over those from the older collection cycle.
204- r .reset ()
84+ r .nt . reset ()
20585}
0 commit comments