Skip to content

Commit db70d84

Browse files
authored
Merge pull request #315 from DataDog/vickenty/skd
Support floating point observations in Sketch
2 parents ad2c959 + df7d828 commit db70d84

2 files changed

Lines changed: 57 additions & 20 deletions

File tree

dogstatsd-http-core/src/main/java/com/datadoghq/dogstatsd/Sketch.java

Lines changed: 50 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,8 @@ public class Sketch {
3434
int size;
3535
int head;
3636

37+
private double[] values = new double[0];
38+
3739
double min;
3840
double max;
3941
double sum;
@@ -110,40 +112,66 @@ public long count() {
110112
}
111113

112114
/**
113-
* Builds the sketch from the given values. The {@code values} array is modified in place
114-
* (sorted); callers that need to preserve the original ordering should pass a copy.
115+
* Builds the sketch from the given values.
115116
*
116-
* @param values the observations to include in the sketch; sorted in place
117-
* @param sampleRate the sampling rate used to collect {@code values}, in {@code (0, 1]}. Each
118-
* observation is weighted by {@code 1 / sampleRate} when accumulating counts and sums.
117+
* @param observations the observations to include in the sketch
118+
* @param sampleRate the sampling rate used to collect {@code observations}, in {@code (0, 1]}.
119+
* Each observation is weighted by {@code 1 / sampleRate} when accumulating counts and sums.
119120
* Rates below ~1.08e-19 saturate the per-observation weight; bin counts and the total
120121
* {@code count} field saturate at {@link Long#MAX_VALUE} on overflow.
121122
*/
122-
public void build(long[] values, double sampleRate) {
123-
if (Double.isNaN(sampleRate) || sampleRate <= 0 || sampleRate > 1) {
124-
throw new IllegalArgumentException("sampleRate is out of range");
123+
public void build(long[] observations, double sampleRate) {
124+
validateSampleRate(sampleRate);
125+
reset();
126+
if (observations == null || observations.length == 0) {
127+
return;
128+
}
129+
ensureCapacity(observations.length);
130+
for (int i = 0; i < observations.length; i++) {
131+
values[i] = observations[i];
125132
}
133+
buildInner(observations.length, sampleRate);
134+
}
126135

136+
/**
137+
* Builds the sketch from the given values.
138+
*
139+
* @param observations the observations to include in the sketch
140+
* @param sampleRate the sampling rate used to collect {@code observations}, in {@code (0, 1]}.
141+
* Each observation is weighted by {@code 1 / sampleRate} when accumulating counts and sums.
142+
* Rates below ~1.08e-19 saturate the per-observation weight; bin counts and the total
143+
* {@code count} field saturate at {@link Long#MAX_VALUE} on overflow.
144+
*/
145+
public void build(double[] observations, double sampleRate) {
146+
validateSampleRate(sampleRate);
127147
reset();
128-
buildInner(values, sampleRate);
148+
if (observations == null || observations.length == 0) {
149+
return;
150+
}
151+
ensureCapacity(observations.length);
152+
System.arraycopy(observations, 0, values, 0, observations.length);
153+
buildInner(observations.length, sampleRate);
129154
}
130155

131-
private void buildInner(final long[] values, double sampleRate) {
132-
if (values == null || values.length == 0) {
133-
return;
156+
private static void validateSampleRate(double sampleRate) {
157+
if (Double.isNaN(sampleRate) || sampleRate <= 0 || sampleRate > 1) {
158+
throw new IllegalArgumentException("sampleRate is out of range");
134159
}
160+
}
135161

136-
Arrays.sort(values);
162+
private void buildInner(int length, double sampleRate) {
163+
Arrays.sort(values, 0, length);
137164

138165
final long sampleSize = (long) (1 / sampleRate);
139166
min = values[0];
140-
max = values[values.length - 1];
141-
count = satMul(sampleSize, values.length);
167+
max = values[length - 1];
168+
count = satMul(sampleSize, length);
142169

143170
short topKey = negInfKey - 1;
144171
long topCount = 0;
145172

146-
for (long val : values) {
173+
for (int i = 0; i < length; i++) {
174+
double val = values[i];
147175
sum += val / sampleRate;
148176

149177
short key = key(val);
@@ -162,6 +190,12 @@ private void buildInner(final long[] values, double sampleRate) {
162190
append(topKey, topCount);
163191
}
164192

193+
private void ensureCapacity(int needed) {
194+
if (values.length < needed) {
195+
values = new double[needed];
196+
}
197+
}
198+
165199
private void reset() {
166200
min = 0;
167201
max = 0;

dogstatsd-http-core/src/test/java/com/datadoghq/dogstatsd/SketchTest.java

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
import static org.junit.Assert.assertEquals;
1212
import static org.junit.Assert.assertThrows;
1313

14+
import java.util.Arrays;
1415
import org.junit.Test;
1516
import org.junit.function.ThrowingRunnable;
1617

@@ -47,7 +48,7 @@ public void keys() {
4748

4849
@Test
4950
public void basic() {
50-
s.build(null, 1);
51+
s.build((long[]) null, 1);
5152
assertEquals(0, s.min(), 0);
5253
assertEquals(0, s.max(), 0);
5354
assertEquals(0, s.sum(), 0);
@@ -112,11 +113,13 @@ public void binMerge() {
112113
values[i] = val;
113114
values[i + values.length / 2] = -val;
114115
}
116+
long[] sortedValues = values.clone();
117+
Arrays.sort(sortedValues);
115118
s.build(values, 1);
116-
assertEquals(values[0], s.min(), 0);
117-
assertEquals(values[values.length - 1], s.max(), 0);
119+
assertEquals(sortedValues[0], s.min(), 0);
120+
assertEquals(sortedValues[sortedValues.length - 1], s.max(), 0);
118121

119-
final short foldedKey = Sketch.key(values[4]);
122+
final short foldedKey = Sketch.key(sortedValues[4]);
120123

121124
s.bins(
122125
new Sketch.BinConsumer() {

0 commit comments

Comments
 (0)