Skip to content

Commit ae0605a

Browse files
committed
Support floating point observations in Sketch
To support both long and double observations, copy the provided values into an internal double buffer (converting them where necessary) and build the sketch from that buffer. Because the input array is no longer sorted in place, this also removes the need for callers to pass a copy of their values.
1 parent 7ac5c1a commit ae0605a

2 files changed

Lines changed: 56 additions & 19 deletions

File tree

dogstatsd-http-core/src/main/java/com/datadoghq/dogstatsd/Sketch.java

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ public class Sketch {
3434
int size;
3535
int head;
3636

37+
private double[] values = new double[0];
38+
3739
double min;
3840
double max;
3941
double sum;
@@ -110,40 +112,66 @@ public long count() {
110112
}
111113

112114
/**
113-
* Builds the sketch from the given values. The {@code values} array is modified in place
114-
* (sorted); callers that need to preserve the original ordering should pass a copy.
115+
* Builds the sketch from the given values.
115116
*
116-
* @param values the observations to include in the sketch; sorted in place
117-
* @param sampleRate the sampling rate used to collect {@code values}, in {@code (0, 1]}. Each
117+
* @param observations the observations to include in the sketch
118+
* @param sampleRate the sampling rate used to collect {@code observations}, in {@code (0, 1]}. Each
118119
* observation is weighted by {@code 1 / sampleRate} when accumulating counts and sums.
119120
* Rates below ~1.08e-19 saturate the per-observation weight; bin counts and the total
120121
* {@code count} field saturate at {@link Long#MAX_VALUE} on overflow.
121122
*/
122-
public void build(long[] values, double sampleRate) {
123-
if (Double.isNaN(sampleRate) || sampleRate <= 0 || sampleRate > 1) {
124-
throw new IllegalArgumentException("sampleRate is out of range");
123+
public void build(long[] observations, double sampleRate) {
124+
validateSampleRate(sampleRate);
125+
reset();
126+
if (observations == null || observations.length == 0) {
127+
return;
128+
}
129+
ensureCapacity(observations.length);
130+
for (int i = 0; i < observations.length; i++) {
131+
values[i] = observations[i];
125132
}
133+
buildInner(observations.length, sampleRate);
134+
}
126135

136+
/**
137+
* Builds the sketch from the given values.
138+
*
139+
* @param observations the observations to include in the sketch
140+
* @param sampleRate the sampling rate used to collect {@code observations}, in {@code (0, 1]}. Each
141+
* observation is weighted by {@code 1 / sampleRate} when accumulating counts and sums.
142+
* Rates below ~1.08e-19 saturate the per-observation weight; bin counts and the total
143+
* {@code count} field saturate at {@link Long#MAX_VALUE} on overflow.
144+
*/
145+
public void build(double[] observations, double sampleRate) {
146+
validateSampleRate(sampleRate);
127147
reset();
128-
buildInner(values, sampleRate);
148+
if (observations == null || observations.length == 0) {
149+
return;
150+
}
151+
ensureCapacity(observations.length);
152+
System.arraycopy(observations, 0, values, 0, observations.length);
153+
buildInner(observations.length, sampleRate);
129154
}
130155

131-
private void buildInner(final long[] values, double sampleRate) {
132-
if (values == null || values.length == 0) {
133-
return;
156+
private static void validateSampleRate(double sampleRate) {
157+
if (Double.isNaN(sampleRate) || sampleRate <= 0 || sampleRate > 1) {
158+
throw new IllegalArgumentException("sampleRate is out of range");
134159
}
160+
}
135161

136-
Arrays.sort(values);
162+
private void buildInner(int length, double sampleRate) {
163+
Arrays.sort(values, 0, length);
137164

138165
final long sampleSize = (long) (1 / sampleRate);
139166
min = values[0];
140-
max = values[values.length - 1];
141-
count = satMul(sampleSize, values.length);
167+
max = values[length - 1];
168+
count = satMul(sampleSize, length);
142169

143170
short topKey = negInfKey - 1;
144171
long topCount = 0;
145172

146-
for (long val : values) {
173+
for (int i = 0; i < length; i++) {
174+
double val = values[i];
147175
sum += val / sampleRate;
148176

149177
short key = key(val);
@@ -162,6 +190,12 @@ private void buildInner(final long[] values, double sampleRate) {
162190
append(topKey, topCount);
163191
}
164192

193+
private void ensureCapacity(int needed) {
194+
if (values.length < needed) {
195+
values = new double[needed];
196+
}
197+
}
198+
165199
private void reset() {
166200
min = 0;
167201
max = 0;

dogstatsd-http-core/src/test/java/com/datadoghq/dogstatsd/SketchTest.java

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import static org.junit.Assert.assertEquals;
1212
import static org.junit.Assert.assertThrows;
1313

14+
import java.util.Arrays;
1415
import org.junit.Test;
1516
import org.junit.function.ThrowingRunnable;
1617

@@ -47,7 +48,7 @@ public void keys() {
4748

4849
@Test
4950
public void basic() {
50-
s.build(null, 1);
51+
s.build((long[]) null, 1);
5152
assertEquals(0, s.min(), 0);
5253
assertEquals(0, s.max(), 0);
5354
assertEquals(0, s.sum(), 0);
@@ -112,11 +113,13 @@ public void binMerge() {
112113
values[i] = val;
113114
values[i + values.length / 2] = -val;
114115
}
116+
long[] sortedValues = values.clone();
117+
Arrays.sort(sortedValues);
115118
s.build(values, 1);
116-
assertEquals(values[0], s.min(), 0);
117-
assertEquals(values[values.length - 1], s.max(), 0);
119+
assertEquals(sortedValues[0], s.min(), 0);
120+
assertEquals(sortedValues[sortedValues.length - 1], s.max(), 0);
118121

119-
final short foldedKey = Sketch.key(values[4]);
122+
final short foldedKey = Sketch.key(sortedValues[4]);
120123

121124
s.bins(
122125
new Sketch.BinConsumer() {

0 commit comments

Comments
 (0)