Skip to content

Commit 3a21f44

Browse files
authored
Merge pull request #307 from DataDog/vickenty/dhs
Add sketch builder for dogstatsd-http
2 parents 261a501 + 3ec63a2 commit 3a21f44

5 files changed

Lines changed: 482 additions & 0 deletions

File tree

.circleci/config.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ default_steps: &default_steps
2020

2121
- run: |
2222
mvn clean install
23+
- run: cd dogstatsd-http-core && mvn clean install
2324
- run: cd dogstatsd-http-serializer && mvn clean install
2425

2526
jobs:

dogstatsd-http-core/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
/target/

dogstatsd-http-core/pom.xml

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
<project xmlns="http://maven.apache.org/POM/4.0.0"
2+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
4+
<modelVersion>4.0.0</modelVersion>
5+
6+
<groupId>com.datadoghq</groupId>
7+
<artifactId>dogstatsd-http-core</artifactId>
8+
<packaging>jar</packaging>
9+
<name>dogstatsd-http-core</name>
10+
<version>1.0.0-SNAPSHOT</version>
11+
<description>Core primitives for the DogStatsD HTTP client.</description>
12+
<url>https://github.com/DataDog/java-dogstatsd-client</url>
13+
14+
<properties>
15+
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
16+
</properties>
17+
18+
<licenses>
19+
<license>
20+
<name>The MIT License (MIT)</name>
21+
<url>http://opensource.org/licenses/MIT</url>
22+
<distribution>repo</distribution>
23+
</license>
24+
</licenses>
25+
26+
<scm>
27+
<url>https://github.com/DataDog/java-dogstatsd-client</url>
28+
<connection>scm:git:git@github.com:DataDog/java-dogstatsd-client.git</connection>
29+
<developerConnection>scm:git:git@github.com:Datadog/java-dogstatsd-client.git</developerConnection>
30+
</scm>
31+
32+
<developers>
33+
<developer>
34+
<id>datadog</id>
35+
<name>Datadog developers</name>
36+
<email>dev@datadoghq.com</email>
37+
</developer>
38+
</developers>
39+
40+
<dependencies>
41+
<dependency>
42+
<groupId>junit</groupId>
43+
<artifactId>junit</artifactId>
44+
<version>4.13.1</version>
45+
<scope>test</scope>
46+
</dependency>
47+
</dependencies>
48+
49+
<profiles>
50+
<profile>
51+
<id>spotless</id>
52+
<activation>
53+
<jdk>[17.0,)</jdk>
54+
</activation>
55+
<build>
56+
<plugins>
57+
<plugin>
58+
<groupId>com.diffplug.spotless</groupId>
59+
<artifactId>spotless-maven-plugin</artifactId>
60+
<version>2.45.0</version>
61+
<configuration>
62+
<java>
63+
<googleJavaFormat>
64+
<version>1.28.0</version>
65+
<style>AOSP</style>
66+
</googleJavaFormat>
67+
</java>
68+
</configuration>
69+
<executions>
70+
<execution>
71+
<goals>
72+
<goal>check</goal>
73+
</goals>
74+
</execution>
75+
</executions>
76+
</plugin>
77+
</plugins>
78+
</build>
79+
</profile>
80+
</profiles>
81+
82+
<build>
83+
<plugins>
84+
<plugin>
85+
<groupId>org.apache.maven.plugins</groupId>
86+
<artifactId>maven-compiler-plugin</artifactId>
87+
<version>3.8.1</version>
88+
<configuration>
89+
<source>1.7</source>
90+
<target>1.7</target>
91+
</configuration>
92+
</plugin>
93+
<plugin>
94+
<groupId>org.apache.maven.plugins</groupId>
95+
<artifactId>maven-surefire-plugin</artifactId>
96+
<version>2.19</version>
97+
</plugin>
98+
</plugins>
99+
</build>
100+
</project>
Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
/* Unless explicitly stated otherwise all files in this repository are
2+
* licensed under the Apache 2.0 License.
3+
*
4+
* This product includes software developed at Datadog
5+
* (https://www.datadoghq.com/) Copyright 2026 Datadog, Inc.
6+
*/
7+
8+
package com.datadoghq.dogstatsd;
9+
10+
import java.util.Arrays;
11+
12+
/**
13+
* Reusable DDSketch builder. Consumes a batch of observations and populates sum, min, max, count
14+
* and distribution bins accordingly.
15+
*
16+
* <p>This implementation maintains at most 4096 bins with 64-bit counters. Number of bins is a hard
17+
* limit and is enforced by the intake.
18+
*
19+
* <p>Prioritizes accuracy of higher key bins (higher percentiles) over lower ones when number of
20+
* bins exceeds the limit.
21+
*/
22+
public class Sketch {
23+
static final double gamma = 130.0 / 128;
24+
static final double minValue = 1e-9;
25+
static final double logGamma = Math.log(gamma);
26+
static final int bias = 1 - (int) (Math.floor(Math.log(minValue) / logGamma));
27+
static final short posInfKey = (1 << 15) - 1;
28+
static final short negInfKey = -posInfKey;
29+
static final int binLimit = 4096;
30+
31+
// Growable ring buffer up to binLimit elements.
32+
short[] binKeys = new short[0];
33+
long[] binCounts = new long[0];
34+
int size;
35+
int head;
36+
37+
double min;
38+
double max;
39+
double sum;
40+
long count;
41+
42+
static short key(double value) {
43+
if (value < 0) {
44+
return (short) -key(-value);
45+
}
46+
47+
if (value < minValue) {
48+
return 0;
49+
}
50+
51+
int key = (int) Math.rint(Math.log(value) / logGamma) + bias;
52+
if (key > posInfKey) {
53+
return posInfKey;
54+
}
55+
return (short) key;
56+
}
57+
58+
/** Receives (key, count) pairs from {@link #bins(BinConsumer)}. */
59+
public interface BinConsumer {
60+
void consumeBin(short key, long count);
61+
}
62+
63+
/**
64+
* @return the number of populated bins fed to {@link #bins(BinConsumer)}.
65+
*/
66+
public int size() {
67+
return size;
68+
}
69+
70+
/** Feeds each populated bin to {@code consumer} in order. */
71+
public void bins(BinConsumer consumer) {
72+
int idx = head;
73+
for (int i = 0; i < size; i++) {
74+
consumer.consumeBin(binKeys[idx], binCounts[idx]);
75+
idx++;
76+
if (idx == binKeys.length) {
77+
idx = 0;
78+
}
79+
}
80+
}
81+
82+
/**
83+
* @return the minimum observation in the most recent batch, or {@code 0} if the batch was
84+
* empty.
85+
*/
86+
public double min() {
87+
return min;
88+
}
89+
90+
/**
91+
* @return the maximum observation in the most recent batch, or {@code 0} if the batch was
92+
* empty.
93+
*/
94+
public double max() {
95+
return max;
96+
}
97+
98+
/**
99+
* @return the sum of observations in the most recent batch.
100+
*/
101+
public double sum() {
102+
return sum;
103+
}
104+
105+
/**
106+
* @return the total count of observations represented by the sketch.
107+
*/
108+
public long count() {
109+
return count;
110+
}
111+
112+
/**
113+
* Builds the sketch from the given values. The {@code values} array is modified in place
114+
* (sorted); callers that need to preserve the original ordering should pass a copy.
115+
*
116+
* @param values the observations to include in the sketch; sorted in place
117+
* @param sampleRate the sampling rate used to collect {@code values}, in {@code (0, 1]}. Each
118+
* observation is weighted by {@code 1 / sampleRate} when accumulating counts and sums.
119+
* Rates below ~1.08e-19 saturate the per-observation weight; bin counts and the total
120+
* {@code count} field saturate at {@link Long#MAX_VALUE} on overflow.
121+
*/
122+
public void build(long[] values, double sampleRate) {
123+
if (Double.isNaN(sampleRate) || sampleRate <= 0 || sampleRate > 1) {
124+
throw new IllegalArgumentException("sampleRate is out of range");
125+
}
126+
127+
reset();
128+
buildInner(values, sampleRate);
129+
}
130+
131+
private void buildInner(final long[] values, double sampleRate) {
132+
if (values == null || values.length == 0) {
133+
return;
134+
}
135+
136+
Arrays.sort(values);
137+
138+
final long sampleSize = (long) (1 / sampleRate);
139+
min = values[0];
140+
max = values[values.length - 1];
141+
count = satMul(sampleSize, values.length);
142+
143+
short topKey = negInfKey - 1;
144+
long topCount = 0;
145+
146+
for (long val : values) {
147+
sum += val / sampleRate;
148+
149+
short key = key(val);
150+
151+
if (key == topKey) {
152+
topCount = satAdd(topCount, sampleSize);
153+
} else {
154+
if (topCount > 0) {
155+
append(topKey, topCount);
156+
}
157+
topKey = key;
158+
topCount = sampleSize;
159+
}
160+
}
161+
162+
append(topKey, topCount);
163+
}
164+
165+
private void reset() {
166+
min = 0;
167+
max = 0;
168+
sum = 0;
169+
count = 0;
170+
size = 0;
171+
head = 0;
172+
}
173+
174+
private void append(short key, long count) {
175+
if (size >= binLimit) {
176+
int next = head + 1;
177+
if (next == binLimit) {
178+
next = 0;
179+
}
180+
binCounts[next] = satAdd(binCounts[next], binCounts[head]);
181+
binKeys[head] = key;
182+
binCounts[head] = count;
183+
head = next;
184+
return;
185+
}
186+
187+
if (size == binKeys.length) {
188+
int cap = Math.max(4, size * 2);
189+
binKeys = Arrays.copyOf(binKeys, cap);
190+
binCounts = Arrays.copyOf(binCounts, cap);
191+
}
192+
binKeys[size] = key;
193+
binCounts[size] = count;
194+
size++;
195+
}
196+
197+
// a >= 0 && b >= 0
198+
private static long satAdd(long a, long b) {
199+
long r = a + b;
200+
if (r < 0) {
201+
r = Long.MAX_VALUE;
202+
}
203+
return r;
204+
}
205+
206+
// a >= 0 && b >= 0
207+
private static long satMul(long a, long b) {
208+
long r = a * b;
209+
if ((a > Integer.MAX_VALUE || b > Integer.MAX_VALUE) && a != 0 && r / a != b) {
210+
r = Long.MAX_VALUE;
211+
}
212+
return r;
213+
}
214+
}

0 commit comments

Comments
 (0)