|
| 1 | +/* Unless explicitly stated otherwise all files in this repository are |
| 2 | + * licensed under the Apache 2.0 License. |
| 3 | + * |
| 4 | + * This product includes software developed at Datadog |
| 5 | + * (https://www.datadoghq.com/) Copyright 2026 Datadog, Inc. |
| 6 | + */ |
| 7 | + |
| 8 | +package com.datadoghq.dogstatsd; |
| 9 | + |
| 10 | +import java.util.Arrays; |
| 11 | + |
| 12 | +/** |
| 13 | + * Reusable DDSketch builder. Consumes a batch of observations and populates sum, min, max, count |
| 14 | + * and distribution bins accordingly. |
| 15 | + * |
| 16 | + * <p>This implementation maintains at most 4096 bins with 64-bit counters. Number of bins is a hard |
| 17 | + * limit and is enforced by the intake. |
| 18 | + * |
| 19 | + * <p>Prioritizes accuracy of higher key bins (higher percentiles) over lower ones when number of |
| 20 | + * bins exceeds the limit. |
| 21 | + */ |
| 22 | +public class Sketch { |
| 23 | + static final double gamma = 130.0 / 128; |
| 24 | + static final double minValue = 1e-9; |
| 25 | + static final double logGamma = Math.log(gamma); |
| 26 | + static final int bias = 1 - (int) (Math.floor(Math.log(minValue) / logGamma)); |
| 27 | + static final short posInfKey = (1 << 15) - 1; |
| 28 | + static final short negInfKey = -posInfKey; |
| 29 | + static final int binLimit = 4096; |
| 30 | + |
| 31 | + // Growable ring buffer up to binLimit elements. |
| 32 | + short[] binKeys = new short[0]; |
| 33 | + long[] binCounts = new long[0]; |
| 34 | + int size; |
| 35 | + int head; |
| 36 | + |
| 37 | + double min; |
| 38 | + double max; |
| 39 | + double sum; |
| 40 | + long count; |
| 41 | + |
| 42 | + static short key(double value) { |
| 43 | + if (value < 0) { |
| 44 | + return (short) -key(-value); |
| 45 | + } |
| 46 | + |
| 47 | + if (value < minValue) { |
| 48 | + return 0; |
| 49 | + } |
| 50 | + |
| 51 | + int key = (int) Math.rint(Math.log(value) / logGamma) + bias; |
| 52 | + if (key > posInfKey) { |
| 53 | + return posInfKey; |
| 54 | + } |
| 55 | + return (short) key; |
| 56 | + } |
| 57 | + |
| 58 | + /** Receives (key, count) pairs from {@link #bins(BinConsumer)}. */ |
| 59 | + public interface BinConsumer { |
| 60 | + void consumeBin(short key, long count); |
| 61 | + } |
| 62 | + |
| 63 | + /** |
| 64 | + * @return the number of populated bins fed to {@link #bins(BinConsumer)}. |
| 65 | + */ |
| 66 | + public int size() { |
| 67 | + return size; |
| 68 | + } |
| 69 | + |
| 70 | + /** Feeds each populated bin to {@code consumer} in order. */ |
| 71 | + public void bins(BinConsumer consumer) { |
| 72 | + int idx = head; |
| 73 | + for (int i = 0; i < size; i++) { |
| 74 | + consumer.consumeBin(binKeys[idx], binCounts[idx]); |
| 75 | + idx++; |
| 76 | + if (idx == binKeys.length) { |
| 77 | + idx = 0; |
| 78 | + } |
| 79 | + } |
| 80 | + } |
| 81 | + |
| 82 | + /** |
| 83 | + * @return the minimum observation in the most recent batch, or {@code 0} if the batch was |
| 84 | + * empty. |
| 85 | + */ |
| 86 | + public double min() { |
| 87 | + return min; |
| 88 | + } |
| 89 | + |
| 90 | + /** |
| 91 | + * @return the maximum observation in the most recent batch, or {@code 0} if the batch was |
| 92 | + * empty. |
| 93 | + */ |
| 94 | + public double max() { |
| 95 | + return max; |
| 96 | + } |
| 97 | + |
| 98 | + /** |
| 99 | + * @return the sum of observations in the most recent batch. |
| 100 | + */ |
| 101 | + public double sum() { |
| 102 | + return sum; |
| 103 | + } |
| 104 | + |
| 105 | + /** |
| 106 | + * @return the total count of observations represented by the sketch. |
| 107 | + */ |
| 108 | + public long count() { |
| 109 | + return count; |
| 110 | + } |
| 111 | + |
| 112 | + /** |
| 113 | + * Builds the sketch from the given values. The {@code values} array is modified in place |
| 114 | + * (sorted); callers that need to preserve the original ordering should pass a copy. |
| 115 | + * |
| 116 | + * @param values the observations to include in the sketch; sorted in place |
| 117 | + * @param sampleRate the sampling rate used to collect {@code values}, in {@code (0, 1]}. Each |
| 118 | + * observation is weighted by {@code 1 / sampleRate} when accumulating counts and sums. |
| 119 | + * Rates below ~1.08e-19 saturate the per-observation weight; bin counts and the total |
| 120 | + * {@code count} field saturate at {@link Long#MAX_VALUE} on overflow. |
| 121 | + */ |
| 122 | + public void build(long[] values, double sampleRate) { |
| 123 | + if (Double.isNaN(sampleRate) || sampleRate <= 0 || sampleRate > 1) { |
| 124 | + throw new IllegalArgumentException("sampleRate is out of range"); |
| 125 | + } |
| 126 | + |
| 127 | + reset(); |
| 128 | + buildInner(values, sampleRate); |
| 129 | + } |
| 130 | + |
| 131 | + private void buildInner(final long[] values, double sampleRate) { |
| 132 | + if (values == null || values.length == 0) { |
| 133 | + return; |
| 134 | + } |
| 135 | + |
| 136 | + Arrays.sort(values); |
| 137 | + |
| 138 | + final long sampleSize = (long) (1 / sampleRate); |
| 139 | + min = values[0]; |
| 140 | + max = values[values.length - 1]; |
| 141 | + count = satMul(sampleSize, values.length); |
| 142 | + |
| 143 | + short topKey = negInfKey - 1; |
| 144 | + long topCount = 0; |
| 145 | + |
| 146 | + for (long val : values) { |
| 147 | + sum += val / sampleRate; |
| 148 | + |
| 149 | + short key = key(val); |
| 150 | + |
| 151 | + if (key == topKey) { |
| 152 | + topCount = satAdd(topCount, sampleSize); |
| 153 | + } else { |
| 154 | + if (topCount > 0) { |
| 155 | + append(topKey, topCount); |
| 156 | + } |
| 157 | + topKey = key; |
| 158 | + topCount = sampleSize; |
| 159 | + } |
| 160 | + } |
| 161 | + |
| 162 | + append(topKey, topCount); |
| 163 | + } |
| 164 | + |
| 165 | + private void reset() { |
| 166 | + min = 0; |
| 167 | + max = 0; |
| 168 | + sum = 0; |
| 169 | + count = 0; |
| 170 | + size = 0; |
| 171 | + head = 0; |
| 172 | + } |
| 173 | + |
| 174 | + private void append(short key, long count) { |
| 175 | + if (size >= binLimit) { |
| 176 | + int next = head + 1; |
| 177 | + if (next == binLimit) { |
| 178 | + next = 0; |
| 179 | + } |
| 180 | + binCounts[next] = satAdd(binCounts[next], binCounts[head]); |
| 181 | + binKeys[head] = key; |
| 182 | + binCounts[head] = count; |
| 183 | + head = next; |
| 184 | + return; |
| 185 | + } |
| 186 | + |
| 187 | + if (size == binKeys.length) { |
| 188 | + int cap = Math.max(4, size * 2); |
| 189 | + binKeys = Arrays.copyOf(binKeys, cap); |
| 190 | + binCounts = Arrays.copyOf(binCounts, cap); |
| 191 | + } |
| 192 | + binKeys[size] = key; |
| 193 | + binCounts[size] = count; |
| 194 | + size++; |
| 195 | + } |
| 196 | + |
| 197 | + // a >= 0 && b >= 0 |
| 198 | + private static long satAdd(long a, long b) { |
| 199 | + long r = a + b; |
| 200 | + if (r < 0) { |
| 201 | + r = Long.MAX_VALUE; |
| 202 | + } |
| 203 | + return r; |
| 204 | + } |
| 205 | + |
| 206 | + // a >= 0 && b >= 0 |
| 207 | + private static long satMul(long a, long b) { |
| 208 | + long r = a * b; |
| 209 | + if ((a > Integer.MAX_VALUE || b > Integer.MAX_VALUE) && a != 0 && r / a != b) { |
| 210 | + r = Long.MAX_VALUE; |
| 211 | + } |
| 212 | + return r; |
| 213 | + } |
| 214 | +} |
0 commit comments