Skip to content

Commit ad2c959

Browse files
authored
Merge pull request #312 from DataDog/vickenty/ftl
AGTMETRICS-489 Add telemetry to dogstatsd-http-forwarder
2 parents 5466050 + 1c73ab6 commit ad2c959

9 files changed

Lines changed: 694 additions & 57 deletions

File tree

dogstatsd-http-forwarder/src/main/java/com/datadoghq/dogstatsd/http/forwarder/BoundedQueue.java

Lines changed: 59 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,25 +12,29 @@
1212
import java.util.concurrent.locks.Condition;
1313
import java.util.concurrent.locks.Lock;
1414
import java.util.concurrent.locks.ReentrantLock;
15+
import java.util.function.LongSupplier;
1516

1617
class BoundedQueue {
17-
// Key represents a tuple of integers (tries, clock).
18+
// Key represents a tuple of integers (tries, clock). enqueuedAtNanos records when the
19+
// payload first entered the queue (in System.nanoTime() units) and is preserved across
20+
// requeues so that "age of oldest item" remains accurate after retries.
1821
static class Key implements Comparable<Key> {
1922
final long tries;
2023
final long clock;
24+
final long enqueuedAtNanos;
2125

2226
Key(long clock) {
23-
this.tries = 0;
24-
this.clock = clock;
27+
this(0, clock, System.nanoTime());
2528
}
2629

27-
private Key(long tries, long clock) {
30+
private Key(long tries, long clock, long enqueuedAtNanos) {
2831
this.tries = tries;
2932
this.clock = clock;
33+
this.enqueuedAtNanos = enqueuedAtNanos;
3034
}
3135

3236
Key next() {
33-
return new Key(tries + 1, clock);
37+
return new Key(tries + 1, clock, enqueuedAtNanos);
3438
}
3539

3640
@Override
@@ -52,17 +56,36 @@ public int compareTo(Key o) {
5256

5357
final TreeMap<Key, byte[]> items = new TreeMap<>();
5458

55-
long droppedItems;
56-
long droppedBytes;
59+
final Telemetry telemetry;
60+
final LongSupplier nanos;
5761

5862
Lock lock = new ReentrantLock();
5963
Condition notEmpty = lock.newCondition();
6064
Condition notFull = lock.newCondition();
6165

62-
BoundedQueue(long maxBytes, long maxTries, WhenFull whenFull) {
66+
/**
 * Creates a queue whose entry timestamps are taken from {@link System#nanoTime()}.
 *
 * @param maxBytes stored as the queue's byte limit
 * @param maxTries stored as the retry limit checked when an item is requeued
 * @param whenFull stored as the policy applied when the queue is full
 * @param telemetry sink that receives drop notifications
 */
BoundedQueue(long maxBytes, long maxTries, WhenFull whenFull, Telemetry telemetry) {
  this(maxBytes, maxTries, whenFull, telemetry, System::nanoTime);
}

/**
 * Creates a queue with an explicit time source.
 *
 * @param maxBytes stored as the queue's byte limit
 * @param maxTries stored as the retry limit checked when an item is requeued
 * @param whenFull stored as the policy applied when the queue is full
 * @param telemetry sink that receives drop notifications
 * @param nanos supplier read once for every newly created key to record its enqueue time
 */
BoundedQueue(
    long maxBytes,
    long maxTries,
    WhenFull whenFull,
    Telemetry telemetry,
    LongSupplier nanos) {
  // Collaborators first, then limits; the assignments are independent of each other.
  this.nanos = nanos;
  this.telemetry = telemetry;
  this.whenFull = whenFull;
  this.maxTries = maxTries;
  this.maxBytes = maxBytes;
}
82+
83+
long droppedPayloads;
84+
long droppedBytes;
85+
86+
private void recordDrop(long bytes) {
87+
droppedPayloads++;
88+
droppedBytes += bytes;
6689
}
6790

6891
void add(byte[] item) throws InterruptedException {
@@ -72,8 +95,7 @@ void add(byte[] item) throws InterruptedException {
7295
void requeue(Map.Entry<Key, byte[]> item) throws InterruptedException {
7396
Key nextKey = item.getKey().next();
7497
if (nextKey.tries > maxTries) {
75-
droppedItems++;
76-
droppedBytes += item.getValue().length;
98+
telemetry.onDrop(1, item.getValue().length);
7799
return;
78100
}
79101
put(nextKey, item.getValue(), WhenFull.DROP);
@@ -82,7 +104,7 @@ void requeue(Map.Entry<Key, byte[]> item) throws InterruptedException {
82104
// Must be called when lock is held.
83105
private Key newKey() {
84106
clock++;
85-
return new Key(clock);
107+
return new Key(0, clock, nanos.getAsLong());
86108
}
87109

88110
private void put(Key key, byte[] item, WhenFull whenFull) throws InterruptedException {
@@ -96,7 +118,13 @@ private void put(Key key, byte[] item, WhenFull whenFull) throws InterruptedExce
96118
bytes += item.length;
97119
notEmpty.signal();
98120
} finally {
121+
long droppedPayloads = this.droppedPayloads;
122+
long droppedBytes = this.droppedBytes;
123+
this.droppedPayloads = 0;
124+
this.droppedBytes = 0;
99125
lock.unlock();
126+
// Avoid potential lock ordering issues.
127+
telemetry.onDrop(droppedPayloads, droppedBytes);
100128
}
101129
}
102130

@@ -108,9 +136,8 @@ private void ensureSpace(int length, WhenFull whenFull) throws InterruptedExcept
108136
switch (whenFull) {
109137
case DROP:
110138
Map.Entry<Key, byte[]> last = items.pollLastEntry();
111-
droppedItems++;
112-
droppedBytes += last.getValue().length;
113139
bytes -= last.getValue().length;
140+
recordDrop(last.getValue().length);
114141
break;
115142
case BLOCK:
116143
notFull.await();
@@ -133,4 +160,23 @@ Map.Entry<Key, byte[]> next() throws InterruptedException {
133160
lock.unlock();
134161
}
135162
}
163+
164+
void snapshot(long now, Telemetry.Snapshot s) {
165+
lock.lock();
166+
try {
167+
long oldestAge = 0L;
168+
for (Key k : items.keySet()) {
169+
long age = now - k.enqueuedAtNanos;
170+
if (age > oldestAge) {
171+
oldestAge = age;
172+
}
173+
}
174+
s.queuePayloads = items.size();
175+
s.queueBytes = bytes;
176+
s.queueMaxBytes = maxBytes;
177+
s.oldestEnqueuedAgeNanos = oldestAge;
178+
} finally {
179+
lock.unlock();
180+
}
181+
}
136182
}

dogstatsd-http-forwarder/src/main/java/com/datadoghq/dogstatsd/http/forwarder/Forwarder.java

Lines changed: 43 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ public class Forwarder extends Thread {
3939
String localData;
4040
String externalData;
4141

42-
int responseOk, responseBadRequest, responseOther;
42+
final Telemetry telemetry;
4343

4444
/**
4545
* Creates a new forwarder targeting the given URL.
@@ -60,7 +60,8 @@ public Forwarder(
6060
Duration connectTimeout,
6161
Duration requestTimeout) {
6262
this.url = url;
63-
this.queue = new BoundedQueue(maxRequestsBytes, maxTries, whenFull);
63+
this.telemetry = new Telemetry();
64+
this.queue = new BoundedQueue(maxRequestsBytes, maxTries, whenFull, this.telemetry);
6465
this.requestTimeout = requestTimeout;
6566
this.client =
6667
HttpClient.newBuilder()
@@ -69,6 +70,14 @@ public Forwarder(
6970
.build();
7071
}
7172

73+
/**
74+
* Captures a snapshot of the forwarder's telemetry counters and queue state, clearing delta
75+
* counters so subsequent snapshots report activity since this call.
76+
*/
77+
public Telemetry.Snapshot snapshot() {
78+
return telemetry.snapshot(queue);
79+
}
80+
7281
/** Runs the forwarding loop, delivering queued payloads until the thread is interrupted. */
7382
@Override
7483
public void run() {
@@ -93,6 +102,7 @@ public void run() {
93102
*/
94103
public void send(byte[] payload) throws InterruptedException {
95104
queue.add(payload);
105+
telemetry.onEnqueue(payload.length);
96106
}
97107

98108
void runOnce(Map.Entry<BoundedQueue.Key, byte[]> item) throws InterruptedException {
@@ -119,37 +129,49 @@ void runOnce(Map.Entry<BoundedQueue.Key, byte[]> item) throws InterruptedExcepti
119129
logger.log(
120130
Level.INFO, "response {0}: {1}", new Object[] {res.statusCode(), res.body()});
121131

122-
switch (res.statusCode()) {
123-
case 400:
124-
responseBadRequest++;
125-
onSuccess();
126-
break;
127-
case 200:
128-
responseOk++;
129-
onSuccess();
130-
break;
131-
default:
132-
responseOther++;
133-
onError();
134-
queue.requeue(item);
135-
}
132+
handleResponse(res.statusCode(), item);
136133
} catch (IOException ex) {
137134
logger.log(Level.WARNING, "error sending request: {0}", ex.toString());
138-
responseOther++;
139-
onError();
140-
queue.requeue(item);
135+
handleTransportError(item);
141136
}
142137

143138
backoff();
144139
}
145140

141+
void handleResponse(int code, Map.Entry<BoundedQueue.Key, byte[]> item)
142+
throws InterruptedException {
143+
int len = item.getValue().length;
144+
switch (code) {
145+
case 400:
146+
telemetry.onResponse(code, len, false);
147+
telemetry.onDrop(1, len);
148+
decreaseBackoff();
149+
break;
150+
case 200:
151+
telemetry.onResponse(code, len, true);
152+
decreaseBackoff();
153+
break;
154+
default:
155+
telemetry.onResponse(code, len, false);
156+
increaseBackoff();
157+
queue.requeue(item);
158+
}
159+
}
160+
161+
void handleTransportError(Map.Entry<BoundedQueue.Key, byte[]> item)
162+
throws InterruptedException {
163+
telemetry.onTransportError(item.getValue().length);
164+
increaseBackoff();
165+
queue.requeue(item);
166+
}
167+
146168
int delay;
147169

148-
void onSuccess() {
170+
void decreaseBackoff() {
149171
delay >>= 4;
150172
}
151173

152-
void onError() {
174+
void increaseBackoff() {
153175
if (delay < 64) delay <<= 1;
154176
if (delay == 0) delay = 1;
155177
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/* Unless explicitly stated otherwise all files in this repository are
2+
* licensed under the Apache 2.0 License.
3+
*
4+
* This product includes software developed at Datadog
5+
* (https://www.datadoghq.com/) Copyright 2026 Datadog, Inc.
6+
*/
7+
8+
package com.datadoghq.dogstatsd.http.forwarder;
9+
10+
/**
 * Canonical telemetry labels for HTTP response codes.
 *
 * <p>A fixed set of codes is reported verbatim (for example {@code "200"} or {@code "429"});
 * every other code in the range 0..599 is folded into its hundreds category
 * ({@code "2xx"}, {@code "5xx"}, ...). Anything outside that range, including every negative
 * value, is reported as {@code "xxx"}.
 */
final class HttpCode {

  /** Label for codes that do not belong to any known category. */
  private static final String UNKNOWN = "xxx";

  private static final String[] CATEGORIES = {"0xx", "1xx", "2xx", "3xx", "4xx", "5xx"};

  /** Precomputed label per code, covering 0 through 505 (the highest verbatim code). */
  private static final String[] NAMES = new String[506];

  static {
    // Default every slot to its category, then override the codes reported verbatim.
    for (int i = 0; i < NAMES.length; i++) {
      NAMES[i] = CATEGORIES[i / 100];
    }
    NAMES[0] = "0";
    NAMES[200] = "200";
    NAMES[400] = "400";
    NAMES[403] = "403";
    NAMES[404] = "404";
    NAMES[429] = "429";
    NAMES[500] = "500";
    NAMES[501] = "501";
    NAMES[502] = "502";
    NAMES[503] = "503";
    NAMES[504] = "504";
    NAMES[505] = "505";
  }

  private HttpCode() {}

  /**
   * Returns the telemetry label for an HTTP status code.
   *
   * @param code status code; any {@code int} is accepted
   * @return the verbatim code, its {@code Nxx} category, or {@code "xxx"} when the code is
   *     negative or 600 and above
   */
  static String name(int code) {
    // Integer division truncates toward zero, so -99..-1 would otherwise land in category 0
    // and be mislabelled "0xx". Reject negatives before any division happens.
    if (code < 0) {
      return UNKNOWN;
    }
    if (code < NAMES.length) {
      return NAMES[code];
    }
    int category = code / 100;
    return category < CATEGORIES.length ? CATEGORIES[category] : UNKNOWN;
  }
}

0 commit comments

Comments
 (0)