Skip to content

Commit e56f0ff

Browse files
committed
dedup
1 parent 90dc3dd commit e56f0ff

File tree

9 files changed

+196
-3
lines changed

9 files changed

+196
-3
lines changed

README.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ If you work in a legacy app and ask things like "Can we remove this?" or "Is thi
1616
- [Configure](#configure)
1717
- [Run an Application with JCT](#run-an-application-with-jct)
1818
- [Available Processors](#available-processors)
19+
- [Stack Volume Control (All vs New Stacks)](#stack-volume-control-all-vs-new-stacks)
1920
- [Message Format](#message-format)
2021
- [Logging](#logging)
2122
- [Hello World Walkthrough](#hello-world-walkthrough)
@@ -168,6 +169,40 @@ Quick chooser:
168169
- Pick `udp` if you optimize for throughput and can tolerate some loss
169170
- Pick `tcp` if you want better transport reliability for ELK pipelines
170171

172+
## Stack Volume Control (All vs New Stacks)
173+
174+
On busy systems, writing every single captured stack can create a huge event volume.
175+
If your main question is only "Was this path hit at least once?", you can report only new stacks.
176+
177+
Processor flags:
178+
179+
- `processor.reportAllStacks`
180+
- `true` (default): report every captured event
181+
- `false`: report only the first event for each stack hash within the current dedup window
182+
- `processor.stackHashResetIntervalMillis`
183+
- Periodically clears remembered stack hashes to cap memory usage in long-running JVMs (only relevant when `reportAllStacks` is `false`; values below `1000` are raised to `1000`)
184+
- Default: `300000` (5 minutes)
185+
186+
Example: report only new stacks (good for high-traffic legacy systems)
187+
188+
```yaml
189+
processor:
190+
fullQualifiedClass: de.marcelsauer.profiler.processor.udp.AsyncUdpStackProcessor
191+
udpHost: localhost
192+
udpPort: 9999
193+
reportAllStacks: false
194+
stackHashResetIntervalMillis: 300000
195+
```
196+
197+
Example: report all stacks (full event stream)
198+
199+
```yaml
200+
processor:
201+
fullQualifiedClass: de.marcelsauer.profiler.processor.file.AsyncFileWritingStackProcessor
202+
stackFolderName: /tmp/stacks/
203+
reportAllStacks: true
204+
```
205+
171206
## Message Format
172207
173208
```json

doc/config-sample-file.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,7 @@ classes:
1010
processor:
1111
fullQualifiedClass: de.marcelsauer.profiler.processor.file.AsyncFileWritingStackProcessor
1212
stackFolderName: /tmp/stacks/
13+
# true = report every captured stack event
14+
reportAllStacks: true
15+
# used in dedup mode (reportAllStacks: false) to reset remembered hashes
16+
stackHashResetIntervalMillis: 300000

doc/config-sample-helloworld-tcp.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,8 @@ processor:
1010
tcpPort: 9999
1111
tcpConnectTimeoutMillis: 1000
1212
tcpReconnectDelayMillis: 1000
13+
# true = report every captured stack event
14+
reportAllStacks: true
15+
# used in dedup mode (reportAllStacks: false) to reset remembered hashes
16+
stackHashResetIntervalMillis: 300000
1317

doc/config-sample-helloworld-udp.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,7 @@ processor:
88
fullQualifiedClass: de.marcelsauer.profiler.processor.udp.AsyncUdpStackProcessor
99
udpHost: localhost
1010
udpPort: 9999
11+
# false = report only first-seen stacks within each reset window
12+
reportAllStacks: false
13+
# reset dedup hashes every 5 minutes to cap memory usage
14+
stackHashResetIntervalMillis: 300000

doc/config-sample-helloworld.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,5 @@ classes:
77
processor:
88
fullQualifiedClass: de.marcelsauer.profiler.processor.file.AsyncFileWritingStackProcessor
99
stackFolderName: /tmp/stacks/
10+
reportAllStacks: true
11+
stackHashResetIntervalMillis: 300000

src/main/java/de/marcelsauer/profiler/config/Processor.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,17 @@
88
public class Processor {
99
public String fullQualifiedClass;
1010

11+
/**
12+
* true: report every captured stack event
13+
* false: report only first-seen stack hashes until periodic reset
14+
*/
15+
public boolean reportAllStacks = true;
16+
17+
/**
18+
* reset interval (in milliseconds) for remembered stack hashes in dedup mode to cap memory usage; the processor raises values below 1000 to 1000
19+
*/
20+
public long stackHashResetIntervalMillis = 300_000L;
21+
1122
public String udpHost = "localhost";
1223
public int udpPort = 9999;
1324

src/main/java/de/marcelsauer/profiler/processor/AbstractAsyncStackProcessor.java

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
package de.marcelsauer.profiler.processor;
22

3+
import de.marcelsauer.profiler.config.Config;
34
import org.apache.log4j.Logger;
45

56
import java.util.ArrayList;
67
import java.util.Collection;
8+
import java.util.HashSet;
79
import java.util.List;
10+
import java.util.Set;
811
import java.util.concurrent.*;
912
import java.util.concurrent.atomic.AtomicBoolean;
1013
import java.util.concurrent.atomic.AtomicInteger;
@@ -28,6 +31,9 @@ public abstract class AbstractAsyncStackProcessor implements StackProcessor {
2831
private final BlockingQueue<RecordingEvent> workerQueue = new ArrayBlockingQueue<>(CAPACITY);
2932
private final AtomicBoolean started = new AtomicBoolean(false);
3033
private final List<RecordingEvent> drainBuffer = new ArrayList<>(DRAIN_BATCH_SIZE);
34+
private final List<RecordingEvent> dedupBuffer = new ArrayList<>(DRAIN_BATCH_SIZE);
35+
private final Set<Integer> seenStackHashes = new HashSet<>();
36+
private long nextStackHashResetAtMillis = 0L;
3137

3238
/**
3339
* acting like cron
@@ -56,6 +62,7 @@ public void process(RecordingEvent event) {
5662
@Override
5763
public void start() {
5864
logger.info("starting " + this.getClass().getName());
65+
resetDedupWindow();
5966
doStart();
6067
startScheduler();
6168
}
@@ -112,11 +119,17 @@ private void flushQueue() {
112119
if (snapshot.isEmpty()) {
113120
return;
114121
}
115-
doProcess(snapshot);
116-
successfullyProcessedStacksCounter.addAndGet(snapshot.size());
122+
123+
Collection<RecordingEvent> reportableSnapshot = filterReportableStacks(snapshot);
124+
if (reportableSnapshot.isEmpty()) {
125+
continue;
126+
}
127+
128+
doProcess(reportableSnapshot);
129+
successfullyProcessedStacksCounter.addAndGet(reportableSnapshot.size());
117130
logger.info(
118131
String.format("delegated %d stacks to %s. successfully processed so far %d",
119-
snapshot.size(),
132+
reportableSnapshot.size(),
120133
this.getClass().getSimpleName(),
121134
successfullyProcessedStacksCounter.intValue()));
122135
}
@@ -128,6 +141,49 @@ private Collection<RecordingEvent> drainNextBatch() {
128141
return drainBuffer;
129142
}
130143

144+
private Collection<RecordingEvent> filterReportableStacks(Collection<RecordingEvent> snapshot) {
145+
if (isReportAllStacks()) {
146+
return snapshot;
147+
}
148+
149+
maybeResetSeenStackHashes();
150+
151+
dedupBuffer.clear();
152+
for (RecordingEvent event : snapshot) {
153+
if (seenStackHashes.add(event.getStackHash())) {
154+
dedupBuffer.add(event);
155+
}
156+
}
157+
return dedupBuffer;
158+
}
159+
160+
private void maybeResetSeenStackHashes() {
161+
long now = currentTimeMillis();
162+
if (now < nextStackHashResetAtMillis) {
163+
return;
164+
}
165+
166+
seenStackHashes.clear();
167+
nextStackHashResetAtMillis = now + getStackHashResetIntervalMillis();
168+
}
169+
170+
private void resetDedupWindow() {
171+
seenStackHashes.clear();
172+
nextStackHashResetAtMillis = currentTimeMillis() + getStackHashResetIntervalMillis();
173+
}
174+
175+
protected boolean isReportAllStacks() {
176+
return Config.get().processor.reportAllStacks;
177+
}
178+
179+
protected long getStackHashResetIntervalMillis() {
180+
return Math.max(1_000L, Config.get().processor.stackHashResetIntervalMillis);
181+
}
182+
183+
protected long currentTimeMillis() {
184+
return System.currentTimeMillis();
185+
}
186+
131187
class WorkQueueProcessorTask implements Runnable {
132188

133189
@Override

src/main/java/de/marcelsauer/profiler/processor/RecordingEvent.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,12 @@
66
public class RecordingEvent {
77
public final Stack stack;
88
public final long timestampMillis;
9+
private final int stackHash;
910

1011
public RecordingEvent(Stack stack) {
1112
this.stack = stack;
1213
this.timestampMillis = System.currentTimeMillis();
14+
this.stackHash = calculateStackHash();
1315
}
1416

1517
List<Stack.StackEntry> getStackEntries() {
@@ -33,4 +35,16 @@ public String asJson() {
3335
sb.append("]}");
3436
return sb.toString();
3537
}
38+
39+
public int getStackHash() {
40+
return stackHash;
41+
}
42+
43+
private int calculateStackHash() {
44+
int result = 1;
45+
for (Stack.StackEntry entry : getStackEntries()) {
46+
result = 31 * result + (entry.methodName != null ? entry.methodName.hashCode() : 0);
47+
}
48+
return result;
49+
}
3650
}

src/test/java/de/marcelsauer/profiler/processor/AbstractAsyncStackProcessorTest.java

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,15 +43,59 @@ public void thatQueueIsDrainedInBatchesWithoutLosingEvents() {
4343
}
4444
}
4545

46+
@Test
47+
public void thatOnlyNewStacksAreReportedWhenDedupModeIsEnabled() {
48+
TestProcessor processor = new TestProcessor(false, 60_000L);
49+
processor.start();
50+
51+
processor.process(newEvent("m1"));
52+
processor.process(newEvent("m1"));
53+
processor.process(newEvent("m2"));
54+
processor.process(newEvent("m1"));
55+
56+
processor.stop();
57+
58+
assertEquals(2, processor.getProcessedCount());
59+
}
60+
61+
@Test
62+
public void thatSeenHashesAreResetAfterConfiguredInterval() throws InterruptedException {
63+
TestProcessor processor = new TestProcessor(false, 1000L);
64+
processor.start();
65+
66+
processor.process(newEvent("m1"));
67+
Thread.sleep(1200L);
68+
69+
processor.advanceClockMillis(2000L);
70+
processor.process(newEvent("m1"));
71+
Thread.sleep(1200L);
72+
73+
processor.stop();
74+
75+
assertEquals(2, processor.getProcessedCount());
76+
}
77+
4678
private RecordingEvent newEvent(String methodName) {
4779
Stack.StackEntry entry = new Stack.StackEntry(methodName, 1L, 0);
4880
return new RecordingEvent(new Stack(Collections.singletonList(entry)));
4981
}
5082

5183
private static class TestProcessor extends AbstractAsyncStackProcessor {
84+
private final boolean reportAllStacks;
85+
private final long stackHashResetIntervalMillis;
86+
private long nowMillis;
5287
private final AtomicInteger processedCount = new AtomicInteger();
5388
private final List<Integer> batchSizes = Collections.synchronizedList(new ArrayList<Integer>());
5489

90+
TestProcessor() {
91+
this(true, 300_000L);
92+
}
93+
94+
TestProcessor(boolean reportAllStacks, long stackHashResetIntervalMillis) {
95+
this.reportAllStacks = reportAllStacks;
96+
this.stackHashResetIntervalMillis = stackHashResetIntervalMillis;
97+
}
98+
5599
@Override
56100
protected void doStart() {
57101
// no-op
@@ -68,6 +112,25 @@ protected void doProcess(Collection<RecordingEvent> snapshots) {
68112
processedCount.addAndGet(snapshots.size());
69113
}
70114

115+
@Override
116+
protected boolean isReportAllStacks() {
117+
return reportAllStacks;
118+
}
119+
120+
@Override
121+
protected long getStackHashResetIntervalMillis() {
122+
return stackHashResetIntervalMillis;
123+
}
124+
125+
@Override
126+
protected long currentTimeMillis() {
127+
return nowMillis;
128+
}
129+
130+
void advanceClockMillis(long millis) {
131+
nowMillis += millis;
132+
}
133+
71134
int getProcessedCount() {
72135
return processedCount.get();
73136
}

0 commit comments

Comments
 (0)