Skip to content

Commit 49100cb

Browse files
committed
Making cache more configurable & clean-up
- added ability to configure cache size - for both tag names & values - factored shared code into Caching static utility class - added tests for Caching class & size determination logic
1 parent 6ab19b0 commit 49100cb

9 files changed

Lines changed: 281 additions & 139 deletions

File tree

dd-trace-api/src/main/java/datadog/trace/api/config/GeneralConfig.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,8 @@ public final class GeneralConfig {
105105
public static final String JDK_SOCKET_ENABLED = "jdk.socket.enabled";
106106

107107
public static final String OPTIMIZED_MAP_ENABLED = "optimized.map.enabled";
108-
public static final String UTF8_CACHE_ENABLED = "utf8.cache.enabled";
108+
public static final String TAG_NAME_UTF8_CACHE_SIZE = "tag.name.utf8.cache.size";
109+
public static final String TAG_VALUE_UTF8_CACHE_SIZE = "tag.value.utf8.cache.size";
109110
public static final String STACK_TRACE_LENGTH_LIMIT = "stack.trace.length.limit";
110111

111112
public static final String SSI_INJECTION_ENABLED = "injection.enabled";
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
package datadog.trace.common.writer.ddagent;
2+
3+
import java.util.Arrays;
4+
5+
/** Some common static functions used by simple & generational caches */
6+
final class Caching {
7+
private Caching() {}
8+
9+
/**
10+
* Provides the cache size that holds the requestedCapacity
11+
*
12+
* @param requestedCapacity > 0
13+
* @return size >= requestedCapacity
14+
*/
15+
static final int cacheSizeFor(int requestedCapacity) {
16+
int pow;
17+
for (pow = 1; pow < requestedCapacity; pow *= 2) ;
18+
return pow;
19+
}
20+
21+
/** Provides an "adjusted" (e.g. non-zero) hash for the given String */
22+
static final int adjHash(String value) {
23+
int hash = value.hashCode();
24+
return (hash == 0) ? 0xDA7AD06 : hash;
25+
}
26+
27+
/** Resets markers to zero */
28+
static final void reset(int[] marks) {
29+
Arrays.fill(marks, 0);
30+
}
31+
32+
/**
33+
* Changes the mark status of the corresponding slot in the marking array. If there was previously
34+
* a matching mark, resets the slot to zero and returns true If there was previously a mismatching
35+
* mark, updates the slot and returns false
36+
*
37+
* <p>A return value of true indicates that the requested value has likely been seen previously
38+
* and cache entry should be created.
39+
*/
40+
static final boolean mark(int[] marks, int newAdjHash) {
41+
int index = bucketIndex(marks, newAdjHash);
42+
43+
// This is the 4th iteration of the marking strategy
44+
// First version - used a mark entry, but that would prematurely
45+
// burn a slot in the cache
46+
// Second version - used a mark boolean, that worked well, but
47+
// was a overly permissive in allowing the next request to the same slot
48+
// to immediately create a CacheEntry
49+
// Third version - used a mark hash that to match exactly,
50+
// that could lead to access order fights over the cache slot
51+
// So this version is a hybrid of 2nd & 3rd, using a bloom filter
52+
// that effectively degenerates to a boolean
53+
54+
// This approach provides a nice balance when there's an A-B-A access pattern
55+
// The first A will mark the slot
56+
// Then B will mark the slot with A | B
57+
// Then either A or B can claim and reset the slot
58+
59+
int priorMarkHash = marks[index];
60+
boolean match = ((priorMarkHash & newAdjHash) == newAdjHash);
61+
if (match) {
62+
marks[index] = 0;
63+
} else {
64+
marks[index] = priorMarkHash | newAdjHash;
65+
}
66+
return match;
67+
}
68+
69+
/** Provides the corresponding index into the marking array */
70+
static final int bucketIndex(int[] marks, int adjHash) {
71+
return adjHash & (marks.length - 1);
72+
}
73+
74+
/**
75+
* Provides the corresponding index into an entry array Assumes that array size was determined by
76+
* using {@Caching#cacheSizeFor}
77+
*/
78+
static final <E> int bucketIndex(E[] entries, int adjHash) {
79+
return adjHash & (entries.length - 1);
80+
}
81+
}

dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java

Lines changed: 55 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import datadog.communication.serialization.EncodingCache;
44
import datadog.trace.common.writer.ddagent.SimpleUtf8Cache.CacheEntry;
55
import java.nio.charset.StandardCharsets;
6-
import java.util.Arrays;
76

87
/**
98
* 2-level generational cache of UTF8 values - primarily intended to be used for tag values
@@ -65,6 +64,9 @@
6564
* provide better cache utilization.
6665
*/
6766
public final class GenerationalUtf8Cache implements EncodingCache {
67+
static final int MAX_EDEN_CAPACITY = 512;
68+
static final int MAX_TENURED_CAPACITY = 1024;
69+
6870
private static final int MAX_EDEN_PROBES = 4;
6971
private static final int MAX_TENURED_PROBES = 8;
7072

@@ -75,6 +77,9 @@ public final class GenerationalUtf8Cache implements EncodingCache {
7577
private static final double PURGE_THRESHOLD = 0.25D;
7678
private static final double PROMOTION_THRESHOLD_ADJ_FACTOR = 1.5;
7779

80+
private static final double EDEN_PROPORTION = 1D / 3D;
81+
private static final double TENURED_PROPORTION = 1 - EDEN_PROPORTION;
82+
7883
private static final int MAX_ENTRY_LEN = 256;
7984

8085
private final CacheEntry[] edenEntries;
@@ -92,15 +97,40 @@ public final class GenerationalUtf8Cache implements EncodingCache {
9297
int edenEvictions = 0;
9398
int tenuredEvictions = 0;
9499

95-
public GenerationalUtf8Cache() {
100+
public GenerationalUtf8Cache(int capacity) {
96101
this.accessTimeMs = System.currentTimeMillis();
97102

103+
int edenCapacity = (int) (capacity * EDEN_PROPORTION);
104+
int edenSize = Caching.cacheSizeFor(Math.min(edenCapacity, MAX_EDEN_CAPACITY));
105+
98106
// These sizes must be powers of 2
99-
this.edenEntries = new CacheEntry[64];
100-
this.edenMarkers = new int[64];
107+
this.edenEntries = new CacheEntry[edenSize];
108+
this.edenMarkers = new int[edenSize];
109+
110+
int tenuredCapacity = (int) (capacity * TENURED_PROPORTION);
111+
int tenuredSize = Caching.cacheSizeFor(Math.min(tenuredCapacity, MAX_TENURED_CAPACITY));
101112

102113
// The size must be a power of 2
103-
this.tenuredEntries = new CacheEntry[128];
114+
this.tenuredEntries = new CacheEntry[tenuredSize];
115+
}
116+
117+
public GenerationalUtf8Cache(int edenCapacity, int tenuredCapacity) {
118+
this.accessTimeMs = System.currentTimeMillis();
119+
120+
int edenSize = Caching.cacheSizeFor(Math.min(tenuredCapacity, MAX_EDEN_CAPACITY));
121+
this.edenEntries = new CacheEntry[edenSize];
122+
this.edenMarkers = new int[edenSize];
123+
124+
int tenuredSize = Caching.cacheSizeFor(Math.min(tenuredCapacity, MAX_TENURED_CAPACITY));
125+
this.tenuredEntries = new CacheEntry[tenuredSize];
126+
}
127+
128+
public int edenCapacity() {
129+
return this.edenEntries.length;
130+
}
131+
132+
public int tenuredCapacity() {
133+
return this.tenuredEntries.length;
104134
}
105135

106136
/** Updates access time used @link {@link #getUtf8(String, String)} to the provided value */
@@ -129,25 +159,9 @@ public synchronized void recalibrate() {
129159
public void recalibrate(long accessTimeMs) {
130160
this.accessTimeMs = accessTimeMs;
131161

132-
CacheEntry[] edenEntries = this.edenEntries;
133-
for (int i = 0; i < edenEntries.length; ++i) {
134-
CacheEntry entry = edenEntries[i];
135-
if (entry == null) continue;
136-
137-
boolean purge = entry.decay();
138-
if (purge) edenEntries[i] = null;
139-
}
140-
141-
Arrays.fill(this.edenMarkers, 0);
142-
143-
CacheEntry[] tenuredEntries = this.tenuredEntries;
144-
for (int i = 0; i < tenuredEntries.length; ++i) {
145-
CacheEntry entry = tenuredEntries[i];
146-
if (entry == null) continue;
147-
148-
boolean purge = entry.decay();
149-
if (purge) tenuredEntries[i] = null;
150-
}
162+
recalibrate(this.edenEntries);
163+
Caching.reset(this.edenMarkers);
164+
recalibrate(this.tenuredEntries);
151165

152166
int totalPromotions = this.promotions + this.earlyPromotions;
153167
if (totalPromotions == 0 && this.promotionThreshold >= MIN_PROMOTION_TRESHOLD) {
@@ -164,6 +178,16 @@ public void recalibrate(long accessTimeMs) {
164178
this.tenuredEvictions = 0;
165179
}
166180

181+
static final void recalibrate(CacheEntry[] entries) {
182+
for (int i = 0; i < entries.length; ++i) {
183+
CacheEntry entry = entries[i];
184+
if (entry == null) continue;
185+
186+
boolean purge = entry.decay();
187+
if (purge) entries[i] = null;
188+
}
189+
}
190+
167191
@Override
168192
public byte[] encode(CharSequence charSeq) {
169193
if (charSeq instanceof String) {
@@ -186,7 +210,7 @@ public final byte[] getUtf8(String value) {
186210
public final byte[] getUtf8(String value, long accessTimeMs) {
187211
if (value.length() > MAX_ENTRY_LEN) return CacheEntry.utf8(value);
188212

189-
int adjHash = CacheEntry.adjHash(value);
213+
int adjHash = Caching.adjHash(value);
190214
long lookupTimeMs = this.accessTimeMs;
191215

192216
CacheEntry[] tenuredEntries = this.tenuredEntries;
@@ -220,7 +244,7 @@ public final byte[] getUtf8(String value, long accessTimeMs) {
220244
return edenEntry.utf8();
221245
}
222246

223-
boolean wasMarked = mark(this.edenMarkers, adjHash);
247+
boolean wasMarked = Caching.mark(this.edenMarkers, adjHash);
224248

225249
// If slot isn't marked, this is likely the first request
226250
// Don't create an entry yet
@@ -265,7 +289,7 @@ public final byte[] getUtf8(String value, long accessTimeMs) {
265289
}
266290

267291
static final int findAvailableIndex(CacheEntry[] entries, int numProbes, int newAdjHash) {
268-
int initialBucketIndex = initialBucketIndex(entries, newAdjHash);
292+
int initialBucketIndex = Caching.bucketIndex(entries, newAdjHash);
269293
for (int probe = 0, index = initialBucketIndex; probe < numProbes; ++probe, ++index) {
270294
if (index >= entries.length) index = 0;
271295

@@ -280,7 +304,7 @@ static final int findFirstAvailableOrMfuIndex(
280304
double mfuScore = Double.MIN_VALUE;
281305
int mfuIndex = -1;
282306

283-
int initialBucketIndex = initialBucketIndex(entries, newAdjHash);
307+
int initialBucketIndex = Caching.bucketIndex(entries, newAdjHash);
284308
for (int probe = 0, index = initialBucketIndex; probe < numProbes; ++probe, ++index) {
285309
if (index >= entries.length) index = 0;
286310

@@ -296,37 +320,8 @@ static final int findFirstAvailableOrMfuIndex(
296320
return mfuIndex;
297321
}
298322

299-
static final boolean mark(int[] marks, int newAdjHash) {
300-
int index = initialBucketIndex(marks, newAdjHash);
301-
302-
// This is the 4th iteration of the marking strategy
303-
// First version - used a mark entry, but that would prematurely
304-
// burn a slot in the cache
305-
// Second version - used a mark boolean, that worked well, but
306-
// was a overly permissive in allowing the next request to the same slot
307-
// to immediately create a CacheEntry
308-
// Third version - used a mark hash that to match exactly,
309-
// that could lead to access order fights over the cache slot
310-
// So this version is a hybrid of 2nd & 3rd, using a bloom filter
311-
// that effectively degenerates to a boolean
312-
313-
// This approach provides a nice balance when there's an A-B-A access pattern
314-
// The first A will mark the slot
315-
// Then B will mark the slot with A | B
316-
// Then either A or B can claim and reset the slot
317-
318-
int priorMarkHash = marks[index];
319-
boolean match = ((priorMarkHash & newAdjHash) == newAdjHash);
320-
if (match) {
321-
marks[index] = 0;
322-
} else {
323-
marks[index] = priorMarkHash | newAdjHash;
324-
}
325-
return match;
326-
}
327-
328323
static final boolean lfuInsert(CacheEntry[] entries, int numProbes, CacheEntry newEntry) {
329-
int initialBucketIndex = initialBucketIndex(entries, newEntry.adjHash());
324+
int initialBucketIndex = Caching.bucketIndex(entries, newEntry.adjHash());
330325

331326
// initial scan to see if there's an empty slot or marker entry is already present
332327
double lowestScore = Double.MAX_VALUE;
@@ -353,7 +348,7 @@ static final boolean lfuInsert(CacheEntry[] entries, int numProbes, CacheEntry n
353348
}
354349

355350
static final boolean lruInsert(CacheEntry[] entries, int numProbes, CacheEntry newEntry) {
356-
int initialBucketIndex = initialBucketIndex(entries, newEntry.adjHash());
351+
int initialBucketIndex = Caching.bucketIndex(entries, newEntry.adjHash());
357352

358353
// initial scan to see if there's an empty slot or entry is already present
359354
long lowestUsedMs = Long.MAX_VALUE;
@@ -378,17 +373,9 @@ static final boolean lruInsert(CacheEntry[] entries, int numProbes, CacheEntry n
378373
return true;
379374
}
380375

381-
static final int initialBucketIndex(CacheEntry[] entries, int adjHash) {
382-
return adjHash & (entries.length - 1);
383-
}
384-
385-
static final int initialBucketIndex(int[] marks, int adjHash) {
386-
return adjHash & (marks.length - 1);
387-
}
388-
389376
static final int lookupEntryIndex(
390377
CacheEntry[] entries, int numProbes, int adjHash, String value) {
391-
int initialBucketIndex = initialBucketIndex(entries, adjHash);
378+
int initialBucketIndex = Caching.bucketIndex(entries, adjHash);
392379
for (int probe = 0, index = initialBucketIndex; probe < numProbes; ++probe, ++index) {
393380
if (index >= entries.length) index = 0;
394381

@@ -456,11 +443,6 @@ boolean isPurgeable() {
456443
return (this.score < PURGE_THRESHOLD);
457444
}
458445

459-
static final int adjHash(String value) {
460-
int hash = value.hashCode();
461-
return (hash == 0) ? 0xDA7AD06 : hash;
462-
}
463-
464446
static final byte[] utf8(String value) {
465447
return value.getBytes(StandardCharsets.UTF_8);
466448
}

0 commit comments

Comments
 (0)