From 8a59b290bc2b9b1e748cc53a776d618f1e88e411 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Thu, 28 Aug 2025 13:05:55 -0400 Subject: [PATCH 01/23] UTF8 caching for v0.4 This change adds UTF-8 encoding caching to optimize v0.4 payload construction. Since String#getBytes is intrinsified, these caches actually perform worse throughput-wise than an uncached conversion. However, the caches are useful in reducing allocation from UTF-8 conversions. For tags, a "simple" cache is used. The simple cache is a single-level cache that uses hashing combined with linear probing. To avoid cache churn and unnecessary allocation of a CacheEntry, the simple cache uses a first-request marking scheme that typically avoids creating a CacheEntry for values that are requested only once. Eviction from the "simple" cache is based on an LFU policy. For tag values, a more complicated generational cache is used. The generational cache combines the delayed CacheEntry logic of the simple cache with a 2nd level for resilience. Frequently used entries are "promoted" to the higher level cache. The 1st level of the generational cache uses an LFU eviction policy. The 2nd level of the generational cache uses an LRU eviction policy. For the value use case, the generational policy provided a 2x increase in hit rate over the simple cache. 
--- .../trace/api/config/GeneralConfig.java | 1 + .../common/writer/ddagent/Utf8Benchmark.java | 160 +++++++ .../writer/ddagent/GenerationalUtf8Cache.java | 438 ++++++++++++++++++ .../writer/ddagent/SimpleUtf8Cache.java | 227 +++++++++ .../writer/ddagent/TraceMapperV0_4.java | 39 +- .../ddagent/GenerationalUtf8CacheTest.java | 168 +++++++ .../writer/ddagent/SimpleUtf8CacheTest.java | 129 ++++++ .../main/java/datadog/trace/api/Config.java | 7 + 8 files changed, 1154 insertions(+), 15 deletions(-) create mode 100644 dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java create mode 100644 dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java create mode 100644 dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java create mode 100644 dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/GenerationalUtf8CacheTest.java create mode 100644 dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/SimpleUtf8CacheTest.java diff --git a/dd-trace-api/src/main/java/datadog/trace/api/config/GeneralConfig.java b/dd-trace-api/src/main/java/datadog/trace/api/config/GeneralConfig.java index 588854a2069..ee24889233a 100644 --- a/dd-trace-api/src/main/java/datadog/trace/api/config/GeneralConfig.java +++ b/dd-trace-api/src/main/java/datadog/trace/api/config/GeneralConfig.java @@ -105,6 +105,7 @@ public final class GeneralConfig { public static final String JDK_SOCKET_ENABLED = "jdk.socket.enabled"; public static final String OPTIMIZED_MAP_ENABLED = "optimized.map.enabled"; + public static final String UTF8_CACHE_ENABLED = "utf8.cache.enabled"; public static final String STACK_TRACE_LENGTH_LIMIT = "stack.trace.length.limit"; public static final String SSI_INJECTION_ENABLED = "injection.enabled"; diff --git a/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java b/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java new 
file mode 100644 index 00000000000..4a7d87ab4ce --- /dev/null +++ b/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java @@ -0,0 +1,160 @@ +package datadog.trace.common.writer.ddagent; + +import java.nio.charset.StandardCharsets; +import java.util.concurrent.ThreadLocalRandom; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +import datadog.trace.api.DDTags; +import datadog.utf8.GeneratedTagUtf8Cache; +import datadog.utf8.SimpleUtf8Cache; +import datadog.utf8.Tags; + +import datadog.utf8.GenerationalUtf8Cache; +import datadog.utf8.ValueUtf8CacheBackup; + +/** + * This benchmark isn't really intended to used to measure throughput, + * but rather to be used with "-prof gc" to check bytes / op. + * + * Since {@link String#getBytes(java.nio.charset.Charset)} is intrinsified + * the caches typically perform worse throughput wise, the benefit of the + * caches is to reduce allocation. 
+ */ +@BenchmarkMode(Mode.Throughput) +public class Utf8Benchmark { + static final int NUM_LOOKUPS = 10_000; + + static final String[] TAGS = { + "_dd.asm.keep", + "ci.provider", + "language", + "db.statement", + "ci.job.url", + "ci.pipeline.url", + "db.pool", + "http.forwarder", + "db.warehouse", + "custom" + }; + + static int pos = 0; + static int standardVal = 0; + + static final String nextTag() { + if ( pos == TAGS.length - 1 ) { + pos = 0; + } else { + pos += 1; + } + return TAGS[pos]; + } + + static final String nextValue(String tag) { + if ( tag == "custom" ) { + return nextCustomValue(); + } else { + return nextStandardValue(tag); + } + } + + static final String nextCustomValue() { + return "custom" + ThreadLocalRandom.current().nextInt(); + } + + static final String nextStandardValue(String tag) { + return tag + ThreadLocalRandom.current().nextInt(20); + } + + @Benchmark + public static final String tagUtf8_baseline() { + return nextTag(); + } + + @Benchmark + public static final byte[] tagUtf8_nocache() { + String tag = nextTag(); + return tag.getBytes(StandardCharsets.UTF_8); + } + + @Benchmark + public static final byte[] tagUtf8_w_generatedCache() { + String tag = nextTag(); + + byte[] cache = GeneratedTagUtf8Cache.lookup(tag); + if ( cache != null ) return cache; + + return tag.getBytes(StandardCharsets.UTF_8); + } + + static final SimpleUtf8Cache TAG_CACHE = new SimpleUtf8Cache(); + + @Benchmark + public static final byte[] tagUtf8_w_cache() { + String tag = nextTag(); + + byte[] cache = TAG_CACHE.getUtf8(tag); + if ( cache != null ) return cache; + + return tag.getBytes(StandardCharsets.UTF_8); + } + + @Benchmark + public static final void valueUtf8_baseline(Blackhole bh) { + for ( int i = 0; i < NUM_LOOKUPS; ++i ) { + String tag = nextTag(); + String value = nextValue(tag); + + bh.consume(tag); + bh.consume(value); + } + } + + static final GenerationalUtf8Cache VALUE_CACHE = new GenerationalUtf8Cache(); + + @Benchmark + public static final void 
valueUtf8_cache_generational(Blackhole bh) { + GenerationalUtf8Cache valueCache = VALUE_CACHE; + valueCache.recalibrate(); + + for ( int i = 0; i < NUM_LOOKUPS; ++i ) { + String tag = nextTag(); + String value = nextValue(tag); + + byte[] lookup = valueCache.getUtf8(value); + bh.consume(lookup); + } + } + + static final SimpleUtf8Cache SIMPLE_VALUE_CACHE = new SimpleUtf8Cache(); + @Benchmark + public static final void valueUtf8_cache_simple(Blackhole bh) { + SimpleUtf8Cache valueCache = SIMPLE_VALUE_CACHE; + valueCache.recalibrate(); + + for ( int i = 0; i < NUM_LOOKUPS; ++i ) { + String tag = nextTag(); + String value = nextValue(tag); + + byte[] lookup = valueCache.getUtf8(value); + bh.consume(lookup); + } + } + + @Benchmark + public static final void valueUtf8_nocache(Blackhole bh) { + for ( int i = 0; i < NUM_LOOKUPS; ++i ) { + String tag = nextTag(); + String value = nextValue(tag); + + bh.consume(tag); + bh.consume(value.getBytes(StandardCharsets.UTF_8)); + } + } +} diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java new file mode 100644 index 00000000000..3f1dcd96469 --- /dev/null +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java @@ -0,0 +1,438 @@ +package datadog.trace.common.writer.ddagent; + +import datadog.communication.serialization.EncodingCache; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; + +/** + * 2-level generational cache of UTF8 values - primarily intended to be used for tag values + * + * Cache is designed to take advantage of low cardinality tags to avoid + * repeated UTF8 encodings while also minimizing cache overhead and + * churn from high cardinality tags. + * + * NOTE: The aim of this cache is to reduce allocation overhead -- not CPU overhead. 
+ * Using the cache has higher CPU overhead than simply calling {@link String#getBytes(java.nio.charset.Charset)}. + * + * The cache is thread safe. + */ +/* + * Cache works by using a 2-level promotion-based scheme. + * + * Thread safety is achieved through using CacheEntry objects where the key data + * fields are final. + * + * Updating of the cache and bookkeeping are deliberately allowed to be racy to + * minimize CPU overhead and lock contention. + * + * The first time a value is requested, the value isn't cached and no CacheEntry + * is created. Without this refinement, the cost for constructing + * CacheEntry for unique values would negate the benefit of the cache. + * + * These first requests are tracked via edenMarkers which indicate if there was + * previously an unsatisfied request to the same initial cache line. + * + * If there was a request, then CacheEntry is created and stored into edenEntries. + * NOTE: The eden line marking process is imprecise and subject to request + * ordering issues, but given that low cardinality entries are more likely to repeat + * next, empirically this scheme works well. + * + * If a collision occurs in the cache, linear probing is used to check other slots. + * New cache entries fill any available slot within the probing window. + * + * If a subsequent request finds a matching item in edenEntries, the hit count + * of the CacheEntry is bumped. If the hit count exceeds the current promotion + * threshold, then the CacheEntry is inserted into promotedEntries -- freeing up + * a slot in edenEntries. If there isn't an available slot in promotedEntries, + * the LRU (least recently used) promoted entry is evicted. + * + * If there are no available slots in edenEntries for a newly created CacheEntry... + * + * Attempt to early promote the MFU (most frequently used) CacheEntry from + * edenEntries to promotedEntries (without eviction). 
+ * + * If there's no space in promotedEntries to promote the MFU, then evict the + * LFU: least frequently used entry from edenEntries instead. + * + * + * LRU based eviction of the promotedEntries works on tagging with the last hit time. + * The access time can be provided directly to ValueUtf8Cache#getUtf8 or can + * be refreshed periodically by calling ValueUtf8Cache#updateAccessTime. + * + * If there's a natural transaction boundary around the UTF8 cache, + * calling ValueUtf8Cache#recalibrateThresholds will adjust promotion + * thresholds to provide better cache utilization. + */ +public final class GenerationalUtf8Cache implements EncodingCache { + private static final int MAX_PROBES = 8; + + private static final int MIN_PROMOTION_TRESHOLD = 2; + private static final int INITIAL_PROMOTION_THRESHOLD = 10; + + private static final double HIT_DECAY = 0.8D; + private static final double PURGE_THRESHOLD = 0.25D; + + private final CacheEntry[] edenEntries; + private final boolean[] edenMarkers; + + private final CacheEntry[] promotedEntries; + + private long accessTimeMs; + private double promotionThreshold = INITIAL_PROMOTION_THRESHOLD; + + int edenHits = 0; + int promotedHits = 0; + int earlyPromotions = 0; + int promotions = 0; + int edenEvictions = 0; + int promotedEvictions = 0; + + public GenerationalUtf8Cache() { + this.accessTimeMs = System.currentTimeMillis(); + + // These sizes must be powers of 2 + this.edenEntries = new CacheEntry[256]; + this.edenMarkers = new boolean[256]; + + // The size must be a power of 2 + this.promotedEntries = new CacheEntry[512]; + } + + /** + * Updates access time used @link {@link #getUtf8(String, String)} to the provided value + */ + public void updateAccessTime(long accessTimeMs) { + this.accessTimeMs = accessTimeMs; + } + + /** + * Updates access time to the @link {@link System#currentTimeMillis()} + */ + public void refreshAcessTime() { + this.updateAccessTime(System.currentTimeMillis()); + } + + public void 
recalibrate() { + this.recalibrate(System.currentTimeMillis()); + } + + /** + * Recalibrates promotion threshold based on promotion & eviction statistics, + * since last calibration - resets statistics + * @param accessTimeMs + */ + public void recalibrate(long accessTimeMs) { + this.accessTimeMs = accessTimeMs; + + CacheEntry[] thisEntries = this.edenEntries; + for ( int i = 0; i < thisEntries.length; ++i ) { + CacheEntry entry = thisEntries[i]; + if ( entry == null ) continue; + + boolean purge = entry.decay(); + if ( purge ) this.edenEntries[i] = null; + } + + Arrays.fill(this.edenMarkers, false); + + int totalPromotions = this.promotions + this.earlyPromotions; + if ( totalPromotions == 0 && this.promotionThreshold >= MIN_PROMOTION_TRESHOLD ) { + this.promotionThreshold /= 1.5; + } else if ( totalPromotions > this.promotedEvictions / 2 ) { + this.promotionThreshold *= 1.5; + } + + this.edenHits = 0; + this.promotedHits = 0; + this.earlyPromotions = 0; + this.promotions = 0; + this.edenEvictions = 0; + this.promotedEvictions = 0; + } + + @Override + public byte[] encode(CharSequence charSeq) { + if ( charSeq instanceof String ) { + String str = (String)charSeq; + return this.getUtf8(str); + } else { + return null; + } + } + + /** + * Returns the UTF-8 encoding of value -- using a cache value if available + */ + public final byte[] getUtf8(String value) { + return this.getUtf8(value, this.accessTimeMs); + } + + /** + * Returns the UTF-8 encoding of value -- using a cache value if available + * If there is cache hit, the specified accessTimeMs is used to update the cache entry + */ + public final byte[] getUtf8(String value, long accessTimeMs) { + int valueHash = value.hashCode(); + + CacheEntry[] localEntries = this.edenEntries; + long lookupTimeMs = this.accessTimeMs; + + int matchingLocalIndex = lookupEntry(localEntries, valueHash, value, lookupTimeMs); + if ( matchingLocalIndex != -1 ) { + CacheEntry localEntry = localEntries[matchingLocalIndex]; + + double 
hits = localEntry.hit(lookupTimeMs); + if ( hits > this.promotionThreshold ) { + // mark promoted first - to avoid racy insertions + this.promotions += 1; + + boolean evicted = lruInsert(this.promotedEntries, localEntry); + if ( evicted ) this.promotedEvictions += 1; + + localEntries[matchingLocalIndex] = null; + } + + this.edenHits += 1; + return localEntry.utf8(); + } + + CacheEntry[] promotedEntries = this.promotedEntries; + int matchingPromotedIndex = lookupEntry(promotedEntries, valueHash, value, lookupTimeMs); + if ( matchingPromotedIndex != -1 ) { + CacheEntry promotedEntry = promotedEntries[matchingPromotedIndex]; + + promotedEntry.hit(lookupTimeMs); + + this.promotedHits += 1; + return promotedEntry.utf8(); + } + + boolean wasMarked = reverseMark(this.edenMarkers, valueHash); + + // If slot isn't marked, this is likely the first request + // Don't create an entry yet + if ( !wasMarked ) return CacheEntry.utf8(value); + + CacheEntry newEntry = new CacheEntry(valueHash, value); + // First request was swallowed by marking, so double hit + newEntry.hit(lookupTimeMs); + newEntry.hit(lookupTimeMs); + + // search for empty slot or failing that the MFU entry + int localMfuIndex = findFirstAvailableOrMfuIndex(localEntries, valueHash); + CacheEntry localMfuEntry = localEntries[localMfuIndex]; + + // Found an empty slot - fill it + if ( localMfuEntry == null ) { + localEntries[localMfuIndex] = newEntry; + return newEntry.utf8(); + } + + // See if we can early promote the local MFU entry into the global cache + // Early promotion doesn't evict from the global cache + int globalAvailableIndex = findAvailable(promotedEntries, localMfuEntry.valueHash()); + if ( globalAvailableIndex != -1 ) { + promotedEntries[globalAvailableIndex] = localMfuEntry; + this.earlyPromotions += 1; + + localEntries[localMfuIndex] = newEntry; + return CacheEntry.utf8(value); + } + + // No empty slot - or space to promote into the global cache + // Insert into local cache while evicting the LFU 
+ boolean evicted = lfuInsert(localEntries, newEntry); + if ( evicted ) this.promotedEvictions += 1; + + return newEntry.utf8(); + } + + static final int findAvailable(CacheEntry[] entries, int newValueHash) { + int initialBucketIndex = initialBucketIndex(entries, newValueHash); + for ( int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index ) { + if ( index >= entries.length ) index = 0; + + CacheEntry entry = entries[index]; + if ( entry == null || entry.hits() == 0 ) return index; + } + return -1; + } + + static final int findFirstAvailableOrMfuIndex(CacheEntry[] entries, int newValueHash) { + double mfuHits = Double.MIN_VALUE; + int mfuIndex = -1; + + int initialBucketIndex = initialBucketIndex(entries, newValueHash); + for ( int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index ) { + if ( index >= entries.length ) index = 0; + + CacheEntry entry = entries[index]; + if ( entry == null ) return index; + + double hits = entry.hits(); + if ( hits > mfuHits ) { + mfuHits = hits; + mfuIndex = index; + } + } + return mfuIndex; + } + + static final boolean reverseMark(boolean[] marks, int newValueHash) { + int index = initialBucketIndex(marks, newValueHash); + boolean wasMarked = marks[index]; + marks[index] = !wasMarked; + return wasMarked; + } + + static final boolean lfuInsert(CacheEntry[] entries, CacheEntry newEntry) { + int initialBucketIndex = initialBucketIndex(entries, newEntry.valueHash()); + + // initial scan to see if there's an empty slot or marker entry is already present + double lowestHits = Double.MAX_VALUE; + int lfuIndex = -1; + for ( int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index ) { + if ( index >= entries.length ) index = 0; + + CacheEntry entry = entries[index]; + if ( entry == null || entry.hits() == 0 ) { + entries[index] = newEntry; + return false; + } else { + double hits = entry.hits(); + if ( hits < lowestHits ) { + lowestHits = hits; + lfuIndex = index; + } 
+ } + } + + // If we get here, then we're evicted the LRU + entries[lfuIndex] = newEntry; + return true; + } + + static final boolean lruInsert(CacheEntry[] entries, CacheEntry newEntry) { + int initialBucketIndex = initialBucketIndex(entries, newEntry.valueHash()); + + // initial scan to see if there's an empty slot or entry is already present + long lowestUsedMs = Long.MAX_VALUE; + int lruIndex = -1; + for ( int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index ) { + if ( index >= entries.length ) index = 0; + + CacheEntry entry = entries[index]; + if ( entry == null ) { + entries[index] = newEntry; + return false; + } else if ( entry.matches(newEntry) ) { + entries[index] = newEntry; + return false; + } else { + long lastUsedMs = entry.lastUsedMs(); + if ( lastUsedMs < lowestUsedMs ) { + lowestUsedMs = lastUsedMs; + lruIndex = index; + } + } + } + + entries[lruIndex] = newEntry; + return true; + } + + static final int initialBucketIndex(CacheEntry[] entries, int valueHash) { + return valueHash & (entries.length - 1); + } + + static final int initialBucketIndex(boolean[] marks, int valueHash) { + return valueHash & (marks.length - 1); + } + + static final int lookupEntry( + CacheEntry[] entries, + int valueHash, String value, + long lookupTimeMs) + { + int initialBucketIndex = initialBucketIndex(entries, valueHash); + for ( int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index ) { + if ( index >= entries.length ) index = 0; + + CacheEntry entry = entries[index]; + if ( entry != null && entry.matches(valueHash, value) ) { + return index; + } + } + return -1; + } + + static final int bucketHash(int tagHash, int valueHash) { + return tagHash + 31 * valueHash; + } + + static final class CacheEntry { + final int valueHash; + final String value; + final byte[] valueUtf8; + + boolean promoted = false; + long lastUsedMs = 0; + double hitCount = 0; + + public CacheEntry(int valueHash, String value) { + this.valueHash = 
valueHash; + this.value = value; + this.valueUtf8 = utf8(value); + } + + boolean matches(CacheEntry thatEntry ) { + return ( this == thatEntry ) || this.matches(thatEntry.valueHash, thatEntry.value); + } + + boolean matches(int valueHash, String value) { + return (this.valueHash == valueHash) && value.equals(this.value); + } + + int valueHash() { + return this.valueHash; + } + + double hits() { + return this.hitCount; + } + + long lastUsedMs() { + return this.lastUsedMs; + } + + byte[] utf8() { + return this.valueUtf8; + } + + double hit(long lastUsedMs) { + this.lastUsedMs = lastUsedMs; + this.hitCount += 1; + + return this.hitCount; + } + + boolean decay() { + this.hitCount *= HIT_DECAY; + + return (this.hitCount < PURGE_THRESHOLD); + } + + static final byte[] utf8(String value) { + return value.getBytes(StandardCharsets.UTF_8); + } + + @Override + public String toString() { + if ( this.value == null ) { + return "marker"; + } else { + return this.value + " - hits: " + this.hitCount + " used (ms): " + this.lastUsedMs; + } + } + } +} diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java new file mode 100644 index 00000000000..3bdb5edffde --- /dev/null +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java @@ -0,0 +1,227 @@ +package datadog.trace.common.writer.ddagent; + +import datadog.communication.serialization.EncodingCache; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; + +/** + * A simple UTF8 cache - primarily intended for tag names + * + * Cache is designed to against resilient against single use tags + * + * NOTE: The aim of this cache is to reduce allocation overhead -- not CPU overhead. + * Using the cache has higher CPU overhead than simply calling {@link String#getBytes(java.nio.charset.Charset)}. + * + * The cache is thread safe. 
+ */ +/* + * Thread safety is achieved through using CacheEntry objects where the key data + * fields are final. + * + * Updating of the cache and bookkeeping are deliberately allowed to be racy to + * minimize CPU overhead and lock contention. + * + * The first time a value is requested, the value isn't cached and no CacheEntry + * is created. Without this refinement, the cost for constructing + * CacheEntry for unique values would negate the benefit of the cache. + * + * These first requests are tracked via markers which indicate if there was + * previously an unsatisfied request to the same initial cache line. + * + * If there was a request, then CacheEntry is created and stored into entries. + * NOTE: The cache line marking process is imprecise and subject to request + * ordering issues, but given that low cardinality entries are more likely to repeat + * next, empirically this scheme works well. + * + * If a collision occurs in the cache, linear probing is used to check other slots. + * New cache entries fill any available slot within the probing window. + * + * If a subsequent request finds a matching item in entries, the hit count + * of the CacheEntry is bumped. + * + * If there are no available slots in entries for a newly created CacheEntry, + * an LFU (least frequently used) eviction policy is used to free up a slot. 
+ */ +public final class SimpleUtf8Cache implements EncodingCache { + public static final SimpleUtf8Cache INSTANCE = new SimpleUtf8Cache(); + + private static final int MAX_PROBES = 8; + + private final int SIZE = 256; + + private final boolean[] markers = new boolean[SIZE]; + private final CacheEntry[] entries = new CacheEntry[SIZE]; + + private static final double HIT_DECAY = 0.8D; + private static final double PURGE_THRESHOLD = 0.25D; + + protected int hits = 0; + protected int evictions = 0; + + public void recalibrate() { + CacheEntry[] thisEntries = this.entries; + for ( int i = 0; i < thisEntries.length; ++i ) { + CacheEntry entry = thisEntries[i]; + if ( entry == null ) continue; + + boolean purge = entry.decay(); + if ( purge ) thisEntries[i] = null; + } + + Arrays.fill(this.markers, false); + } + + @Override + public byte[] encode(CharSequence charSeq) { + if ( charSeq instanceof String ) { + String str = (String)charSeq; + return this.getUtf8(str); + } else { + return null; + } + } + + /** + * Returns the UTF-8 encoding of value -- using a cache value if available + */ + public final byte[] getUtf8(String value) { + CacheEntry[] thisEntries = this.entries; + + int valueHash = value.hashCode(); + + CacheEntry matchingEntry = lookupEntry(thisEntries, valueHash, value); + if ( matchingEntry != null ) { + this.hits += 1; + return matchingEntry.utf8(); + } + + boolean wasMarked = reverseMark(this.markers, valueHash); + if ( !wasMarked ) return CacheEntry.utf8(value); + + CacheEntry newEntry = new CacheEntry(valueHash, value); + newEntry.hit(); + + boolean evicted = lfuInsert(thisEntries, newEntry); + if ( evicted ) this.evictions += 1; + + return newEntry.utf8(); + } + + static final CacheEntry lookupEntry( + CacheEntry[] entries, + int valueHash, String value) + { + int initialBucketIndex = initialBucketIndex(entries, valueHash); + for ( int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index ) { + if ( index >= entries.length ) index 
= 0; + + CacheEntry entry = entries[index]; + if ( entry != null && entry.matches(valueHash, value) ) { + return entry; + } + } + return null; + } + + static final boolean lfuInsert(CacheEntry[] entries, CacheEntry newEntry) { + int initialBucketIndex = initialBucketIndex(entries, newEntry.valueHash()); + + // initial scan to see if there's an empty slot or marker entry is already present + double lowestHits = Double.MAX_VALUE; + int lfuIndex = -1; + for ( int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index ) { + if ( index >= entries.length ) index = 0; + + CacheEntry entry = entries[index]; + if ( entry == null || entry.hits() == 0 ) { + entries[index] = newEntry; + return false; + } else { + double hits = entry.hits(); + if ( hits < lowestHits ) { + lowestHits = hits; + lfuIndex = index; + } + } + } + + // If we get here, then we're evicting the LRU + entries[lfuIndex] = newEntry; + return true; + } + + static final int initialBucketIndex(CacheEntry[] entries, int valueHash) { + return valueHash & (entries.length - 1); + } + + static final int initialBucketIndex(boolean[] marks, int valueHash) { + return valueHash & (marks.length - 1); + } + + static final boolean reverseMark(boolean[] marks, int newValueHash) { + int index = initialBucketIndex(marks, newValueHash); + boolean wasMarked = marks[index]; + marks[index] = !wasMarked; + return wasMarked; + } + + static final class CacheEntry { + final int valueHash; + final String value; + final byte[] valueUtf8; + + boolean promoted = false; + double hitCount = 0; + + public CacheEntry(int valueHash, String value) { + this.valueHash = valueHash; + this.value = value; + this.valueUtf8 = utf8(value); + } + + boolean matches(CacheEntry thatEntry ) { + return ( this == thatEntry ) || this.matches(thatEntry.valueHash, thatEntry.value); + } + + boolean matches(int valueHash, String value) { + return (this.valueHash == valueHash) && value.equals(this.value); + } + + int valueHash() { + return 
this.valueHash; + } + + double hits() { + return this.hitCount; + } + + byte[] utf8() { + return this.valueUtf8; + } + + double hit() { + this.hitCount += 1; + + return this.hitCount; + } + + boolean decay() { + this.hitCount *= HIT_DECAY; + + return (this.hitCount < PURGE_THRESHOLD); + } + + static final byte[] utf8(String value) { + return value.getBytes(StandardCharsets.UTF_8); + } + + @Override + public String toString() { + if ( this.value == null ) { + return "marker"; + } else { + return this.value + " - hits: " + this.hitCount; + } + } + } +} diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/TraceMapperV0_4.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/TraceMapperV0_4.java index a1d60164b82..44dfbb2e771 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/TraceMapperV0_4.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/TraceMapperV0_4.java @@ -6,6 +6,7 @@ import datadog.communication.serialization.GrowableBuffer; import datadog.communication.serialization.Writable; import datadog.communication.serialization.msgpack.MsgPackWriter; +import datadog.trace.api.Config; import datadog.trace.api.ProcessTags; import datadog.trace.bootstrap.instrumentation.api.InstrumentationTags; import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString; @@ -23,7 +24,12 @@ import okhttp3.RequestBody; public final class TraceMapperV0_4 implements TraceMapper { - + static final SimpleUtf8Cache TAG_CACHE = + Config.get().isUtf8CacheEnabled() ? new SimpleUtf8Cache() : null; + + static final GenerationalUtf8Cache VALUE_CACHE = + Config.get().isUtf8CacheEnabled() ? 
new GenerationalUtf8Cache() : null; + private final int size; public TraceMapperV0_4(int size) { @@ -57,6 +63,9 @@ MetaWriter forLastSpanInChunk(final boolean lastSpanInChunk) { @Override public void accept(Metadata metadata) { + TAG_CACHE.recalibrate(); + VALUE_CACHE.recalibrate(); + final boolean writeSamplingPriority = firstSpanInChunk || lastSpanInChunk; final UTF8BytesString processTags = firstSpanInChunk ? ProcessTags.getTagsForSerialization() : null; @@ -111,8 +120,8 @@ public void accept(Metadata metadata) { writable.writeLong(metadata.getThreadId()); for (Map.Entry entry : metadata.getTags().entrySet()) { if (entry.getValue() instanceof Number) { - writable.writeString(entry.getKey(), null); - writable.writeObject(entry.getValue(), null); + writable.writeString(entry.getKey(), TAG_CACHE); + writable.writeObject(entry.getValue(), VALUE_CACHE); } } @@ -122,8 +131,8 @@ public void accept(Metadata metadata) { // since they will be accumulated into maps in the same order downstream, // we just need to be sure that the size is the same as the number of elements for (Map.Entry entry : metadata.getBaggage().entrySet()) { - writable.writeString(entry.getKey(), null); - writable.writeString(entry.getValue(), null); + writable.writeString(entry.getKey(), TAG_CACHE); + writable.writeString(entry.getValue(), VALUE_CACHE); } writable.writeUTF8(THREAD_NAME); writable.writeUTF8(metadata.getThreadName()); @@ -133,7 +142,7 @@ public void accept(Metadata metadata) { } if (null != metadata.getOrigin()) { writable.writeUTF8(ORIGIN_KEY); - writable.writeString(metadata.getOrigin(), null); + writable.writeString(metadata.getOrigin(), VALUE_CACHE); } if (processTags != null) { writable.writeUTF8(PROCESS_TAGS_KEY); @@ -146,8 +155,8 @@ public void accept(Metadata metadata) { // Write map as flat map writeFlatMap(key, (Map) value); } else if (!(value instanceof Number)) { - writable.writeString(entry.getKey(), null); - writable.writeObjectString(entry.getValue(), null); + 
writable.writeString(entry.getKey(), TAG_CACHE); + writable.writeObjectString(entry.getValue(), VALUE_CACHE); } } } @@ -189,8 +198,8 @@ private void writeFlatMap(String key, Map mapValue) { if (newValue instanceof Map) { writeFlatMap(newKey, (Map) newValue); } else { - writable.writeString(newKey, null); - writable.writeObjectString(newValue, null); + writable.writeString(newKey, TAG_CACHE); + writable.writeObjectString(newValue, VALUE_CACHE); } } } @@ -236,7 +245,7 @@ private void writeMetaStructEntry( try { writer.writeObject(value, null); writer.flush(); - writable.writeString(key, null); + writable.writeString(key, TAG_CACHE); writable.writeBinary(buffer.slice()); } finally { buffer.reset(); @@ -256,13 +265,13 @@ public void map(List> trace, final Writable writable) { writable.startMap(metaStruct.isEmpty() ? 12 : 13); /* 1 */ writable.writeUTF8(SERVICE); - writable.writeString(span.getServiceName(), null); + writable.writeString(span.getServiceName(), VALUE_CACHE); /* 2 */ writable.writeUTF8(NAME); - writable.writeObject(span.getOperationName(), null); + writable.writeObject(span.getOperationName(), VALUE_CACHE); /* 3 */ writable.writeUTF8(RESOURCE); - writable.writeObject(span.getResourceName(), null); + writable.writeObject(span.getResourceName(), VALUE_CACHE); /* 4 */ writable.writeUTF8(TRACE_ID); writable.writeUnsignedLong(span.getTraceId().toLong()); @@ -280,7 +289,7 @@ public void map(List> trace, final Writable writable) { writable.writeLong(PendingTrace.getDurationNano(span)); /* 9 */ writable.writeUTF8(TYPE); - writable.writeString(span.getType(), null); + writable.writeString(span.getType(), VALUE_CACHE); /* 10 */ writable.writeUTF8(ERROR); writable.writeInt(span.getError()); diff --git a/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/GenerationalUtf8CacheTest.java b/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/GenerationalUtf8CacheTest.java new file mode 100644 index 00000000000..7cec6a6f2c3 --- /dev/null +++ 
b/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/GenerationalUtf8CacheTest.java @@ -0,0 +1,168 @@ +package datadog.trace.common.writer.ddagent; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertNotSame; +import static org.junit.Assert.assertSame; +import static org.junit.jupiter.api.Assertions.assertNotEquals; + +import java.nio.charset.StandardCharsets; +import java.util.Random; +import java.util.concurrent.ThreadLocalRandom; + +import org.junit.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +public class GenerationalUtf8CacheTest { + @ParameterizedTest + @ValueSource(strings={"foo", "bar", "baz", "quux"}) + public void getUtf8(String value) { + GenerationalUtf8Cache cache = new GenerationalUtf8Cache(); + + for ( int i = 0; i < 10; ++i ) { + byte[] valueUtf8 = cache.getUtf8(value); + assertArrayEquals(value.getBytes(StandardCharsets.UTF_8), valueUtf8); + } + } + + @Test + public void caching() { + GenerationalUtf8Cache cache = new GenerationalUtf8Cache(); + + String value = "bar"; + byte[] expected = value.getBytes(StandardCharsets.UTF_8); + + byte[] first = cache.getUtf8(value); + assertArrayEquals(expected, first); + + // first request isn't cached - to avoid burning slots + byte[] second = cache.getUtf8(value); + assertArrayEquals(expected, second); + assertNotSame(first, second); + + // after first request, the entry should be cached + byte[] third = cache.getUtf8(value); + assertArrayEquals(expected, third); + assertSame(second, third); + + assertNotEquals(0, cache.edenHits); + } + + @Test + public void promotion() { + GenerationalUtf8Cache cache = new GenerationalUtf8Cache(); + + String value = "bar"; + byte[] expected = value.getBytes(StandardCharsets.UTF_8); + + byte[] first = cache.getUtf8(value); + assertArrayEquals(expected, first); + + byte[] second = cache.getUtf8(value); + assertArrayEquals(expected, second); + 
assertNotSame(second, first); + + while ( cache.promotions == 0 ) { + byte[] cached = cache.getUtf8(value); + assertArrayEquals(expected, cached); + assertSame(cached, second); + } + + assertNotEquals(0, cache.edenHits); + + for ( int i = 0; i < 10; ++i ) { + byte[] cached = cache.getUtf8(value); + + assertArrayEquals(expected, cached); + assertSame(cached, second); + } + + assertNotEquals(0, cache.promotedHits); + } + + @Test + public void fuzz() { + Random random = ThreadLocalRandom.current(); + + int edenHits = 0; + int promotedHits = 0; + + GenerationalUtf8Cache cache = new GenerationalUtf8Cache(); + for ( int i = 0; i < 1_000; ++i ) { + cache.recalibrate(); + + int cycles = 500 + random.nextInt(2_000); + for ( int j = 0; j < cycles; ++j ) { + String nextTag = nextTag(); + String nextValue = nextValue(); + byte[] nextExpected = nextValue.getBytes(StandardCharsets.UTF_8); + + byte[] nextValueUtf8 = cache.getUtf8(nextValue); + assertArrayEquals(nextExpected, nextValueUtf8); + } + + edenHits += cache.edenHits; + promotedHits += cache.promotedHits; + + printStats(cache); + } + + assertNotEquals(0, edenHits); + assertNotEquals(0, promotedHits); + } + + static final String[] TAGS = { + "foo", + "bar", + "baz" + }; + + static final String[] BASE_STRINGS = { + "Hello", + "world", + "foo", + "bar", + "baz", + "quux" + }; + + static final String nextTag() { + ThreadLocalRandom random = ThreadLocalRandom.current(); + + int tagIndex = random.nextInt(TAGS.length + 1); + if ( tagIndex >= TAGS.length ) { + return "tag-" + Integer.toString(random.nextInt()); + } else { + return TAGS[tagIndex]; + } + } + + static final String nextValue() { + ThreadLocalRandom random = ThreadLocalRandom.current(); + + if ( random.nextDouble() < 0.1 ) { + return Integer.toString(random.nextInt()); + } + + int baseIndex = random.nextInt(BASE_STRINGS.length); + String baseString = BASE_STRINGS[baseIndex]; + + if ( random.nextDouble() < 0.2 ) { + baseString = baseString.toLowerCase(); + } + + int 
valueSuffix = random.nextInt(2 * baseIndex + 1); + return baseString + valueSuffix; + } + + static final void printStats(GenerationalUtf8Cache cache) { + System.out.printf( + "eden hits: %5d\tpromotion hits: %5d\tpromotions: %5d\tearly: %5d\tlocal evictions: %5d\tglobal evictions: %5d%n", + cache.edenHits, + cache.promotedHits, + cache.promotions, + cache.earlyPromotions, + cache.edenEvictions, + cache.promotedEvictions); + } +} diff --git a/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/SimpleUtf8CacheTest.java b/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/SimpleUtf8CacheTest.java new file mode 100644 index 00000000000..08bc9d4f517 --- /dev/null +++ b/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/SimpleUtf8CacheTest.java @@ -0,0 +1,129 @@ +package datadog.trace.common.writer.ddagent; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertNotSame; +import static org.junit.Assert.assertSame; +import static org.junit.jupiter.api.Assertions.assertNotEquals; + +import java.nio.charset.StandardCharsets; +import java.util.Random; +import java.util.concurrent.ThreadLocalRandom; + +import org.junit.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +public class SimpleUtf8CacheTest { + @ParameterizedTest + @ValueSource(strings={"foo", "bar", "baz", "quux"}) + public void getUtf8(String value) { + SimpleUtf8Cache cache = new SimpleUtf8Cache(); + + for ( int i = 0; i < 10; ++i ) { + byte[] valueUtf8 = cache.getUtf8(value); + assertArrayEquals(value.getBytes(StandardCharsets.UTF_8), valueUtf8); + } + } + + @Test + public void caching() { + SimpleUtf8Cache cache = new SimpleUtf8Cache(); + + String value = "bar"; + byte[] expected = value.getBytes(StandardCharsets.UTF_8); + + byte[] first = cache.getUtf8(value); + assertArrayEquals(expected, first); + + // first request isn't cached - to avoid burning slots + byte[] second = 
cache.getUtf8(value); + assertArrayEquals(expected, second); + assertNotSame(first, second); + + // after first request, the entry should be cached + byte[] third = cache.getUtf8(value); + assertArrayEquals(expected, third); + assertSame(second, third); + + assertNotEquals(0, cache.hits); + } + + @Test + public void fuzz() { + Random random = ThreadLocalRandom.current(); + + int hits = 0; + + SimpleUtf8Cache cache = new SimpleUtf8Cache(); + for ( int i = 0; i < 1_000; ++i ) { + cache.recalibrate(); + + int cycles = 500 + random.nextInt(2_000); + for ( int j = 0; j < cycles; ++j ) { + String nextTag = nextTag(); + String nextValue = nextValue(); + byte[] nextExpected = nextValue.getBytes(StandardCharsets.UTF_8); + + byte[] nextValueUtf8 = cache.getUtf8(nextValue); + assertArrayEquals(nextExpected, nextValueUtf8); + } + + hits += cache.hits; + + printStats(cache); + } + + assertNotEquals(0, hits); + } + + static final String[] TAGS = { + "foo", + "bar", + "baz" + }; + + static final String[] BASE_STRINGS = { + "Hello", + "world", + "foo", + "bar", + "baz", + "quux" + }; + + static final String nextTag() { + ThreadLocalRandom random = ThreadLocalRandom.current(); + + int tagIndex = random.nextInt(TAGS.length + 1); + if ( tagIndex >= TAGS.length ) { + return "tag-" + Integer.toString(random.nextInt()); + } else { + return TAGS[tagIndex]; + } + } + + static final String nextValue() { + ThreadLocalRandom random = ThreadLocalRandom.current(); + + if ( random.nextDouble() < 0.1 ) { + return Integer.toString(random.nextInt()); + } + + int baseIndex = random.nextInt(BASE_STRINGS.length); + String baseString = BASE_STRINGS[baseIndex]; + + if ( random.nextDouble() < 0.2 ) { + baseString = baseString.toLowerCase(); + } + + int valueSuffix = random.nextInt(2 * baseIndex + 1); + return baseString + valueSuffix; + } + + static final void printStats(SimpleUtf8Cache cache) { + System.out.printf( + "eden hits: %5d\tpromotion hits: %5d\tpromotions: %5d\tearly: %5d\tlocal evictions: 
%5d\tglobal evictions: %5d%n", + cache.hits, + cache.evictions); + } +} diff --git a/internal-api/src/main/java/datadog/trace/api/Config.java b/internal-api/src/main/java/datadog/trace/api/Config.java index aae53ec7e07..89c7f243d4c 100644 --- a/internal-api/src/main/java/datadog/trace/api/Config.java +++ b/internal-api/src/main/java/datadog/trace/api/Config.java @@ -1222,6 +1222,7 @@ public static String getHostName() { private final boolean jdkSocketEnabled; private final boolean optimizedMapEnabled; + private final boolean utf8CacheEnabled; private final int stackTraceLengthLimit; private final RumInjectorConfig rumInjectorConfig; @@ -2734,6 +2735,8 @@ PROFILING_DATADOG_PROFILER_ENABLED, isDatadogProfilerSafeInCurrentEnvironment()) this.optimizedMapEnabled = configProvider.getBoolean(GeneralConfig.OPTIMIZED_MAP_ENABLED, false); + this.utf8CacheEnabled = + configProvider.getBoolean(GeneralConfig.UTF8_CACHE_ENABLED, true); int defaultStackTraceLengthLimit = instrumenterConfig.isCiVisibilityEnabled() @@ -4419,6 +4422,10 @@ public boolean isJdkSocketEnabled() { public boolean isOptimizedMapEnabled() { return optimizedMapEnabled; } + + public boolean isUtf8CacheEnabled() { + return utf8CacheEnabled; + } public int getStackTraceLengthLimit() { return stackTraceLengthLimit; From 68fdcb953bd11fe740995b4120e26b25d6ca965c Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Thu, 28 Aug 2025 13:09:42 -0400 Subject: [PATCH 02/23] spotless --- .../common/writer/ddagent/Utf8Benchmark.java | 141 ++--- .../writer/ddagent/GenerationalUtf8Cache.java | 588 +++++++++--------- .../writer/ddagent/SimpleUtf8Cache.java | 262 ++++---- .../writer/ddagent/TraceMapperV0_4.java | 12 +- .../ddagent/GenerationalUtf8CacheTest.java | 228 ++++--- .../writer/ddagent/SimpleUtf8CacheTest.java | 163 +++-- .../main/java/datadog/trace/api/Config.java | 7 +- 7 files changed, 677 insertions(+), 724 deletions(-) diff --git 
a/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java b/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java index 4a7d87ab4ce..3d9e0085c8c 100644 --- a/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java +++ b/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java @@ -1,36 +1,26 @@ package datadog.trace.common.writer.ddagent; +import datadog.utf8.GeneratedTagUtf8Cache; +import datadog.utf8.GenerationalUtf8Cache; +import datadog.utf8.SimpleUtf8Cache; import java.nio.charset.StandardCharsets; import java.util.concurrent.ThreadLocalRandom; - import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Fork; -import org.openjdk.jmh.annotations.Measurement; import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.Warmup; import org.openjdk.jmh.infra.Blackhole; -import datadog.trace.api.DDTags; -import datadog.utf8.GeneratedTagUtf8Cache; -import datadog.utf8.SimpleUtf8Cache; -import datadog.utf8.Tags; - -import datadog.utf8.GenerationalUtf8Cache; -import datadog.utf8.ValueUtf8CacheBackup; - /** - * This benchmark isn't really intended to used to measure throughput, - * but rather to be used with "-prof gc" to check bytes / op. - * - * Since {@link String#getBytes(java.nio.charset.Charset)} is intrinsified - * the caches typically perform worse throughput wise, the benefit of the - * caches is to reduce allocation. + * This benchmark isn't really intended to used to measure throughput, but rather to be used with + * "-prof gc" to check bytes / op. + * + *

Since {@link String#getBytes(java.nio.charset.Charset)} is intrinsified the caches typically + * perform worse throughput wise, the benefit of the caches is to reduce allocation. */ @BenchmarkMode(Mode.Throughput) public class Utf8Benchmark { static final int NUM_LOOKUPS = 10_000; - + static final String[] TAGS = { "_dd.asm.keep", "ci.provider", @@ -41,120 +31,121 @@ public class Utf8Benchmark { "db.pool", "http.forwarder", "db.warehouse", - "custom" + "custom" }; - + static int pos = 0; static int standardVal = 0; - + static final String nextTag() { - if ( pos == TAGS.length - 1 ) { + if (pos == TAGS.length - 1) { pos = 0; } else { pos += 1; } return TAGS[pos]; } - + static final String nextValue(String tag) { - if ( tag == "custom" ) { - return nextCustomValue(); - } else { - return nextStandardValue(tag); - } + if (tag == "custom") { + return nextCustomValue(); + } else { + return nextStandardValue(tag); + } } - + static final String nextCustomValue() { - return "custom" + ThreadLocalRandom.current().nextInt(); + return "custom" + ThreadLocalRandom.current().nextInt(); } - + static final String nextStandardValue(String tag) { - return tag + ThreadLocalRandom.current().nextInt(20); + return tag + ThreadLocalRandom.current().nextInt(20); } - + @Benchmark public static final String tagUtf8_baseline() { - return nextTag(); + return nextTag(); } - + @Benchmark public static final byte[] tagUtf8_nocache() { - String tag = nextTag(); - return tag.getBytes(StandardCharsets.UTF_8); + String tag = nextTag(); + return tag.getBytes(StandardCharsets.UTF_8); } - + @Benchmark public static final byte[] tagUtf8_w_generatedCache() { - String tag = nextTag(); - - byte[] cache = GeneratedTagUtf8Cache.lookup(tag); - if ( cache != null ) return cache; - - return tag.getBytes(StandardCharsets.UTF_8); + String tag = nextTag(); + + byte[] cache = GeneratedTagUtf8Cache.lookup(tag); + if (cache != null) return cache; + + return tag.getBytes(StandardCharsets.UTF_8); } - + static final 
SimpleUtf8Cache TAG_CACHE = new SimpleUtf8Cache(); - + @Benchmark public static final byte[] tagUtf8_w_cache() { - String tag = nextTag(); - - byte[] cache = TAG_CACHE.getUtf8(tag); - if ( cache != null ) return cache; - - return tag.getBytes(StandardCharsets.UTF_8); + String tag = nextTag(); + + byte[] cache = TAG_CACHE.getUtf8(tag); + if (cache != null) return cache; + + return tag.getBytes(StandardCharsets.UTF_8); } @Benchmark public static final void valueUtf8_baseline(Blackhole bh) { - for ( int i = 0; i < NUM_LOOKUPS; ++i ) { + for (int i = 0; i < NUM_LOOKUPS; ++i) { String tag = nextTag(); String value = nextValue(tag); - + bh.consume(tag); bh.consume(value); - } + } } - + static final GenerationalUtf8Cache VALUE_CACHE = new GenerationalUtf8Cache(); - + @Benchmark public static final void valueUtf8_cache_generational(Blackhole bh) { - GenerationalUtf8Cache valueCache = VALUE_CACHE; - valueCache.recalibrate(); - - for ( int i = 0; i < NUM_LOOKUPS; ++i ) { + GenerationalUtf8Cache valueCache = VALUE_CACHE; + valueCache.recalibrate(); + + for (int i = 0; i < NUM_LOOKUPS; ++i) { String tag = nextTag(); String value = nextValue(tag); - + byte[] lookup = valueCache.getUtf8(value); bh.consume(lookup); - } + } } - + static final SimpleUtf8Cache SIMPLE_VALUE_CACHE = new SimpleUtf8Cache(); + @Benchmark public static final void valueUtf8_cache_simple(Blackhole bh) { - SimpleUtf8Cache valueCache = SIMPLE_VALUE_CACHE; - valueCache.recalibrate(); - - for ( int i = 0; i < NUM_LOOKUPS; ++i ) { + SimpleUtf8Cache valueCache = SIMPLE_VALUE_CACHE; + valueCache.recalibrate(); + + for (int i = 0; i < NUM_LOOKUPS; ++i) { String tag = nextTag(); String value = nextValue(tag); - + byte[] lookup = valueCache.getUtf8(value); bh.consume(lookup); - } + } } - + @Benchmark public static final void valueUtf8_nocache(Blackhole bh) { - for ( int i = 0; i < NUM_LOOKUPS; ++i ) { + for (int i = 0; i < NUM_LOOKUPS; ++i) { String tag = nextTag(); String value = nextValue(tag); - + bh.consume(tag); 
bh.consume(value.getBytes(StandardCharsets.UTF_8)); - } + } } } diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java index 3f1dcd96469..62e5e34b701 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java @@ -4,343 +4,338 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; -/** +/** * 2-level generational cache of UTF8 values - primarily intended to be used for tag values - * - * Cache is designed to take advantage of low cardinality tags to avoid - * repeated UTF8 encodings while also minimizing cache overhead and - * churn from high cardinality tags. - * - * NOTE: The aim of this cache is to reduce allocation overhead -- not CPU overhead. - * Using the cache has higher CPU overhead than simply calling {@link String#getBytes(java.nio.charset.Charset)}. - * - * The cache is thread safe. + * + *

Cache is designed to take advantage of low cardinality tags to avoid repeated UTF8 encodings + * while also minimizing cache overhead and churn from high cardinality tags. + * + *

NOTE: The aim of this cache is to reduce allocation overhead -- not CPU overhead. Using the + * cache has higher CPU overhead than simply calling {@link + * String#getBytes(java.nio.charset.Charset)}. + * + *

The cache is thread safe. */ -/* +/* * Cache works by using a 2-level promotion based scheme. - * - * Thread safety is achieved through using CacheEntry objects where the key data + * + * Thread safety is achieved through using CacheEntry objects where the key data * fields are final. - * - * Updating of the cache and bookkeeping are deliberately allowed to be racy to + * + * Updating of the cache and bookkeeping are deliberately allowed to be racy to * minimize CPU overhead and lock contention. - * - * The first time a value is requested, the value isn't cached and no CacheEntry - * is created. Without this refinement, the cost for constructing + * + * The first time a value is requested, the value isn't cached and no CacheEntry + * is created. Without this refinement, the cost for constructing * CacheEntry for unique values would negate the benefit of the cache. - * - * These first requests are tracked via edenMarkers which indicate if there was + * + * These first requests are tracked via edenMarkers which indicate if there was * previously an unsatisfied request to the same initial cache line. - * + * * If there was a request, then CacheEntry is created and stored into edenEntries. - * NOTE: The eden line marking process is imprecise and subject to request + * NOTE: The eden line marking process is imprecise and subject to request * ordering issues, but given that low cardinality entries are more likely to repeat * next. - * + * * If a collision occurs in the cache, linear probing is used to check other slots. * New cache entries fill any available slot within the probing window. - * - * If a subsequent request, finds a matching item in edenEntries. The hit count - * of the CacheEntry is bumped. If the CacheEntry exceeds the current promotion - * threshold, then the CacheEntry is inserted into promotionEntries -- freeing up - * a slot in edenEntries. 
If there isn't an available slot in promotionEntries, + * + * If a subsequent request, finds a matching item in edenEntries. The hit count + * of the CacheEntry is bumped. If the CacheEntry exceeds the current promotion + * threshold, then the CacheEntry is inserted into promotionEntries -- freeing up + * a slot in edenEntries. If there isn't an available slot in promotionEntries, * the LRU: least recently used promotionEntry is evicted. - * + * * If there are no available slots in edenEntries for a newly created CacheEntry... - * - * Attempt to early promote the MFU: most frequently used CacheEntry from + * + * Attempt to early promote the MFU: most frequently used CacheEntry from * edenEntries to promotedEntries (without eviction). - * - * If there's no space in promotedEntries to promote the MFU, then evict the + * + * If there's no space in promotedEntries to promote the MFU, then evict the * LFU: least frequently used entry from edenEntries instead. - * - * + * + * * LRU based eviction of the promotedEntries works on tagging with the last hit time. - * The access time can be provided directly to ValueUtf8Cache#getUtf8 or can + * The access time can be provided directly to ValueUtf8Cache#getUtf8 or can * be refreshed periodically by calling ValueUtf8Cache#updateAccessTime. - * - * If there's a natural transaction boundary around the UTF8 cache, - * calling ValueUtf8Cache#recalibrateThresholds will adjust promotion + * + * If there's a natural transaction boundary around the UTF8 cache, + * calling ValueUtf8Cache#recalibrateThresholds will adjust promotion * thresholds to provide better cache utilization. 
*/ public final class GenerationalUtf8Cache implements EncodingCache { private static final int MAX_PROBES = 8; - + private static final int MIN_PROMOTION_TRESHOLD = 2; private static final int INITIAL_PROMOTION_THRESHOLD = 10; - + private static final double HIT_DECAY = 0.8D; private static final double PURGE_THRESHOLD = 0.25D; - + private final CacheEntry[] edenEntries; private final boolean[] edenMarkers; - + private final CacheEntry[] promotedEntries; - + private long accessTimeMs; private double promotionThreshold = INITIAL_PROMOTION_THRESHOLD; - + int edenHits = 0; int promotedHits = 0; int earlyPromotions = 0; int promotions = 0; int edenEvictions = 0; int promotedEvictions = 0; - + public GenerationalUtf8Cache() { - this.accessTimeMs = System.currentTimeMillis(); - - // These sizes must be powers of 2 - this.edenEntries = new CacheEntry[256]; - this.edenMarkers = new boolean[256]; - - // The size must be a power of 2 - this.promotedEntries = new CacheEntry[512]; + this.accessTimeMs = System.currentTimeMillis(); + + // These sizes must be powers of 2 + this.edenEntries = new CacheEntry[256]; + this.edenMarkers = new boolean[256]; + + // The size must be a power of 2 + this.promotedEntries = new CacheEntry[512]; } - - /** - * Updates access time used @link {@link #getUtf8(String, String)} to the provided value - */ + + /** Updates access time used @link {@link #getUtf8(String, String)} to the provided value */ public void updateAccessTime(long accessTimeMs) { - this.accessTimeMs = accessTimeMs; + this.accessTimeMs = accessTimeMs; } - - /** - * Updates access time to the @link {@link System#currentTimeMillis()} - */ + + /** Updates access time to the @link {@link System#currentTimeMillis()} */ public void refreshAcessTime() { - this.updateAccessTime(System.currentTimeMillis()); + this.updateAccessTime(System.currentTimeMillis()); } - + public void recalibrate() { - this.recalibrate(System.currentTimeMillis()); + this.recalibrate(System.currentTimeMillis()); } 
- + /** - * Recalibrates promotion threshold based on promotion & eviction statistics, - * since last calibration - resets statistics + * Recalibrates promotion threshold based on promotion & eviction statistics, since last + * calibration - resets statistics + * * @param accessTimeMs */ public void recalibrate(long accessTimeMs) { - this.accessTimeMs = accessTimeMs; - - CacheEntry[] thisEntries = this.edenEntries; - for ( int i = 0; i < thisEntries.length; ++i ) { - CacheEntry entry = thisEntries[i]; - if ( entry == null ) continue; - - boolean purge = entry.decay(); - if ( purge ) this.edenEntries[i] = null; - } - - Arrays.fill(this.edenMarkers, false); - - int totalPromotions = this.promotions + this.earlyPromotions; - if ( totalPromotions == 0 && this.promotionThreshold >= MIN_PROMOTION_TRESHOLD ) { - this.promotionThreshold /= 1.5; - } else if ( totalPromotions > this.promotedEvictions / 2 ) { - this.promotionThreshold *= 1.5; - } - - this.edenHits = 0; - this.promotedHits = 0; - this.earlyPromotions = 0; - this.promotions = 0; - this.edenEvictions = 0; - this.promotedEvictions = 0; + this.accessTimeMs = accessTimeMs; + + CacheEntry[] thisEntries = this.edenEntries; + for (int i = 0; i < thisEntries.length; ++i) { + CacheEntry entry = thisEntries[i]; + if (entry == null) continue; + + boolean purge = entry.decay(); + if (purge) this.edenEntries[i] = null; + } + + Arrays.fill(this.edenMarkers, false); + + int totalPromotions = this.promotions + this.earlyPromotions; + if (totalPromotions == 0 && this.promotionThreshold >= MIN_PROMOTION_TRESHOLD) { + this.promotionThreshold /= 1.5; + } else if (totalPromotions > this.promotedEvictions / 2) { + this.promotionThreshold *= 1.5; + } + + this.edenHits = 0; + this.promotedHits = 0; + this.earlyPromotions = 0; + this.promotions = 0; + this.edenEvictions = 0; + this.promotedEvictions = 0; } - + @Override public byte[] encode(CharSequence charSeq) { - if ( charSeq instanceof String ) { - String str = (String)charSeq; - 
return this.getUtf8(str); - } else { - return null; - } + if (charSeq instanceof String) { + String str = (String) charSeq; + return this.getUtf8(str); + } else { + return null; + } } - - /** - * Returns the UTF-8 encoding of value -- using a cache value if available - */ + + /** Returns the UTF-8 encoding of value -- using a cache value if available */ public final byte[] getUtf8(String value) { return this.getUtf8(value, this.accessTimeMs); } - + /** - * Returns the UTF-8 encoding of value -- using a cache value if available - * If there is cache hit, the specified accessTimeMs is used to update the cache entry + * Returns the UTF-8 encoding of value -- using a cache value if available If there is cache hit, + * the specified accessTimeMs is used to update the cache entry */ public final byte[] getUtf8(String value, long accessTimeMs) { - int valueHash = value.hashCode(); - - CacheEntry[] localEntries = this.edenEntries; - long lookupTimeMs = this.accessTimeMs; - - int matchingLocalIndex = lookupEntry(localEntries, valueHash, value, lookupTimeMs); - if ( matchingLocalIndex != -1 ) { - CacheEntry localEntry = localEntries[matchingLocalIndex]; - - double hits = localEntry.hit(lookupTimeMs); - if ( hits > this.promotionThreshold ) { - // mark promoted first - to avoid racy insertions - this.promotions += 1; - - boolean evicted = lruInsert(this.promotedEntries, localEntry); - if ( evicted ) this.promotedEvictions += 1; - - localEntries[matchingLocalIndex] = null; - } - - this.edenHits += 1; - return localEntry.utf8(); - } - - CacheEntry[] promotedEntries = this.promotedEntries; - int matchingPromotedIndex = lookupEntry(promotedEntries, valueHash, value, lookupTimeMs); - if ( matchingPromotedIndex != -1 ) { - CacheEntry promotedEntry = promotedEntries[matchingPromotedIndex]; - - promotedEntry.hit(lookupTimeMs); - - this.promotedHits += 1; - return promotedEntry.utf8(); - } - - boolean wasMarked = reverseMark(this.edenMarkers, valueHash); - - // If slot isn't marked, 
this is likely the first request - // Don't create an entry yet - if ( !wasMarked ) return CacheEntry.utf8(value); - - CacheEntry newEntry = new CacheEntry(valueHash, value); - // First request was swallowed by marking, so double hit - newEntry.hit(lookupTimeMs); - newEntry.hit(lookupTimeMs); - - // search for empty slot or failing that the MFU entry - int localMfuIndex = findFirstAvailableOrMfuIndex(localEntries, valueHash); - CacheEntry localMfuEntry = localEntries[localMfuIndex]; - - // Found an empty slot - fill it - if ( localMfuEntry == null ) { - localEntries[localMfuIndex] = newEntry; - return newEntry.utf8(); - } - - // See if we can early promote the local MFU entry into the global cache - // Early promotion doesn't evict from the global cache - int globalAvailableIndex = findAvailable(promotedEntries, localMfuEntry.valueHash()); - if ( globalAvailableIndex != -1 ) { - promotedEntries[globalAvailableIndex] = localMfuEntry; - this.earlyPromotions += 1; - - localEntries[localMfuIndex] = newEntry; - return CacheEntry.utf8(value); - } - - // No empty slot - or space to promote into the global cache - // Insert into local cache while evicting the LFU - boolean evicted = lfuInsert(localEntries, newEntry); - if ( evicted ) this.promotedEvictions += 1; - - return newEntry.utf8(); + int valueHash = value.hashCode(); + + CacheEntry[] localEntries = this.edenEntries; + long lookupTimeMs = this.accessTimeMs; + + int matchingLocalIndex = lookupEntry(localEntries, valueHash, value, lookupTimeMs); + if (matchingLocalIndex != -1) { + CacheEntry localEntry = localEntries[matchingLocalIndex]; + + double hits = localEntry.hit(lookupTimeMs); + if (hits > this.promotionThreshold) { + // mark promoted first - to avoid racy insertions + this.promotions += 1; + + boolean evicted = lruInsert(this.promotedEntries, localEntry); + if (evicted) this.promotedEvictions += 1; + + localEntries[matchingLocalIndex] = null; + } + + this.edenHits += 1; + return localEntry.utf8(); + } + + 
CacheEntry[] promotedEntries = this.promotedEntries; + int matchingPromotedIndex = lookupEntry(promotedEntries, valueHash, value, lookupTimeMs); + if (matchingPromotedIndex != -1) { + CacheEntry promotedEntry = promotedEntries[matchingPromotedIndex]; + + promotedEntry.hit(lookupTimeMs); + + this.promotedHits += 1; + return promotedEntry.utf8(); + } + + boolean wasMarked = reverseMark(this.edenMarkers, valueHash); + + // If slot isn't marked, this is likely the first request + // Don't create an entry yet + if (!wasMarked) return CacheEntry.utf8(value); + + CacheEntry newEntry = new CacheEntry(valueHash, value); + // First request was swallowed by marking, so double hit + newEntry.hit(lookupTimeMs); + newEntry.hit(lookupTimeMs); + + // search for empty slot or failing that the MFU entry + int localMfuIndex = findFirstAvailableOrMfuIndex(localEntries, valueHash); + CacheEntry localMfuEntry = localEntries[localMfuIndex]; + + // Found an empty slot - fill it + if (localMfuEntry == null) { + localEntries[localMfuIndex] = newEntry; + return newEntry.utf8(); + } + + // See if we can early promote the local MFU entry into the global cache + // Early promotion doesn't evict from the global cache + int globalAvailableIndex = findAvailable(promotedEntries, localMfuEntry.valueHash()); + if (globalAvailableIndex != -1) { + promotedEntries[globalAvailableIndex] = localMfuEntry; + this.earlyPromotions += 1; + + localEntries[localMfuIndex] = newEntry; + return CacheEntry.utf8(value); + } + + // No empty slot - or space to promote into the global cache + // Insert into local cache while evicting the LFU + boolean evicted = lfuInsert(localEntries, newEntry); + if (evicted) this.promotedEvictions += 1; + + return newEntry.utf8(); } - + static final int findAvailable(CacheEntry[] entries, int newValueHash) { - int initialBucketIndex = initialBucketIndex(entries, newValueHash); - for ( int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index ) { - if ( index >= 
entries.length ) index = 0; - - CacheEntry entry = entries[index]; - if ( entry == null || entry.hits() == 0 ) return index; - } - return -1; + int initialBucketIndex = initialBucketIndex(entries, newValueHash); + for (int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index) { + if (index >= entries.length) index = 0; + + CacheEntry entry = entries[index]; + if (entry == null || entry.hits() == 0) return index; + } + return -1; } - + static final int findFirstAvailableOrMfuIndex(CacheEntry[] entries, int newValueHash) { - double mfuHits = Double.MIN_VALUE; - int mfuIndex = -1; - - int initialBucketIndex = initialBucketIndex(entries, newValueHash); - for ( int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index ) { - if ( index >= entries.length ) index = 0; - - CacheEntry entry = entries[index]; - if ( entry == null ) return index; - - double hits = entry.hits(); - if ( hits > mfuHits ) { - mfuHits = hits; - mfuIndex = index; - } - } - return mfuIndex; + double mfuHits = Double.MIN_VALUE; + int mfuIndex = -1; + + int initialBucketIndex = initialBucketIndex(entries, newValueHash); + for (int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index) { + if (index >= entries.length) index = 0; + + CacheEntry entry = entries[index]; + if (entry == null) return index; + + double hits = entry.hits(); + if (hits > mfuHits) { + mfuHits = hits; + mfuIndex = index; + } + } + return mfuIndex; } - + static final boolean reverseMark(boolean[] marks, int newValueHash) { - int index = initialBucketIndex(marks, newValueHash); - boolean wasMarked = marks[index]; - marks[index] = !wasMarked; - return wasMarked; + int index = initialBucketIndex(marks, newValueHash); + boolean wasMarked = marks[index]; + marks[index] = !wasMarked; + return wasMarked; } - + static final boolean lfuInsert(CacheEntry[] entries, CacheEntry newEntry) { int initialBucketIndex = initialBucketIndex(entries, newEntry.valueHash()); - + // 
initial scan to see if there's an empty slot or marker entry is already present double lowestHits = Double.MAX_VALUE; int lfuIndex = -1; - for ( int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index ) { - if ( index >= entries.length ) index = 0; - - CacheEntry entry = entries[index]; - if ( entry == null || entry.hits() == 0 ) { - entries[index] = newEntry; - return false; - } else { - double hits = entry.hits(); - if ( hits < lowestHits ) { - lowestHits = hits; - lfuIndex = index; - } - } - } - - // If we get here, then we're evicted the LRU - entries[lfuIndex] = newEntry; - return true; + for (int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index) { + if (index >= entries.length) index = 0; + + CacheEntry entry = entries[index]; + if (entry == null || entry.hits() == 0) { + entries[index] = newEntry; + return false; + } else { + double hits = entry.hits(); + if (hits < lowestHits) { + lowestHits = hits; + lfuIndex = index; + } + } + } + + // If we get here, then we're evicted the LRU + entries[lfuIndex] = newEntry; + return true; } - + static final boolean lruInsert(CacheEntry[] entries, CacheEntry newEntry) { int initialBucketIndex = initialBucketIndex(entries, newEntry.valueHash()); - + // initial scan to see if there's an empty slot or entry is already present long lowestUsedMs = Long.MAX_VALUE; int lruIndex = -1; - for ( int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index ) { - if ( index >= entries.length ) index = 0; - - CacheEntry entry = entries[index]; - if ( entry == null ) { - entries[index] = newEntry; - return false; - } else if ( entry.matches(newEntry) ) { - entries[index] = newEntry; - return false; - } else { - long lastUsedMs = entry.lastUsedMs(); - if ( lastUsedMs < lowestUsedMs ) { - lowestUsedMs = lastUsedMs; - lruIndex = index; - } - } - } - - entries[lruIndex] = newEntry; - return true; + for (int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; 
++probe, ++index) { + if (index >= entries.length) index = 0; + + CacheEntry entry = entries[index]; + if (entry == null) { + entries[index] = newEntry; + return false; + } else if (entry.matches(newEntry)) { + entries[index] = newEntry; + return false; + } else { + long lastUsedMs = entry.lastUsedMs(); + if (lastUsedMs < lowestUsedMs) { + lowestUsedMs = lastUsedMs; + lruIndex = index; + } + } + } + + entries[lruIndex] = newEntry; + return true; } - + static final int initialBucketIndex(CacheEntry[] entries, int valueHash) { return valueHash & (entries.length - 1); } @@ -348,90 +343,87 @@ static final int initialBucketIndex(CacheEntry[] entries, int valueHash) { static final int initialBucketIndex(boolean[] marks, int valueHash) { return valueHash & (marks.length - 1); } - + static final int lookupEntry( - CacheEntry[] entries, - int valueHash, String value, - long lookupTimeMs) - { - int initialBucketIndex = initialBucketIndex(entries, valueHash); - for ( int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index ) { - if ( index >= entries.length ) index = 0; - - CacheEntry entry = entries[index]; - if ( entry != null && entry.matches(valueHash, value) ) { - return index; - } - } - return -1; + CacheEntry[] entries, int valueHash, String value, long lookupTimeMs) { + int initialBucketIndex = initialBucketIndex(entries, valueHash); + for (int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index) { + if (index >= entries.length) index = 0; + + CacheEntry entry = entries[index]; + if (entry != null && entry.matches(valueHash, value)) { + return index; + } + } + return -1; } - + static final int bucketHash(int tagHash, int valueHash) { - return tagHash + 31 * valueHash; + return tagHash + 31 * valueHash; } - + static final class CacheEntry { - final int valueHash; + final int valueHash; final String value; final byte[] valueUtf8; - + boolean promoted = false; long lastUsedMs = 0; double hitCount = 0; - + public 
CacheEntry(int valueHash, String value) { this.valueHash = valueHash; this.value = value; this.valueUtf8 = utf8(value); } - - boolean matches(CacheEntry thatEntry ) { - return ( this == thatEntry ) || this.matches(thatEntry.valueHash, thatEntry.value); + + boolean matches(CacheEntry thatEntry) { + return (this == thatEntry) || this.matches(thatEntry.valueHash, thatEntry.value); } - + boolean matches(int valueHash, String value) { return (this.valueHash == valueHash) && value.equals(this.value); } - + int valueHash() { return this.valueHash; } - + double hits() { - return this.hitCount; + return this.hitCount; } - + long lastUsedMs() { return this.lastUsedMs; } - + byte[] utf8() { return this.valueUtf8; } - + double hit(long lastUsedMs) { this.lastUsedMs = lastUsedMs; this.hitCount += 1; - + return this.hitCount; } - + boolean decay() { this.hitCount *= HIT_DECAY; - + return (this.hitCount < PURGE_THRESHOLD); } - + static final byte[] utf8(String value) { return value.getBytes(StandardCharsets.UTF_8); } - + @Override public String toString() { - if ( this.value == null ) { - return "marker"; + if (this.value == null) { + return "marker"; } else { - return this.value + " - hits: " + this.hitCount + " used (ms): " + this.lastUsedMs; + return this.value + " - hits: " + this.hitCount + " used (ms): " + this.lastUsedMs; } } } diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java index 3bdb5edffde..a823f6d5fa9 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java @@ -4,152 +4,148 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; -/** +/** * A simple UTF8 cache - primarily intended for tag names - * - * Cache is designed to against resilient against single use tags - * - * NOTE: The aim of 
this cache is to reduce allocation overhead -- not CPU overhead. - * Using the cache has higher CPU overhead than simply calling {@link String#getBytes(java.nio.charset.Charset)}. - * - * The cache is thread safe. + * + *

Cache is designed to against resilient against single use tags + * + *

NOTE: The aim of this cache is to reduce allocation overhead -- not CPU overhead. Using the + * cache has higher CPU overhead than simply calling {@link + * String#getBytes(java.nio.charset.Charset)}. + * + *

The cache is thread safe. */ -/* - * Thread safety is achieved through using CacheEntry objects where the key data +/* + * Thread safety is achieved through using CacheEntry objects where the key data * fields are final. - * - * Updating of the cache and bookkeeping are deliberately allowed to be racy to + * + * Updating of the cache and bookkeeping are deliberately allowed to be racy to * minimize CPU overhead and lock contention. - * - * The first time a value is requested, the value isn't cached and no CacheEntry - * is created. Without this refinement, the cost for constructing + * + * The first time a value is requested, the value isn't cached and no CacheEntry + * is created. Without this refinement, the cost for constructing * CacheEntry for unique values would negate the benefit of the cache. - * - * These first requests are tracked via markers which indicate if there was + * + * These first requests are tracked via markers which indicate if there was * previously an unsatisfied request to the same initial cache line. - * + * * If there was a request, then CacheEntry is created and stored into entries. - * NOTE: The cache line marking process is imprecise and subject to request + * NOTE: The cache line marking process is imprecise and subject to request * ordering issues, but given that low cardinality entries are more likely to repeat * next, imperically this scheme works well. - * + * * If a collision occurs in the cache, linear probing is used to check other slots. * New cache entries fill any available slot within the probing window. - * - * If a subsequent request, finds a matching item in entries. The hit count + * + * If a subsequent request, finds a matching item in entries. The hit count * of the CacheEntry is bumped. - * + * * If there are no available slots in entries for a newly created CacheEntry, * a LFU: least frequently used eviction policy is used to free up a slot. 
*/ public final class SimpleUtf8Cache implements EncodingCache { public static final SimpleUtf8Cache INSTANCE = new SimpleUtf8Cache(); - + private static final int MAX_PROBES = 8; - + private final int SIZE = 256; - + private final boolean[] markers = new boolean[SIZE]; private final CacheEntry[] entries = new CacheEntry[SIZE]; - + private static final double HIT_DECAY = 0.8D; private static final double PURGE_THRESHOLD = 0.25D; - + protected int hits = 0; protected int evictions = 0; - + public void recalibrate() { - CacheEntry[] thisEntries = this.entries; - for ( int i = 0; i < thisEntries.length; ++i ) { - CacheEntry entry = thisEntries[i]; - if ( entry == null ) continue; - - boolean purge = entry.decay(); - if ( purge ) thisEntries[i] = null; - } - - Arrays.fill(this.markers, false); + CacheEntry[] thisEntries = this.entries; + for (int i = 0; i < thisEntries.length; ++i) { + CacheEntry entry = thisEntries[i]; + if (entry == null) continue; + + boolean purge = entry.decay(); + if (purge) thisEntries[i] = null; + } + + Arrays.fill(this.markers, false); } - + @Override public byte[] encode(CharSequence charSeq) { - if ( charSeq instanceof String ) { - String str = (String)charSeq; - return this.getUtf8(str); - } else { - return null; - } + if (charSeq instanceof String) { + String str = (String) charSeq; + return this.getUtf8(str); + } else { + return null; + } } - - /** - * Returns the UTF-8 encoding of value -- using a cache value if available - */ + + /** Returns the UTF-8 encoding of value -- using a cache value if available */ public final byte[] getUtf8(String value) { - CacheEntry[] thisEntries = this.entries; - - int valueHash = value.hashCode(); - - CacheEntry matchingEntry = lookupEntry(thisEntries, valueHash, value); - if ( matchingEntry != null ) { - this.hits += 1; - return matchingEntry.utf8(); - } - - boolean wasMarked = reverseMark(this.markers, valueHash); - if ( !wasMarked ) return CacheEntry.utf8(value); - - CacheEntry newEntry = new 
CacheEntry(valueHash, value); - newEntry.hit(); - - boolean evicted = lfuInsert(thisEntries, newEntry); - if ( evicted ) this.evictions += 1; - - return newEntry.utf8(); + CacheEntry[] thisEntries = this.entries; + + int valueHash = value.hashCode(); + + CacheEntry matchingEntry = lookupEntry(thisEntries, valueHash, value); + if (matchingEntry != null) { + this.hits += 1; + return matchingEntry.utf8(); + } + + boolean wasMarked = reverseMark(this.markers, valueHash); + if (!wasMarked) return CacheEntry.utf8(value); + + CacheEntry newEntry = new CacheEntry(valueHash, value); + newEntry.hit(); + + boolean evicted = lfuInsert(thisEntries, newEntry); + if (evicted) this.evictions += 1; + + return newEntry.utf8(); } - - static final CacheEntry lookupEntry( - CacheEntry[] entries, - int valueHash, String value) - { - int initialBucketIndex = initialBucketIndex(entries, valueHash); - for ( int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index ) { - if ( index >= entries.length ) index = 0; - - CacheEntry entry = entries[index]; - if ( entry != null && entry.matches(valueHash, value) ) { - return entry; - } - } - return null; + + static final CacheEntry lookupEntry(CacheEntry[] entries, int valueHash, String value) { + int initialBucketIndex = initialBucketIndex(entries, valueHash); + for (int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index) { + if (index >= entries.length) index = 0; + + CacheEntry entry = entries[index]; + if (entry != null && entry.matches(valueHash, value)) { + return entry; + } + } + return null; } - + static final boolean lfuInsert(CacheEntry[] entries, CacheEntry newEntry) { int initialBucketIndex = initialBucketIndex(entries, newEntry.valueHash()); - + // initial scan to see if there's an empty slot or marker entry is already present double lowestHits = Double.MAX_VALUE; int lfuIndex = -1; - for ( int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index ) { - if ( index >= 
entries.length ) index = 0; - - CacheEntry entry = entries[index]; - if ( entry == null || entry.hits() == 0 ) { - entries[index] = newEntry; - return false; - } else { - double hits = entry.hits(); - if ( hits < lowestHits ) { - lowestHits = hits; - lfuIndex = index; - } - } - } - - // If we get here, then we're evicting the LRU - entries[lfuIndex] = newEntry; - return true; + for (int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index) { + if (index >= entries.length) index = 0; + + CacheEntry entry = entries[index]; + if (entry == null || entry.hits() == 0) { + entries[index] = newEntry; + return false; + } else { + double hits = entry.hits(); + if (hits < lowestHits) { + lowestHits = hits; + lfuIndex = index; + } + } + } + + // If we get here, then we're evicting the LRU + entries[lfuIndex] = newEntry; + return true; } - + static final int initialBucketIndex(CacheEntry[] entries, int valueHash) { return valueHash & (entries.length - 1); } @@ -157,70 +153,70 @@ static final int initialBucketIndex(CacheEntry[] entries, int valueHash) { static final int initialBucketIndex(boolean[] marks, int valueHash) { return valueHash & (marks.length - 1); } - + static final boolean reverseMark(boolean[] marks, int newValueHash) { - int index = initialBucketIndex(marks, newValueHash); - boolean wasMarked = marks[index]; - marks[index] = !wasMarked; - return wasMarked; + int index = initialBucketIndex(marks, newValueHash); + boolean wasMarked = marks[index]; + marks[index] = !wasMarked; + return wasMarked; } - + static final class CacheEntry { - final int valueHash; + final int valueHash; final String value; final byte[] valueUtf8; - + boolean promoted = false; double hitCount = 0; - + public CacheEntry(int valueHash, String value) { this.valueHash = valueHash; this.value = value; this.valueUtf8 = utf8(value); } - - boolean matches(CacheEntry thatEntry ) { - return ( this == thatEntry ) || this.matches(thatEntry.valueHash, thatEntry.value); + + boolean 
matches(CacheEntry thatEntry) { + return (this == thatEntry) || this.matches(thatEntry.valueHash, thatEntry.value); } - + boolean matches(int valueHash, String value) { return (this.valueHash == valueHash) && value.equals(this.value); } - + int valueHash() { return this.valueHash; } - + double hits() { - return this.hitCount; + return this.hitCount; } - + byte[] utf8() { return this.valueUtf8; } - + double hit() { this.hitCount += 1; - + return this.hitCount; } - + boolean decay() { this.hitCount *= HIT_DECAY; - + return (this.hitCount < PURGE_THRESHOLD); } - + static final byte[] utf8(String value) { return value.getBytes(StandardCharsets.UTF_8); } - + @Override public String toString() { - if ( this.value == null ) { - return "marker"; + if (this.value == null) { + return "marker"; } else { - return this.value + " - hits: " + this.hitCount; + return this.value + " - hits: " + this.hitCount; } } } diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/TraceMapperV0_4.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/TraceMapperV0_4.java index 44dfbb2e771..fc4fd53c7a7 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/TraceMapperV0_4.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/TraceMapperV0_4.java @@ -24,12 +24,12 @@ import okhttp3.RequestBody; public final class TraceMapperV0_4 implements TraceMapper { - static final SimpleUtf8Cache TAG_CACHE = - Config.get().isUtf8CacheEnabled() ? new SimpleUtf8Cache() : null; - + static final SimpleUtf8Cache TAG_CACHE = + Config.get().isUtf8CacheEnabled() ? new SimpleUtf8Cache() : null; + static final GenerationalUtf8Cache VALUE_CACHE = - Config.get().isUtf8CacheEnabled() ? new GenerationalUtf8Cache() : null; - + Config.get().isUtf8CacheEnabled() ? 
new GenerationalUtf8Cache() : null; + private final int size; public TraceMapperV0_4(int size) { @@ -65,7 +65,7 @@ MetaWriter forLastSpanInChunk(final boolean lastSpanInChunk) { public void accept(Metadata metadata) { TAG_CACHE.recalibrate(); VALUE_CACHE.recalibrate(); - + final boolean writeSamplingPriority = firstSpanInChunk || lastSpanInChunk; final UTF8BytesString processTags = firstSpanInChunk ? ProcessTags.getTagsForSerialization() : null; diff --git a/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/GenerationalUtf8CacheTest.java b/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/GenerationalUtf8CacheTest.java index 7cec6a6f2c3..fefb85949de 100644 --- a/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/GenerationalUtf8CacheTest.java +++ b/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/GenerationalUtf8CacheTest.java @@ -8,161 +8,149 @@ import java.nio.charset.StandardCharsets; import java.util.Random; import java.util.concurrent.ThreadLocalRandom; - import org.junit.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; public class GenerationalUtf8CacheTest { @ParameterizedTest - @ValueSource(strings={"foo", "bar", "baz", "quux"}) + @ValueSource(strings = {"foo", "bar", "baz", "quux"}) public void getUtf8(String value) { - GenerationalUtf8Cache cache = new GenerationalUtf8Cache(); - - for ( int i = 0; i < 10; ++i ) { - byte[] valueUtf8 = cache.getUtf8(value); - assertArrayEquals(value.getBytes(StandardCharsets.UTF_8), valueUtf8); - } + GenerationalUtf8Cache cache = new GenerationalUtf8Cache(); + + for (int i = 0; i < 10; ++i) { + byte[] valueUtf8 = cache.getUtf8(value); + assertArrayEquals(value.getBytes(StandardCharsets.UTF_8), valueUtf8); + } } - + @Test public void caching() { - GenerationalUtf8Cache cache = new GenerationalUtf8Cache(); - - String value = "bar"; - byte[] expected = value.getBytes(StandardCharsets.UTF_8); - - byte[] first = 
cache.getUtf8(value); - assertArrayEquals(expected, first); - - // first request isn't cached - to avoid burning slots - byte[] second = cache.getUtf8(value); - assertArrayEquals(expected, second); - assertNotSame(first, second); - - // after first request, the entry should be cached - byte[] third = cache.getUtf8(value); - assertArrayEquals(expected, third); - assertSame(second, third); - - assertNotEquals(0, cache.edenHits); + GenerationalUtf8Cache cache = new GenerationalUtf8Cache(); + + String value = "bar"; + byte[] expected = value.getBytes(StandardCharsets.UTF_8); + + byte[] first = cache.getUtf8(value); + assertArrayEquals(expected, first); + + // first request isn't cached - to avoid burning slots + byte[] second = cache.getUtf8(value); + assertArrayEquals(expected, second); + assertNotSame(first, second); + + // after first request, the entry should be cached + byte[] third = cache.getUtf8(value); + assertArrayEquals(expected, third); + assertSame(second, third); + + assertNotEquals(0, cache.edenHits); } - + @Test public void promotion() { - GenerationalUtf8Cache cache = new GenerationalUtf8Cache(); - - String value = "bar"; - byte[] expected = value.getBytes(StandardCharsets.UTF_8); - - byte[] first = cache.getUtf8(value); - assertArrayEquals(expected, first); - - byte[] second = cache.getUtf8(value); - assertArrayEquals(expected, second); - assertNotSame(second, first); - - while ( cache.promotions == 0 ) { - byte[] cached = cache.getUtf8(value); - assertArrayEquals(expected, cached); - assertSame(cached, second); - } - - assertNotEquals(0, cache.edenHits); - - for ( int i = 0; i < 10; ++i ) { - byte[] cached = cache.getUtf8(value); - - assertArrayEquals(expected, cached); - assertSame(cached, second); - } - + GenerationalUtf8Cache cache = new GenerationalUtf8Cache(); + + String value = "bar"; + byte[] expected = value.getBytes(StandardCharsets.UTF_8); + + byte[] first = cache.getUtf8(value); + assertArrayEquals(expected, first); + + byte[] second = 
cache.getUtf8(value); + assertArrayEquals(expected, second); + assertNotSame(second, first); + + while (cache.promotions == 0) { + byte[] cached = cache.getUtf8(value); + assertArrayEquals(expected, cached); + assertSame(cached, second); + } + + assertNotEquals(0, cache.edenHits); + + for (int i = 0; i < 10; ++i) { + byte[] cached = cache.getUtf8(value); + + assertArrayEquals(expected, cached); + assertSame(cached, second); + } + assertNotEquals(0, cache.promotedHits); } - + @Test public void fuzz() { Random random = ThreadLocalRandom.current(); - + int edenHits = 0; int promotedHits = 0; - - GenerationalUtf8Cache cache = new GenerationalUtf8Cache(); - for ( int i = 0; i < 1_000; ++i ) { + + GenerationalUtf8Cache cache = new GenerationalUtf8Cache(); + for (int i = 0; i < 1_000; ++i) { cache.recalibrate(); - + int cycles = 500 + random.nextInt(2_000); - for ( int j = 0; j < cycles; ++j ) { - String nextTag = nextTag(); - String nextValue = nextValue(); - byte[] nextExpected = nextValue.getBytes(StandardCharsets.UTF_8); - - byte[] nextValueUtf8 = cache.getUtf8(nextValue); - assertArrayEquals(nextExpected, nextValueUtf8); + for (int j = 0; j < cycles; ++j) { + String nextTag = nextTag(); + String nextValue = nextValue(); + byte[] nextExpected = nextValue.getBytes(StandardCharsets.UTF_8); + + byte[] nextValueUtf8 = cache.getUtf8(nextValue); + assertArrayEquals(nextExpected, nextValueUtf8); } - + edenHits += cache.edenHits; promotedHits += cache.promotedHits; - + printStats(cache); } - + assertNotEquals(0, edenHits); assertNotEquals(0, promotedHits); } - - static final String[] TAGS = { - "foo", - "bar", - "baz" - }; - - static final String[] BASE_STRINGS = { - "Hello", - "world", - "foo", - "bar", - "baz", - "quux" - }; - + + static final String[] TAGS = {"foo", "bar", "baz"}; + + static final String[] BASE_STRINGS = {"Hello", "world", "foo", "bar", "baz", "quux"}; + static final String nextTag() { - ThreadLocalRandom random = ThreadLocalRandom.current(); - - int 
tagIndex = random.nextInt(TAGS.length + 1); - if ( tagIndex >= TAGS.length ) { - return "tag-" + Integer.toString(random.nextInt()); - } else { - return TAGS[tagIndex]; - } + ThreadLocalRandom random = ThreadLocalRandom.current(); + + int tagIndex = random.nextInt(TAGS.length + 1); + if (tagIndex >= TAGS.length) { + return "tag-" + Integer.toString(random.nextInt()); + } else { + return TAGS[tagIndex]; + } } - + static final String nextValue() { - ThreadLocalRandom random = ThreadLocalRandom.current(); - - if ( random.nextDouble() < 0.1 ) { - return Integer.toString(random.nextInt()); - } - - int baseIndex = random.nextInt(BASE_STRINGS.length); - String baseString = BASE_STRINGS[baseIndex]; - - if ( random.nextDouble() < 0.2 ) { - baseString = baseString.toLowerCase(); - } - - int valueSuffix = random.nextInt(2 * baseIndex + 1); - return baseString + valueSuffix; + ThreadLocalRandom random = ThreadLocalRandom.current(); + + if (random.nextDouble() < 0.1) { + return Integer.toString(random.nextInt()); + } + + int baseIndex = random.nextInt(BASE_STRINGS.length); + String baseString = BASE_STRINGS[baseIndex]; + + if (random.nextDouble() < 0.2) { + baseString = baseString.toLowerCase(); + } + + int valueSuffix = random.nextInt(2 * baseIndex + 1); + return baseString + valueSuffix; } - + static final void printStats(GenerationalUtf8Cache cache) { System.out.printf( - "eden hits: %5d\tpromotion hits: %5d\tpromotions: %5d\tearly: %5d\tlocal evictions: %5d\tglobal evictions: %5d%n", - cache.edenHits, - cache.promotedHits, - cache.promotions, - cache.earlyPromotions, - cache.edenEvictions, - cache.promotedEvictions); + "eden hits: %5d\tpromotion hits: %5d\tpromotions: %5d\tearly: %5d\tlocal evictions: %5d\tglobal evictions: %5d%n", + cache.edenHits, + cache.promotedHits, + cache.promotions, + cache.earlyPromotions, + cache.edenEvictions, + cache.promotedEvictions); } } diff --git a/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/SimpleUtf8CacheTest.java 
b/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/SimpleUtf8CacheTest.java index 08bc9d4f517..6aeff655a3a 100644 --- a/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/SimpleUtf8CacheTest.java +++ b/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/SimpleUtf8CacheTest.java @@ -8,122 +8,109 @@ import java.nio.charset.StandardCharsets; import java.util.Random; import java.util.concurrent.ThreadLocalRandom; - import org.junit.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; public class SimpleUtf8CacheTest { @ParameterizedTest - @ValueSource(strings={"foo", "bar", "baz", "quux"}) + @ValueSource(strings = {"foo", "bar", "baz", "quux"}) public void getUtf8(String value) { - SimpleUtf8Cache cache = new SimpleUtf8Cache(); - - for ( int i = 0; i < 10; ++i ) { - byte[] valueUtf8 = cache.getUtf8(value); - assertArrayEquals(value.getBytes(StandardCharsets.UTF_8), valueUtf8); - } + SimpleUtf8Cache cache = new SimpleUtf8Cache(); + + for (int i = 0; i < 10; ++i) { + byte[] valueUtf8 = cache.getUtf8(value); + assertArrayEquals(value.getBytes(StandardCharsets.UTF_8), valueUtf8); + } } - + @Test public void caching() { - SimpleUtf8Cache cache = new SimpleUtf8Cache(); - - String value = "bar"; - byte[] expected = value.getBytes(StandardCharsets.UTF_8); - - byte[] first = cache.getUtf8(value); - assertArrayEquals(expected, first); - - // first request isn't cached - to avoid burning slots - byte[] second = cache.getUtf8(value); - assertArrayEquals(expected, second); - assertNotSame(first, second); - - // after first request, the entry should be cached - byte[] third = cache.getUtf8(value); - assertArrayEquals(expected, third); - assertSame(second, third); - - assertNotEquals(0, cache.hits); - } - + SimpleUtf8Cache cache = new SimpleUtf8Cache(); + + String value = "bar"; + byte[] expected = value.getBytes(StandardCharsets.UTF_8); + + byte[] first = cache.getUtf8(value); + 
assertArrayEquals(expected, first); + + // first request isn't cached - to avoid burning slots + byte[] second = cache.getUtf8(value); + assertArrayEquals(expected, second); + assertNotSame(first, second); + + // after first request, the entry should be cached + byte[] third = cache.getUtf8(value); + assertArrayEquals(expected, third); + assertSame(second, third); + + assertNotEquals(0, cache.hits); + } + @Test public void fuzz() { Random random = ThreadLocalRandom.current(); - + int hits = 0; - + SimpleUtf8Cache cache = new SimpleUtf8Cache(); - for ( int i = 0; i < 1_000; ++i ) { + for (int i = 0; i < 1_000; ++i) { cache.recalibrate(); - + int cycles = 500 + random.nextInt(2_000); - for ( int j = 0; j < cycles; ++j ) { - String nextTag = nextTag(); - String nextValue = nextValue(); - byte[] nextExpected = nextValue.getBytes(StandardCharsets.UTF_8); - - byte[] nextValueUtf8 = cache.getUtf8(nextValue); - assertArrayEquals(nextExpected, nextValueUtf8); + for (int j = 0; j < cycles; ++j) { + String nextTag = nextTag(); + String nextValue = nextValue(); + byte[] nextExpected = nextValue.getBytes(StandardCharsets.UTF_8); + + byte[] nextValueUtf8 = cache.getUtf8(nextValue); + assertArrayEquals(nextExpected, nextValueUtf8); } - + hits += cache.hits; - + printStats(cache); } - + assertNotEquals(0, hits); } - - static final String[] TAGS = { - "foo", - "bar", - "baz" - }; - - static final String[] BASE_STRINGS = { - "Hello", - "world", - "foo", - "bar", - "baz", - "quux" - }; - + + static final String[] TAGS = {"foo", "bar", "baz"}; + + static final String[] BASE_STRINGS = {"Hello", "world", "foo", "bar", "baz", "quux"}; + static final String nextTag() { - ThreadLocalRandom random = ThreadLocalRandom.current(); - - int tagIndex = random.nextInt(TAGS.length + 1); - if ( tagIndex >= TAGS.length ) { - return "tag-" + Integer.toString(random.nextInt()); - } else { - return TAGS[tagIndex]; - } + ThreadLocalRandom random = ThreadLocalRandom.current(); + + int tagIndex = 
random.nextInt(TAGS.length + 1); + if (tagIndex >= TAGS.length) { + return "tag-" + Integer.toString(random.nextInt()); + } else { + return TAGS[tagIndex]; + } } - + static final String nextValue() { - ThreadLocalRandom random = ThreadLocalRandom.current(); - - if ( random.nextDouble() < 0.1 ) { - return Integer.toString(random.nextInt()); - } - - int baseIndex = random.nextInt(BASE_STRINGS.length); - String baseString = BASE_STRINGS[baseIndex]; - - if ( random.nextDouble() < 0.2 ) { - baseString = baseString.toLowerCase(); - } - - int valueSuffix = random.nextInt(2 * baseIndex + 1); - return baseString + valueSuffix; + ThreadLocalRandom random = ThreadLocalRandom.current(); + + if (random.nextDouble() < 0.1) { + return Integer.toString(random.nextInt()); + } + + int baseIndex = random.nextInt(BASE_STRINGS.length); + String baseString = BASE_STRINGS[baseIndex]; + + if (random.nextDouble() < 0.2) { + baseString = baseString.toLowerCase(); + } + + int valueSuffix = random.nextInt(2 * baseIndex + 1); + return baseString + valueSuffix; } - + static final void printStats(SimpleUtf8Cache cache) { System.out.printf( - "eden hits: %5d\tpromotion hits: %5d\tpromotions: %5d\tearly: %5d\tlocal evictions: %5d\tglobal evictions: %5d%n", - cache.hits, - cache.evictions); + "eden hits: %5d\tpromotion hits: %5d\tpromotions: %5d\tearly: %5d\tlocal evictions: %5d\tglobal evictions: %5d%n", + cache.hits, cache.evictions); } } diff --git a/internal-api/src/main/java/datadog/trace/api/Config.java b/internal-api/src/main/java/datadog/trace/api/Config.java index 89c7f243d4c..61e78723c2e 100644 --- a/internal-api/src/main/java/datadog/trace/api/Config.java +++ b/internal-api/src/main/java/datadog/trace/api/Config.java @@ -2735,8 +2735,7 @@ PROFILING_DATADOG_PROFILER_ENABLED, isDatadogProfilerSafeInCurrentEnvironment()) this.optimizedMapEnabled = configProvider.getBoolean(GeneralConfig.OPTIMIZED_MAP_ENABLED, false); - this.utf8CacheEnabled = - 
configProvider.getBoolean(GeneralConfig.UTF8_CACHE_ENABLED, true); + this.utf8CacheEnabled = configProvider.getBoolean(GeneralConfig.UTF8_CACHE_ENABLED, true); int defaultStackTraceLengthLimit = instrumenterConfig.isCiVisibilityEnabled() @@ -4422,9 +4421,9 @@ public boolean isJdkSocketEnabled() { public boolean isOptimizedMapEnabled() { return optimizedMapEnabled; } - + public boolean isUtf8CacheEnabled() { - return utf8CacheEnabled; + return utf8CacheEnabled; } public int getStackTraceLengthLimit() { From 95767a67596e96f4b950432439e24c4495e1a17f Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Thu, 28 Aug 2025 13:34:05 -0400 Subject: [PATCH 03/23] Tweaking comments --- .../writer/ddagent/GenerationalUtf8Cache.java | 18 +++++++++--------- .../common/writer/ddagent/SimpleUtf8Cache.java | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java index 62e5e34b701..af27c841301 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java @@ -29,13 +29,13 @@ * is created. Without this refinement, the cost for constructing * CacheEntry for unique values would negate the benefit of the cache. * - * These first requests are tracked via edenMarkers which indicate if there was - * previously an unsatisfied request to the same initial cache line. + * These first requests are tracked via edenMarkers which indicate if there was a + * previously unsatisfied request to the same initial cache line. * * If there was a request, then CacheEntry is created and stored into edenEntries. 
* NOTE: The eden line marking process is imprecise and subject to request - * ordering issues, but given that low cardinality entries are more likely to repeat - * next. + * ordering issues. But given that low cardinality entries are more likely to repeat + * next, imperically this scheme works well. * * If a collision occurs in the cache, linear probing is used to check other slots. * New cache entries fill any available slot within the probing window. @@ -48,7 +48,7 @@ * * If there are no available slots in edenEntries for a newly created CacheEntry... * - * Attempt to early promote the MFU: most frequently used CacheEntry from + * First, attempt to early promote the MFU: most frequently used CacheEntry from * edenEntries to promotedEntries (without eviction). * * If there's no space in promotedEntries to promote the MFU, then evict the @@ -56,12 +56,12 @@ * * * LRU based eviction of the promotedEntries works on tagging with the last hit time. - * The access time can be provided directly to ValueUtf8Cache#getUtf8 or can - * be refreshed periodically by calling ValueUtf8Cache#updateAccessTime. + * The access time can be provided directly to GenerationalUtf8Cache#getUtf8 or can + * be refreshed periodically by calling GenerationalUtf8Cache#updateAccessTime. * * If there's a natural transaction boundary around the UTF8 cache, - * calling ValueUtf8Cache#recalibrateThresholds will adjust promotion - * thresholds to provide better cache utilization. + * calling ValueUtf8Cache#reclibrate will adjust promotion thresholds to + * provide better cache utilization. 
*/ public final class GenerationalUtf8Cache implements EncodingCache { private static final int MAX_PROBES = 8; diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java index a823f6d5fa9..a358a9b7560 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java @@ -7,7 +7,7 @@ /** * A simple UTF8 cache - primarily intended for tag names * - *

Cache is designed to against resilient against single use tags + *

Cache is designed to be resilient against single use values * *

NOTE: The aim of this cache is to reduce allocation overhead -- not CPU overhead. Using the * cache has higher CPU overhead than simply calling {@link From 5270f9c905db4976ca5685dd4f13fa25577c0c1e Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Thu, 28 Aug 2025 13:43:39 -0400 Subject: [PATCH 04/23] Tweaking comments --- .../datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java index a358a9b7560..bb5a1779f3e 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java @@ -31,7 +31,7 @@ * * If there was a request, then CacheEntry is created and stored into entries. * NOTE: The cache line marking process is imprecise and subject to request - * ordering issues, but given that low cardinality entries are more likely to repeat + * ordering issues. But given that low cardinality entries are more likely to repeat * next, imperically this scheme works well. * * If a collision occurs in the cache, linear probing is used to check other slots. 
From 69c49832feaea39b4d404da69f0dd304a35644b2 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Thu, 28 Aug 2025 14:34:57 -0400 Subject: [PATCH 05/23] Comparing results with caching off --- internal-api/src/main/java/datadog/trace/api/Config.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal-api/src/main/java/datadog/trace/api/Config.java b/internal-api/src/main/java/datadog/trace/api/Config.java index 61e78723c2e..69eb1bc9ea3 100644 --- a/internal-api/src/main/java/datadog/trace/api/Config.java +++ b/internal-api/src/main/java/datadog/trace/api/Config.java @@ -2735,7 +2735,7 @@ PROFILING_DATADOG_PROFILER_ENABLED, isDatadogProfilerSafeInCurrentEnvironment()) this.optimizedMapEnabled = configProvider.getBoolean(GeneralConfig.OPTIMIZED_MAP_ENABLED, false); - this.utf8CacheEnabled = configProvider.getBoolean(GeneralConfig.UTF8_CACHE_ENABLED, true); + this.utf8CacheEnabled = configProvider.getBoolean(GeneralConfig.UTF8_CACHE_ENABLED, false); int defaultStackTraceLengthLimit = instrumenterConfig.isCiVisibilityEnabled() From ebc3fb0f36dd99bb6ebd91a259ac06ec666b1c39 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Fri, 29 Aug 2025 10:29:37 -0400 Subject: [PATCH 06/23] Fixing silly oversight when cache is disabled --- .../datadog/trace/common/writer/ddagent/TraceMapperV0_4.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/TraceMapperV0_4.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/TraceMapperV0_4.java index fc4fd53c7a7..eb7005c9ebc 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/TraceMapperV0_4.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/TraceMapperV0_4.java @@ -63,8 +63,8 @@ MetaWriter forLastSpanInChunk(final boolean lastSpanInChunk) { @Override public void accept(Metadata metadata) { - TAG_CACHE.recalibrate(); - VALUE_CACHE.recalibrate(); + if ( TAG_CACHE != 
null ) TAG_CACHE.recalibrate(); + if ( VALUE_CACHE != null ) VALUE_CACHE.recalibrate(); final boolean writeSamplingPriority = firstSpanInChunk || lastSpanInChunk; final UTF8BytesString processTags = From 247bb02fe7026529b03d04a86ec63c4faa063700 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Fri, 29 Aug 2025 13:27:55 -0400 Subject: [PATCH 07/23] Adding comments about benchmark data being used --- .../trace/common/writer/ddagent/Utf8Benchmark.java | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java b/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java index 3d9e0085c8c..85c452efe00 100644 --- a/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java +++ b/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java @@ -47,17 +47,23 @@ static final String nextTag() { } static final String nextValue(String tag) { - if (tag == "custom") { - return nextCustomValue(); + if (tag.equals("custom")) { + return nextCustomValue(tag); } else { return nextStandardValue(tag); } } - static final String nextCustomValue() { - return "custom" + ThreadLocalRandom.current().nextInt(); + /* + * Produces a high cardinality value - > thousands of distinct values per tag - many 1-time values + */ + static final String nextCustomValue(String tag) { + return tag + ThreadLocalRandom.current().nextInt(); } + /* + * Produces a moderate cardinality value - tens of distinct values per tag + */ static final String nextStandardValue(String tag) { return tag + ThreadLocalRandom.current().nextInt(20); } From 69c94d16fe5db0dfb66c73d7716d770691f22cd1 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Fri, 29 Aug 2025 13:54:59 -0400 Subject: [PATCH 08/23] Misc improvements - implementing review feedback - experimenting with exact hash based marking scheme - fixed issue with not updating entry after hit in 
simple cache - re-enabling cache by default for benchmarking - spotless --- .../writer/ddagent/GenerationalUtf8Cache.java | 174 +++++++++--------- .../writer/ddagent/SimpleUtf8Cache.java | 101 +++++----- .../writer/ddagent/TraceMapperV0_4.java | 4 +- .../ddagent/GenerationalUtf8CacheTest.java | 2 +- .../main/java/datadog/trace/api/Config.java | 2 +- 5 files changed, 150 insertions(+), 133 deletions(-) diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java index af27c841301..37397399f28 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java @@ -1,6 +1,7 @@ package datadog.trace.common.writer.ddagent; import datadog.communication.serialization.EncodingCache; + import java.nio.charset.StandardCharsets; import java.util.Arrays; @@ -64,18 +65,19 @@ * provide better cache utilization. 
*/ public final class GenerationalUtf8Cache implements EncodingCache { - private static final int MAX_PROBES = 8; + private static final int MAX_PROBES = 4; private static final int MIN_PROMOTION_TRESHOLD = 2; private static final int INITIAL_PROMOTION_THRESHOLD = 10; - private static final double HIT_DECAY = 0.8D; - private static final double PURGE_THRESHOLD = 0.25D; + private static final double SCORE_DECAY = 0.8D; + private static final double PURGE_THRESHOLD = 0.1D; + private static final double PROMOTION_THRESHOLD_ADJ_FACTOR = 1.5; private final CacheEntry[] edenEntries; - private final boolean[] edenMarkers; + private final int[] edenMarkers; - private final CacheEntry[] promotedEntries; + private final CacheEntry[] tenuredEntries; private long accessTimeMs; private double promotionThreshold = INITIAL_PROMOTION_THRESHOLD; @@ -85,17 +87,17 @@ public final class GenerationalUtf8Cache implements EncodingCache { int earlyPromotions = 0; int promotions = 0; int edenEvictions = 0; - int promotedEvictions = 0; + int tenuredEvictions = 0; public GenerationalUtf8Cache() { this.accessTimeMs = System.currentTimeMillis(); // These sizes must be powers of 2 - this.edenEntries = new CacheEntry[256]; - this.edenMarkers = new boolean[256]; + this.edenEntries = new CacheEntry[64]; + this.edenMarkers = new int[64]; // The size must be a power of 2 - this.promotedEntries = new CacheEntry[512]; + this.tenuredEntries = new CacheEntry[128]; } /** Updates access time used @link {@link #getUtf8(String, String)} to the provided value */ @@ -108,15 +110,18 @@ public void refreshAcessTime() { this.updateAccessTime(System.currentTimeMillis()); } - public void recalibrate() { + public synchronized void recalibrate() { this.recalibrate(System.currentTimeMillis()); } /** - * Recalibrates promotion threshold based on promotion & eviction statistics, since last - * calibration - resets statistics - * - * @param accessTimeMs + * Recalibrates the cache + * Applies a decay to existing entries 
- and purges entries below the PURGE_THRESHOLD + * + * Adjusts the promotion threshold depending on ratio of promotions to + * evictions, since prior recalibration + * + * While still racy this method is synchronized to avoid simultaneous recalibrations */ public void recalibrate(long accessTimeMs) { this.accessTimeMs = accessTimeMs; @@ -130,13 +135,13 @@ public void recalibrate(long accessTimeMs) { if (purge) this.edenEntries[i] = null; } - Arrays.fill(this.edenMarkers, false); + Arrays.fill(this.edenMarkers, 0); int totalPromotions = this.promotions + this.earlyPromotions; if (totalPromotions == 0 && this.promotionThreshold >= MIN_PROMOTION_TRESHOLD) { - this.promotionThreshold /= 1.5; - } else if (totalPromotions > this.promotedEvictions / 2) { - this.promotionThreshold *= 1.5; + this.promotionThreshold /= PROMOTION_THRESHOLD_ADJ_FACTOR; + } else if (totalPromotions > this.tenuredEvictions / 2) { + this.promotionThreshold *= PROMOTION_THRESHOLD_ADJ_FACTOR; } this.edenHits = 0; @@ -144,7 +149,7 @@ public void recalibrate(long accessTimeMs) { this.earlyPromotions = 0; this.promotions = 0; this.edenEvictions = 0; - this.promotedEvictions = 0; + this.tenuredEvictions = 0; } @Override @@ -167,12 +172,12 @@ public final byte[] getUtf8(String value) { * the specified accessTimeMs is used to update the cache entry */ public final byte[] getUtf8(String value, long accessTimeMs) { - int valueHash = value.hashCode(); + int adjHash = CacheEntry.adjHash(value); CacheEntry[] localEntries = this.edenEntries; long lookupTimeMs = this.accessTimeMs; - int matchingLocalIndex = lookupEntry(localEntries, valueHash, value, lookupTimeMs); + int matchingLocalIndex = lookupEntryIndex(localEntries, adjHash, value, lookupTimeMs); if (matchingLocalIndex != -1) { CacheEntry localEntry = localEntries[matchingLocalIndex]; @@ -181,8 +186,8 @@ public final byte[] getUtf8(String value, long accessTimeMs) { // mark promoted first - to avoid racy insertions this.promotions += 1; - boolean evicted 
= lruInsert(this.promotedEntries, localEntry); - if (evicted) this.promotedEvictions += 1; + boolean evicted = lruInsert(this.tenuredEntries, localEntry); + if (evicted) this.tenuredEvictions += 1; localEntries[matchingLocalIndex] = null; } @@ -191,8 +196,8 @@ public final byte[] getUtf8(String value, long accessTimeMs) { return localEntry.utf8(); } - CacheEntry[] promotedEntries = this.promotedEntries; - int matchingPromotedIndex = lookupEntry(promotedEntries, valueHash, value, lookupTimeMs); + CacheEntry[] promotedEntries = this.tenuredEntries; + int matchingPromotedIndex = lookupEntryIndex(promotedEntries, adjHash, value, lookupTimeMs); if (matchingPromotedIndex != -1) { CacheEntry promotedEntry = promotedEntries[matchingPromotedIndex]; @@ -202,19 +207,19 @@ public final byte[] getUtf8(String value, long accessTimeMs) { return promotedEntry.utf8(); } - boolean wasMarked = reverseMark(this.edenMarkers, valueHash); + boolean wasMarked = mark(this.edenMarkers, adjHash); // If slot isn't marked, this is likely the first request // Don't create an entry yet if (!wasMarked) return CacheEntry.utf8(value); - CacheEntry newEntry = new CacheEntry(valueHash, value); + CacheEntry newEntry = new CacheEntry(adjHash, value); // First request was swallowed by marking, so double hit newEntry.hit(lookupTimeMs); newEntry.hit(lookupTimeMs); // search for empty slot or failing that the MFU entry - int localMfuIndex = findFirstAvailableOrMfuIndex(localEntries, valueHash); + int localMfuIndex = findFirstAvailableOrMfuIndex(localEntries, adjHash); CacheEntry localMfuEntry = localEntries[localMfuIndex]; // Found an empty slot - fill it @@ -225,7 +230,7 @@ public final byte[] getUtf8(String value, long accessTimeMs) { // See if we can early promote the local MFU entry into the global cache // Early promotion doesn't evict from the global cache - int globalAvailableIndex = findAvailable(promotedEntries, localMfuEntry.valueHash()); + int globalAvailableIndex = 
findAvailableIndex(promotedEntries, localMfuEntry.adjHash()); if (globalAvailableIndex != -1) { promotedEntries[globalAvailableIndex] = localMfuEntry; this.earlyPromotions += 1; @@ -237,66 +242,68 @@ public final byte[] getUtf8(String value, long accessTimeMs) { // No empty slot - or space to promote into the global cache // Insert into local cache while evicting the LFU boolean evicted = lfuInsert(localEntries, newEntry); - if (evicted) this.promotedEvictions += 1; + if (evicted) this.tenuredEvictions += 1; return newEntry.utf8(); } - static final int findAvailable(CacheEntry[] entries, int newValueHash) { - int initialBucketIndex = initialBucketIndex(entries, newValueHash); + static final int findAvailableIndex(CacheEntry[] entries, int newAdjHash) { + int initialBucketIndex = initialBucketIndex(entries, newAdjHash); for (int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index) { if (index >= entries.length) index = 0; CacheEntry entry = entries[index]; - if (entry == null || entry.hits() == 0) return index; + if (entry == null || entry.isPurgeable()) return index; } return -1; } - static final int findFirstAvailableOrMfuIndex(CacheEntry[] entries, int newValueHash) { - double mfuHits = Double.MIN_VALUE; + static final int findFirstAvailableOrMfuIndex(CacheEntry[] entries, int newAdjHash) { + double mfuScore = Double.MIN_VALUE; int mfuIndex = -1; - int initialBucketIndex = initialBucketIndex(entries, newValueHash); + int initialBucketIndex = initialBucketIndex(entries, newAdjHash); for (int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index) { if (index >= entries.length) index = 0; CacheEntry entry = entries[index]; if (entry == null) return index; - double hits = entry.hits(); - if (hits > mfuHits) { - mfuHits = hits; + double score = entry.score(); + if (score > mfuScore) { + mfuScore = score; mfuIndex = index; } } return mfuIndex; } - static final boolean reverseMark(boolean[] marks, int newValueHash) { - int 
index = initialBucketIndex(marks, newValueHash); - boolean wasMarked = marks[index]; - marks[index] = !wasMarked; - return wasMarked; + static final boolean mark(int[] marks, int newAdjHash) { + int index = initialBucketIndex(marks, newAdjHash); + + int priorMarkHash = marks[index]; + marks[index] = newAdjHash; + + return (priorMarkHash == newAdjHash); } static final boolean lfuInsert(CacheEntry[] entries, CacheEntry newEntry) { - int initialBucketIndex = initialBucketIndex(entries, newEntry.valueHash()); + int initialBucketIndex = initialBucketIndex(entries, newEntry.adjHash()); // initial scan to see if there's an empty slot or marker entry is already present - double lowestHits = Double.MAX_VALUE; + double lowestScore = Double.MAX_VALUE; int lfuIndex = -1; for (int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index) { if (index >= entries.length) index = 0; CacheEntry entry = entries[index]; - if (entry == null || entry.hits() == 0) { + if (entry == null || entry.isPurgeable()) { entries[index] = newEntry; return false; } else { - double hits = entry.hits(); - if (hits < lowestHits) { - lowestHits = hits; + double score = entry.score(); + if (score < lowestScore) { + lowestScore = score; lfuIndex = index; } } @@ -308,7 +315,7 @@ static final boolean lfuInsert(CacheEntry[] entries, CacheEntry newEntry) { } static final boolean lruInsert(CacheEntry[] entries, CacheEntry newEntry) { - int initialBucketIndex = initialBucketIndex(entries, newEntry.valueHash()); + int initialBucketIndex = initialBucketIndex(entries, newEntry.adjHash()); // initial scan to see if there's an empty slot or entry is already present long lowestUsedMs = Long.MAX_VALUE; @@ -336,61 +343,57 @@ static final boolean lruInsert(CacheEntry[] entries, CacheEntry newEntry) { return true; } - static final int initialBucketIndex(CacheEntry[] entries, int valueHash) { - return valueHash & (entries.length - 1); + static final int initialBucketIndex(CacheEntry[] entries, int 
adjHash) { + return adjHash & (entries.length - 1); } - static final int initialBucketIndex(boolean[] marks, int valueHash) { - return valueHash & (marks.length - 1); + static final int initialBucketIndex(int[] marks, int adjHash) { + return adjHash & (marks.length - 1); } - static final int lookupEntry( - CacheEntry[] entries, int valueHash, String value, long lookupTimeMs) { - int initialBucketIndex = initialBucketIndex(entries, valueHash); + static final int lookupEntryIndex( + CacheEntry[] entries, int adjHash, String value, long lookupTimeMs) { + int initialBucketIndex = initialBucketIndex(entries, adjHash); for (int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index) { if (index >= entries.length) index = 0; CacheEntry entry = entries[index]; - if (entry != null && entry.matches(valueHash, value)) { + if (entry != null && entry.matches(adjHash, value)) { return index; } } return -1; } - static final int bucketHash(int tagHash, int valueHash) { - return tagHash + 31 * valueHash; - } - static final class CacheEntry { - final int valueHash; + final int adjHash; final String value; final byte[] valueUtf8; boolean promoted = false; long lastUsedMs = 0; - double hitCount = 0; + double score = 0; - public CacheEntry(int valueHash, String value) { - this.valueHash = valueHash; + public CacheEntry(int adjHash, String value) { + this.adjHash = adjHash; this.value = value; this.valueUtf8 = utf8(value); } boolean matches(CacheEntry thatEntry) { - return (this == thatEntry) || this.matches(thatEntry.valueHash, thatEntry.value); + return (this == thatEntry) || this.matches(thatEntry.adjHash, thatEntry.value); } - boolean matches(int valueHash, String value) { - return (this.valueHash == valueHash) && value.equals(this.value); + boolean matches(int adjHash, String value) { + return (this.adjHash == adjHash) && value.equals(this.value); } - int valueHash() { - return this.valueHash; + int adjHash() { + return this.adjHash; } - double hits() { - 
return this.hitCount; + double score() { + return this.score; } long lastUsedMs() { @@ -403,15 +406,24 @@ byte[] utf8() { double hit(long lastUsedMs) { this.lastUsedMs = lastUsedMs; - this.hitCount += 1; + this.score += 1; - return this.hitCount; + return this.score; } boolean decay() { - this.hitCount *= HIT_DECAY; + this.score *= SCORE_DECAY; - return (this.hitCount < PURGE_THRESHOLD); + return this.isPurgeable(); + } + + boolean isPurgeable() { + return (this.score < PURGE_THRESHOLD); + } + + static final int adjHash(String value) { + int hash = value.hashCode(); + return (hash == 0) ? 0xDA7AD06 : hash; } static final byte[] utf8(String value) { @@ -420,11 +432,7 @@ static final byte[] utf8(String value) { @Override public String toString() { - if (this.value == null) { - return "marker"; - } else { - return this.value + " - hits: " + this.hitCount + " used (ms): " + this.lastUsedMs; - } + return this.value + " - score: " + this.score + " used (ms): " + this.lastUsedMs; } } } diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java index bb5a1779f3e..e8164ff0391 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java @@ -44,13 +44,11 @@ * a LFU: least frequently used eviction policy is used to free up a slot. 
*/ public final class SimpleUtf8Cache implements EncodingCache { - public static final SimpleUtf8Cache INSTANCE = new SimpleUtf8Cache(); + private static final int MAX_PROBES = 4; - private static final int MAX_PROBES = 8; + private final int SIZE = 64; - private final int SIZE = 256; - - private final boolean[] markers = new boolean[SIZE]; + private final int[] markers = new int[SIZE]; private final CacheEntry[] entries = new CacheEntry[SIZE]; private static final double HIT_DECAY = 0.8D; @@ -59,7 +57,13 @@ public final class SimpleUtf8Cache implements EncodingCache { protected int hits = 0; protected int evictions = 0; - public void recalibrate() { + /** + * Recalibrates the cache + * Applies a decay to existing entries - and purges entries below the PURGE_THRESHOLD + * + * While still racy this method is synchronized to avoid simultaneous recalibrations + */ + public synchronized void recalibrate() { CacheEntry[] thisEntries = this.entries; for (int i = 0; i < thisEntries.length; ++i) { CacheEntry entry = thisEntries[i]; @@ -69,7 +73,7 @@ public void recalibrate() { if (purge) thisEntries[i] = null; } - Arrays.fill(this.markers, false); + Arrays.fill(this.markers, 0); } @Override @@ -86,18 +90,20 @@ public byte[] encode(CharSequence charSeq) { public final byte[] getUtf8(String value) { CacheEntry[] thisEntries = this.entries; - int valueHash = value.hashCode(); + int adjHash = value.hashCode(); - CacheEntry matchingEntry = lookupEntry(thisEntries, valueHash, value); + CacheEntry matchingEntry = lookupEntry(thisEntries, adjHash, value); if (matchingEntry != null) { + matchingEntry.hit(); + this.hits += 1; return matchingEntry.utf8(); } - boolean wasMarked = reverseMark(this.markers, valueHash); + boolean wasMarked = mark(this.markers, adjHash); if (!wasMarked) return CacheEntry.utf8(value); - CacheEntry newEntry = new CacheEntry(valueHash, value); + CacheEntry newEntry = new CacheEntry(adjHash, value); newEntry.hit(); boolean evicted = lfuInsert(thisEntries, 
newEntry); @@ -106,13 +112,13 @@ public final byte[] getUtf8(String value) { return newEntry.utf8(); } - static final CacheEntry lookupEntry(CacheEntry[] entries, int valueHash, String value) { - int initialBucketIndex = initialBucketIndex(entries, valueHash); + static final CacheEntry lookupEntry(CacheEntry[] entries, int adjHash, String value) { + int initialBucketIndex = initialBucketIndex(entries, adjHash); for (int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index) { if (index >= entries.length) index = 0; CacheEntry entry = entries[index]; - if (entry != null && entry.matches(valueHash, value)) { + if (entry != null && entry.matches(adjHash, value)) { return entry; } } @@ -120,7 +126,7 @@ static final CacheEntry lookupEntry(CacheEntry[] entries, int valueHash, String } static final boolean lfuInsert(CacheEntry[] entries, CacheEntry newEntry) { - int initialBucketIndex = initialBucketIndex(entries, newEntry.valueHash()); + int initialBucketIndex = initialBucketIndex(entries, newEntry.adjHash()); // initial scan to see if there's an empty slot or marker entry is already present double lowestHits = Double.MAX_VALUE; @@ -129,11 +135,11 @@ static final boolean lfuInsert(CacheEntry[] entries, CacheEntry newEntry) { if (index >= entries.length) index = 0; CacheEntry entry = entries[index]; - if (entry == null || entry.hits() == 0) { + if (entry == null || entry.score() == 0) { entries[index] = newEntry; return false; } else { - double hits = entry.hits(); + double hits = entry.score(); if (hits < lowestHits) { lowestHits = hits; lfuIndex = index; @@ -146,49 +152,51 @@ static final boolean lfuInsert(CacheEntry[] entries, CacheEntry newEntry) { return true; } - static final int initialBucketIndex(CacheEntry[] entries, int valueHash) { - return valueHash & (entries.length - 1); + static final int initialBucketIndex(CacheEntry[] entries, int adjHash) { + return adjHash & (entries.length - 1); } - static final int initialBucketIndex(boolean[] 
marks, int valueHash) { - return valueHash & (marks.length - 1); + static final int initialBucketIndex(int[] marks, int adjHash) { + return adjHash & (marks.length - 1); } - static final boolean reverseMark(boolean[] marks, int newValueHash) { - int index = initialBucketIndex(marks, newValueHash); - boolean wasMarked = marks[index]; - marks[index] = !wasMarked; - return wasMarked; + static final boolean mark(int[] marks, int newAdjHash) { + int index = initialBucketIndex(marks, newAdjHash); + + int priorMarkHash = marks[index]; + marks[index] = newAdjHash; + + return (priorMarkHash == newAdjHash); } static final class CacheEntry { - final int valueHash; + final int adjHash; final String value; final byte[] valueUtf8; boolean promoted = false; - double hitCount = 0; + double score = 0; - public CacheEntry(int valueHash, String value) { - this.valueHash = valueHash; + public CacheEntry(int adjHash, String value) { + this.adjHash = adjHash; this.value = value; this.valueUtf8 = utf8(value); } boolean matches(CacheEntry thatEntry) { - return (this == thatEntry) || this.matches(thatEntry.valueHash, thatEntry.value); + return (this == thatEntry) || this.matches(thatEntry.adjHash, thatEntry.value); } - boolean matches(int valueHash, String value) { - return (this.valueHash == valueHash) && value.equals(this.value); + boolean matches(int adjHash, String value) { + return (this.adjHash == adjHash) && value.equals(this.value); } - int valueHash() { - return this.valueHash; + int adjHash() { + return this.adjHash; } - double hits() { - return this.hitCount; + double score() { + return this.score; } byte[] utf8() { @@ -196,15 +204,20 @@ byte[] utf8() { } double hit() { - this.hitCount += 1; + this.score += 1; - return this.hitCount; + return this.score; } boolean decay() { - this.hitCount *= HIT_DECAY; + this.score *= HIT_DECAY; - return (this.hitCount < PURGE_THRESHOLD); + return (this.score < PURGE_THRESHOLD); + } + + static final int adjHash(String value) { + int hash = 
value.hashCode(); + return (hash == 0) ? 0xDA7AD06 : hash; } static final byte[] utf8(String value) { @@ -213,11 +226,7 @@ static final byte[] utf8(String value) { @Override public String toString() { - if (this.value == null) { - return "marker"; - } else { - return this.value + " - hits: " + this.hitCount; - } + return this.value + " - score: " + this.score; } } } diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/TraceMapperV0_4.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/TraceMapperV0_4.java index eb7005c9ebc..475fe0dee8d 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/TraceMapperV0_4.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/TraceMapperV0_4.java @@ -63,8 +63,8 @@ MetaWriter forLastSpanInChunk(final boolean lastSpanInChunk) { @Override public void accept(Metadata metadata) { - if ( TAG_CACHE != null ) TAG_CACHE.recalibrate(); - if ( VALUE_CACHE != null ) VALUE_CACHE.recalibrate(); + if (TAG_CACHE != null) TAG_CACHE.recalibrate(); + if (VALUE_CACHE != null) VALUE_CACHE.recalibrate(); final boolean writeSamplingPriority = firstSpanInChunk || lastSpanInChunk; final UTF8BytesString processTags = diff --git a/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/GenerationalUtf8CacheTest.java b/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/GenerationalUtf8CacheTest.java index fefb85949de..91c2838f83b 100644 --- a/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/GenerationalUtf8CacheTest.java +++ b/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/GenerationalUtf8CacheTest.java @@ -151,6 +151,6 @@ static final void printStats(GenerationalUtf8Cache cache) { cache.promotions, cache.earlyPromotions, cache.edenEvictions, - cache.promotedEvictions); + cache.tenuredEvictions); } } diff --git a/internal-api/src/main/java/datadog/trace/api/Config.java 
b/internal-api/src/main/java/datadog/trace/api/Config.java index 369a34926bf..702cdfcc346 100644 --- a/internal-api/src/main/java/datadog/trace/api/Config.java +++ b/internal-api/src/main/java/datadog/trace/api/Config.java @@ -2735,7 +2735,7 @@ PROFILING_DATADOG_PROFILER_ENABLED, isDatadogProfilerSafeInCurrentEnvironment()) this.optimizedMapEnabled = configProvider.getBoolean(GeneralConfig.OPTIMIZED_MAP_ENABLED, false); - this.utf8CacheEnabled = configProvider.getBoolean(GeneralConfig.UTF8_CACHE_ENABLED, false); + this.utf8CacheEnabled = configProvider.getBoolean(GeneralConfig.UTF8_CACHE_ENABLED, true); int defaultStackTraceLengthLimit = instrumenterConfig.isCiVisibilityEnabled() From 01aa2846257c05acab77ef35462718e1a47ab983 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Tue, 2 Sep 2025 08:42:51 -0400 Subject: [PATCH 09/23] Tweaking the cache heuristics - altered marking strategy to use a bloom filter of previously requested values, once a new entry hits the filter the filter is reset to zero - tweaking cache sizes --- .../writer/ddagent/GenerationalUtf8Cache.java | 77 +++++++++++-------- .../writer/ddagent/SimpleUtf8Cache.java | 13 +++- 2 files changed, 54 insertions(+), 36 deletions(-) diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java index 37397399f28..ad4aafc6fbb 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java @@ -1,7 +1,6 @@ package datadog.trace.common.writer.ddagent; import datadog.communication.serialization.EncodingCache; - import java.nio.charset.StandardCharsets; import java.util.Arrays; @@ -65,10 +64,10 @@ * provide better cache utilization. 
*/ public final class GenerationalUtf8Cache implements EncodingCache { - private static final int MAX_PROBES = 4; + private static final int MAX_PROBES = 8; private static final int MIN_PROMOTION_TRESHOLD = 2; - private static final int INITIAL_PROMOTION_THRESHOLD = 10; + private static final int INITIAL_PROMOTION_THRESHOLD = 16; private static final double SCORE_DECAY = 0.8D; private static final double PURGE_THRESHOLD = 0.1D; @@ -93,8 +92,8 @@ public GenerationalUtf8Cache() { this.accessTimeMs = System.currentTimeMillis(); // These sizes must be powers of 2 - this.edenEntries = new CacheEntry[64]; - this.edenMarkers = new int[64]; + this.edenEntries = new CacheEntry[128]; + this.edenMarkers = new int[128]; // The size must be a power of 2 this.tenuredEntries = new CacheEntry[128]; @@ -115,28 +114,37 @@ public synchronized void recalibrate() { } /** - * Recalibrates the cache - * Applies a decay to existing entries - and purges entries below the PURGE_THRESHOLD - * - * Adjusts the promotion threshold depending on ratio of promotions to - * evictions, since prior recalibration - * - * While still racy this method is synchronized to avoid simultaneous recalibrations + * Recalibrates the cache Applies a decay to existing entries - and purges entries below the + * PURGE_THRESHOLD + * + *

Adjusts the promotion threshold depending on ratio of promotions to evictions, since prior + * recalibration + * + *

While still racy this method is synchronized to avoid simultaneous recalibrations */ public void recalibrate(long accessTimeMs) { this.accessTimeMs = accessTimeMs; - CacheEntry[] thisEntries = this.edenEntries; - for (int i = 0; i < thisEntries.length; ++i) { - CacheEntry entry = thisEntries[i]; + CacheEntry[] edenEntries = this.edenEntries; + for (int i = 0; i < edenEntries.length; ++i) { + CacheEntry entry = edenEntries[i]; if (entry == null) continue; boolean purge = entry.decay(); - if (purge) this.edenEntries[i] = null; + if (purge) edenEntries[i] = null; } Arrays.fill(this.edenMarkers, 0); + CacheEntry[] tenuredEntries = this.tenuredEntries; + for (int i = 0; i < tenuredEntries.length; ++i) { + CacheEntry entry = tenuredEntries[i]; + if (entry == null) continue; + + boolean purge = entry.decay(); + if (purge) tenuredEntries[i] = null; + } + int totalPromotions = this.promotions + this.earlyPromotions; if (totalPromotions == 0 && this.promotionThreshold >= MIN_PROMOTION_TRESHOLD) { this.promotionThreshold /= PROMOTION_THRESHOLD_ADJ_FACTOR; @@ -174,26 +182,26 @@ public final byte[] getUtf8(String value) { public final byte[] getUtf8(String value, long accessTimeMs) { int adjHash = CacheEntry.adjHash(value); - CacheEntry[] localEntries = this.edenEntries; + CacheEntry[] edenEntries = this.edenEntries; long lookupTimeMs = this.accessTimeMs; - int matchingLocalIndex = lookupEntryIndex(localEntries, adjHash, value, lookupTimeMs); - if (matchingLocalIndex != -1) { - CacheEntry localEntry = localEntries[matchingLocalIndex]; + int matchingEdenIndex = lookupEntryIndex(edenEntries, adjHash, value, lookupTimeMs); + if (matchingEdenIndex != -1) { + CacheEntry edenEntry = edenEntries[matchingEdenIndex]; - double hits = localEntry.hit(lookupTimeMs); + double hits = edenEntry.hit(lookupTimeMs); if (hits > this.promotionThreshold) { // mark promoted first - to avoid racy insertions this.promotions += 1; - boolean evicted = lruInsert(this.tenuredEntries, localEntry); + boolean 
evicted = lruInsert(this.tenuredEntries, edenEntry); if (evicted) this.tenuredEvictions += 1; - localEntries[matchingLocalIndex] = null; + edenEntries[matchingEdenIndex] = null; } this.edenHits += 1; - return localEntry.utf8(); + return edenEntry.utf8(); } CacheEntry[] promotedEntries = this.tenuredEntries; @@ -219,12 +227,12 @@ public final byte[] getUtf8(String value, long accessTimeMs) { newEntry.hit(lookupTimeMs); // search for empty slot or failing that the MFU entry - int localMfuIndex = findFirstAvailableOrMfuIndex(localEntries, adjHash); - CacheEntry localMfuEntry = localEntries[localMfuIndex]; + int localMfuIndex = findFirstAvailableOrMfuIndex(edenEntries, adjHash); + CacheEntry localMfuEntry = edenEntries[localMfuIndex]; // Found an empty slot - fill it if (localMfuEntry == null) { - localEntries[localMfuIndex] = newEntry; + edenEntries[localMfuIndex] = newEntry; return newEntry.utf8(); } @@ -235,14 +243,14 @@ public final byte[] getUtf8(String value, long accessTimeMs) { promotedEntries[globalAvailableIndex] = localMfuEntry; this.earlyPromotions += 1; - localEntries[localMfuIndex] = newEntry; + edenEntries[localMfuIndex] = newEntry; return CacheEntry.utf8(value); } // No empty slot - or space to promote into the global cache // Insert into local cache while evicting the LFU - boolean evicted = lfuInsert(localEntries, newEntry); - if (evicted) this.tenuredEvictions += 1; + boolean evicted = lfuInsert(edenEntries, newEntry); + if (evicted) this.edenEvictions += 1; return newEntry.utf8(); } @@ -282,9 +290,14 @@ static final boolean mark(int[] marks, int newAdjHash) { int index = initialBucketIndex(marks, newAdjHash); int priorMarkHash = marks[index]; - marks[index] = newAdjHash; - return (priorMarkHash == newAdjHash); + boolean match = ((priorMarkHash & newAdjHash) == newAdjHash); + if (match) { + marks[index] = 0; + } else { + marks[index] = priorMarkHash | newAdjHash; + } + return match; } static final boolean lfuInsert(CacheEntry[] entries, CacheEntry 
newEntry) { diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java index e8164ff0391..2af25140553 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java @@ -46,7 +46,7 @@ public final class SimpleUtf8Cache implements EncodingCache { private static final int MAX_PROBES = 4; - private final int SIZE = 64; + private final int SIZE = 128; private final int[] markers = new int[SIZE]; private final CacheEntry[] entries = new CacheEntry[SIZE]; @@ -75,7 +75,7 @@ public synchronized void recalibrate() { Arrays.fill(this.markers, 0); } - + @Override public byte[] encode(CharSequence charSeq) { if (charSeq instanceof String) { @@ -164,9 +164,14 @@ static final boolean mark(int[] marks, int newAdjHash) { int index = initialBucketIndex(marks, newAdjHash); int priorMarkHash = marks[index]; - marks[index] = newAdjHash; - return (priorMarkHash == newAdjHash); + boolean match = ((priorMarkHash & newAdjHash) == newAdjHash); + if (match) { + marks[index] = 0; + } else { + marks[index] = priorMarkHash | newAdjHash; + } + return match; } static final class CacheEntry { From f15e1cc1b090803a0acabb4136a752077987ca13 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Tue, 2 Sep 2025 08:51:46 -0400 Subject: [PATCH 10/23] spotless --- .../writer/ddagent/GenerationalUtf8Cache.java | 12 +++++++++- .../writer/ddagent/SimpleUtf8Cache.java | 22 ++++++++++++++----- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java index ad4aafc6fbb..08043a8644c 100644 --- 
a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java @@ -289,8 +289,18 @@ static final int findFirstAvailableOrMfuIndex(CacheEntry[] entries, int newAdjHa static final boolean mark(int[] marks, int newAdjHash) { int index = initialBucketIndex(marks, newAdjHash); - int priorMarkHash = marks[index]; + // This is the 4th iteration of the marking strategy + // First version - used a mark entry, but that would prematurely + // burn a slot in the cache + // Second version - used a mark boolean, that worked well, but + // was a overly permissive in allowing the next request to the same slot + // to immediately create a CacheEntry + // Third version - used a mark hash that to match exactly, + // that could lead to racy fights over the cache line + // So this version is a hybrid of 2nd & 3rd, using a bloom filter + // that effectively degenerates to a boolean + int priorMarkHash = marks[index]; boolean match = ((priorMarkHash & newAdjHash) == newAdjHash); if (match) { marks[index] = 0; diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java index 2af25140553..effa14d1959 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java @@ -58,10 +58,10 @@ public final class SimpleUtf8Cache implements EncodingCache { protected int evictions = 0; /** - * Recalibrates the cache - * Applies a decay to existing entries - and purges entries below the PURGE_THRESHOLD - * - * While still racy this method is synchronized to avoid simultaneous recalibrations + * Recalibrates the cache Applies a decay to existing entries - and purges entries below the + * PURGE_THRESHOLD + * + *

While still racy this method is synchronized to avoid simultaneous recalibrations */ public synchronized void recalibrate() { CacheEntry[] thisEntries = this.entries; @@ -75,7 +75,7 @@ public synchronized void recalibrate() { Arrays.fill(this.markers, 0); } - + @Override public byte[] encode(CharSequence charSeq) { if (charSeq instanceof String) { @@ -163,8 +163,18 @@ static final int initialBucketIndex(int[] marks, int adjHash) { static final boolean mark(int[] marks, int newAdjHash) { int index = initialBucketIndex(marks, newAdjHash); - int priorMarkHash = marks[index]; + // This is the 4th iteration of the marking strategy + // First version - used a mark entry, but that would prematurely + // burn a slot in the cache + // Second version - used a mark boolean, that worked well, but + // was a overly permissive in allowing the next request to the same slot + // to immediately create a CacheEntry + // Third version - used a mark hash that to match exactly, + // that could lead to racy fights over the cache line + // So this version is a hybrid of 2nd & 3rd, using a bloom filter + // that effectively degenerates to a boolean + int priorMarkHash = marks[index]; boolean match = ((priorMarkHash & newAdjHash) == newAdjHash); if (match) { marks[index] = 0; From f509c0ae510c2f3fc13587d8a49ebfef7ad211e3 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Tue, 2 Sep 2025 14:30:31 -0400 Subject: [PATCH 11/23] Clean-up & tweaking - clean-up based on review feedback - making naming consistent - some vestiges of prior names for second level cache updated - tweaked generational cache to check tenured entries first - --- .../writer/ddagent/GenerationalUtf8Cache.java | 52 +++++++++---------- .../writer/ddagent/SimpleUtf8Cache.java | 20 ++++--- .../ddagent/GenerationalUtf8CacheTest.java | 6 +-- 3 files changed, 39 insertions(+), 39 deletions(-) diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java 
b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java index 08043a8644c..6558b107db4 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java @@ -82,7 +82,7 @@ public final class GenerationalUtf8Cache implements EncodingCache { private double promotionThreshold = INITIAL_PROMOTION_THRESHOLD; int edenHits = 0; - int promotedHits = 0; + int tenuredHits = 0; int earlyPromotions = 0; int promotions = 0; int edenEvictions = 0; @@ -153,7 +153,7 @@ public void recalibrate(long accessTimeMs) { } this.edenHits = 0; - this.promotedHits = 0; + this.tenuredHits = 0; this.earlyPromotions = 0; this.promotions = 0; this.edenEvictions = 0; @@ -181,10 +181,20 @@ public final byte[] getUtf8(String value) { */ public final byte[] getUtf8(String value, long accessTimeMs) { int adjHash = CacheEntry.adjHash(value); - - CacheEntry[] edenEntries = this.edenEntries; long lookupTimeMs = this.accessTimeMs; + CacheEntry[] tenuredEntries = this.tenuredEntries; + int matchingTenuredIndex = lookupEntryIndex(tenuredEntries, adjHash, value, lookupTimeMs); + if (matchingTenuredIndex != -1) { + CacheEntry tenuredEntry = tenuredEntries[matchingTenuredIndex]; + + tenuredEntry.hit(lookupTimeMs); + + this.tenuredHits += 1; + return tenuredEntry.utf8(); + } + + CacheEntry[] edenEntries = this.edenEntries; int matchingEdenIndex = lookupEntryIndex(edenEntries, adjHash, value, lookupTimeMs); if (matchingEdenIndex != -1) { CacheEntry edenEntry = edenEntries[matchingEdenIndex]; @@ -204,17 +214,6 @@ public final byte[] getUtf8(String value, long accessTimeMs) { return edenEntry.utf8(); } - CacheEntry[] promotedEntries = this.tenuredEntries; - int matchingPromotedIndex = lookupEntryIndex(promotedEntries, adjHash, value, lookupTimeMs); - if (matchingPromotedIndex != -1) { - CacheEntry promotedEntry = 
promotedEntries[matchingPromotedIndex]; - - promotedEntry.hit(lookupTimeMs); - - this.promotedHits += 1; - return promotedEntry.utf8(); - } - boolean wasMarked = mark(this.edenMarkers, adjHash); // If slot isn't marked, this is likely the first request @@ -238,9 +237,9 @@ public final byte[] getUtf8(String value, long accessTimeMs) { // See if we can early promote the local MFU entry into the global cache // Early promotion doesn't evict from the global cache - int globalAvailableIndex = findAvailableIndex(promotedEntries, localMfuEntry.adjHash()); - if (globalAvailableIndex != -1) { - promotedEntries[globalAvailableIndex] = localMfuEntry; + int tenuredAvailableIndex = findAvailableIndex(tenuredEntries, localMfuEntry.adjHash()); + if (tenuredAvailableIndex != -1) { + tenuredEntries[tenuredAvailableIndex] = localMfuEntry; this.earlyPromotions += 1; edenEntries[localMfuIndex] = newEntry; @@ -347,18 +346,15 @@ static final boolean lruInsert(CacheEntry[] entries, CacheEntry newEntry) { if (index >= entries.length) index = 0; CacheEntry entry = entries[index]; - if (entry == null) { - entries[index] = newEntry; - return false; - } else if (entry.matches(newEntry)) { + if (entry == null || entry.matches(newEntry)) { entries[index] = newEntry; return false; - } else { - long lastUsedMs = entry.lastUsedMs(); - if (lastUsedMs < lowestUsedMs) { - lowestUsedMs = lastUsedMs; - lruIndex = index; - } + } + + long lastUsedMs = entry.lastUsedMs(); + if (lastUsedMs < lowestUsedMs) { + lowestUsedMs = lastUsedMs; + lruIndex = index; } } diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java index effa14d1959..fb1ad8bb7c7 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java @@ -135,15 +135,15 @@ static final boolean 
lfuInsert(CacheEntry[] entries, CacheEntry newEntry) { if (index >= entries.length) index = 0; CacheEntry entry = entries[index]; - if (entry == null || entry.score() == 0) { + if (entry == null || entry.isPurgeable()) { entries[index] = newEntry; return false; - } else { - double hits = entry.score(); - if (hits < lowestHits) { - lowestHits = hits; - lfuIndex = index; - } + } + + double hits = entry.score(); + if (hits < lowestHits) { + lowestHits = hits; + lfuIndex = index; } } @@ -227,12 +227,16 @@ byte[] utf8() { boolean decay() { this.score *= HIT_DECAY; + return this.isPurgeable(); + } + + boolean isPurgeable() { return (this.score < PURGE_THRESHOLD); } static final int adjHash(String value) { int hash = value.hashCode(); - return (hash == 0) ? 0xDA7AD06 : hash; + return (hash == 0) ? 0xDA7AD06 : hash ^ (hash >>> 16); } static final byte[] utf8(String value) { diff --git a/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/GenerationalUtf8CacheTest.java b/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/GenerationalUtf8CacheTest.java index 91c2838f83b..cac6e1dce1a 100644 --- a/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/GenerationalUtf8CacheTest.java +++ b/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/GenerationalUtf8CacheTest.java @@ -76,7 +76,7 @@ public void promotion() { assertSame(cached, second); } - assertNotEquals(0, cache.promotedHits); + assertNotEquals(0, cache.tenuredHits); } @Test @@ -101,7 +101,7 @@ public void fuzz() { } edenHits += cache.edenHits; - promotedHits += cache.promotedHits; + promotedHits += cache.tenuredHits; printStats(cache); } @@ -147,7 +147,7 @@ static final void printStats(GenerationalUtf8Cache cache) { System.out.printf( "eden hits: %5d\tpromotion hits: %5d\tpromotions: %5d\tearly: %5d\tlocal evictions: %5d\tglobal evictions: %5d%n", cache.edenHits, - cache.promotedHits, + cache.tenuredHits, cache.promotions, cache.earlyPromotions, cache.edenEvictions, From 
db8239474f7d7ca805377a3af6eeb20e4dde08bb Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Tue, 2 Sep 2025 16:30:43 -0400 Subject: [PATCH 12/23] Tweaking settings to be good at multiple memory levels - switching generational cache to use different probe lengths for eden vs tenured generation - these settings are neutral or better throughput wise for petclinic for 64m, 80m, 96m, and 128m heaps --- .../writer/ddagent/GenerationalUtf8Cache.java | 41 ++++++++++--------- .../writer/ddagent/SimpleUtf8Cache.java | 4 +- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java index 6558b107db4..a9803168e71 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java @@ -64,13 +64,14 @@ * provide better cache utilization. 
*/ public final class GenerationalUtf8Cache implements EncodingCache { - private static final int MAX_PROBES = 8; + private static final int MAX_EDEN_PROBES = 4; + private static final int MAX_TENURED_PROBES = 8; private static final int MIN_PROMOTION_TRESHOLD = 2; private static final int INITIAL_PROMOTION_THRESHOLD = 16; - private static final double SCORE_DECAY = 0.8D; - private static final double PURGE_THRESHOLD = 0.1D; + private static final double SCORE_DECAY = 0.5D; + private static final double PURGE_THRESHOLD = 0.25D; private static final double PROMOTION_THRESHOLD_ADJ_FACTOR = 1.5; private final CacheEntry[] edenEntries; @@ -92,8 +93,8 @@ public GenerationalUtf8Cache() { this.accessTimeMs = System.currentTimeMillis(); // These sizes must be powers of 2 - this.edenEntries = new CacheEntry[128]; - this.edenMarkers = new int[128]; + this.edenEntries = new CacheEntry[64]; + this.edenMarkers = new int[64]; // The size must be a power of 2 this.tenuredEntries = new CacheEntry[128]; @@ -195,7 +196,7 @@ public final byte[] getUtf8(String value, long accessTimeMs) { } CacheEntry[] edenEntries = this.edenEntries; - int matchingEdenIndex = lookupEntryIndex(edenEntries, adjHash, value, lookupTimeMs); + int matchingEdenIndex = lookupEntryIndex(edenEntries, MAX_EDEN_PROBES, adjHash, value, lookupTimeMs); if (matchingEdenIndex != -1) { CacheEntry edenEntry = edenEntries[matchingEdenIndex]; @@ -204,7 +205,7 @@ public final byte[] getUtf8(String value, long accessTimeMs) { // mark promoted first - to avoid racy insertions this.promotions += 1; - boolean evicted = lruInsert(this.tenuredEntries, edenEntry); + boolean evicted = lruInsert(this.tenuredEntries, MAX_TENURED_PROBES, edenEntry); if (evicted) this.tenuredEvictions += 1; edenEntries[matchingEdenIndex] = null; @@ -226,7 +227,7 @@ public final byte[] getUtf8(String value, long accessTimeMs) { newEntry.hit(lookupTimeMs); // search for empty slot or failing that the MFU entry - int localMfuIndex = 
findFirstAvailableOrMfuIndex(edenEntries, adjHash); + int localMfuIndex = findFirstAvailableOrMfuIndex(edenEntries, MAX_EDEN_PROBES, adjHash); CacheEntry localMfuEntry = edenEntries[localMfuIndex]; // Found an empty slot - fill it @@ -237,7 +238,7 @@ public final byte[] getUtf8(String value, long accessTimeMs) { // See if we can early promote the local MFU entry into the global cache // Early promotion doesn't evict from the global cache - int tenuredAvailableIndex = findAvailableIndex(tenuredEntries, localMfuEntry.adjHash()); + int tenuredAvailableIndex = findAvailableIndex(tenuredEntries, MAX_TENURED_PROBES, localMfuEntry.adjHash()); if (tenuredAvailableIndex != -1) { tenuredEntries[tenuredAvailableIndex] = localMfuEntry; this.earlyPromotions += 1; @@ -248,15 +249,15 @@ public final byte[] getUtf8(String value, long accessTimeMs) { // No empty slot - or space to promote into the global cache // Insert into local cache while evicting the LFU - boolean evicted = lfuInsert(edenEntries, newEntry); + boolean evicted = lfuInsert(edenEntries, MAX_EDEN_PROBES, newEntry); if (evicted) this.edenEvictions += 1; return newEntry.utf8(); } - static final int findAvailableIndex(CacheEntry[] entries, int newAdjHash) { + static final int findAvailableIndex(CacheEntry[] entries, int numProbes, int newAdjHash) { int initialBucketIndex = initialBucketIndex(entries, newAdjHash); - for (int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index) { + for (int probe = 0, index = initialBucketIndex; probe < numProbes; ++probe, ++index) { if (index >= entries.length) index = 0; CacheEntry entry = entries[index]; @@ -265,12 +266,12 @@ static final int findAvailableIndex(CacheEntry[] entries, int newAdjHash) { return -1; } - static final int findFirstAvailableOrMfuIndex(CacheEntry[] entries, int newAdjHash) { + static final int findFirstAvailableOrMfuIndex(CacheEntry[] entries, int numProbes, int newAdjHash) { double mfuScore = Double.MIN_VALUE; int mfuIndex = -1; int 
initialBucketIndex = initialBucketIndex(entries, newAdjHash); - for (int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index) { + for (int probe = 0, index = initialBucketIndex; probe < numProbes; ++probe, ++index) { if (index >= entries.length) index = 0; CacheEntry entry = entries[index]; @@ -309,13 +310,13 @@ static final boolean mark(int[] marks, int newAdjHash) { return match; } - static final boolean lfuInsert(CacheEntry[] entries, CacheEntry newEntry) { + static final boolean lfuInsert(CacheEntry[] entries, int numProbes, CacheEntry newEntry) { int initialBucketIndex = initialBucketIndex(entries, newEntry.adjHash()); // initial scan to see if there's an empty slot or marker entry is already present double lowestScore = Double.MAX_VALUE; int lfuIndex = -1; - for (int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index) { + for (int probe = 0, index = initialBucketIndex; probe < numProbes; ++probe, ++index) { if (index >= entries.length) index = 0; CacheEntry entry = entries[index]; @@ -336,13 +337,13 @@ static final boolean lfuInsert(CacheEntry[] entries, CacheEntry newEntry) { return true; } - static final boolean lruInsert(CacheEntry[] entries, CacheEntry newEntry) { + static final boolean lruInsert(CacheEntry[] entries, int numProbes, CacheEntry newEntry) { int initialBucketIndex = initialBucketIndex(entries, newEntry.adjHash()); // initial scan to see if there's an empty slot or entry is already present long lowestUsedMs = Long.MAX_VALUE; int lruIndex = -1; - for (int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index) { + for (int probe = 0, index = initialBucketIndex; probe < numProbes; ++probe, ++index) { if (index >= entries.length) index = 0; CacheEntry entry = entries[index]; @@ -371,9 +372,9 @@ static final int initialBucketIndex(int[] marks, int adjHash) { } static final int lookupEntryIndex( - CacheEntry[] entries, int adjHash, String value, long lookupTimeMs) { + 
CacheEntry[] entries, int numProbes, int adjHash, String value, long lookupTimeMs) { int initialBucketIndex = initialBucketIndex(entries, adjHash); - for (int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index) { + for (int probe = 0, index = initialBucketIndex; probe < numProbes; ++probe, ++index) { if (index >= entries.length) index = 0; CacheEntry entry = entries[index]; diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java index fb1ad8bb7c7..97e89bfae31 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java @@ -46,12 +46,12 @@ public final class SimpleUtf8Cache implements EncodingCache { private static final int MAX_PROBES = 4; - private final int SIZE = 128; + private final int SIZE = 64; private final int[] markers = new int[SIZE]; private final CacheEntry[] entries = new CacheEntry[SIZE]; - private static final double HIT_DECAY = 0.8D; + private static final double HIT_DECAY = 0.5D; private static final double PURGE_THRESHOLD = 0.25D; protected int hits = 0; From 41d059d246bea3a2916cefc37a6517c7394104a3 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 3 Sep 2025 12:16:57 -0400 Subject: [PATCH 13/23] Fixing oversight from marking change Should be using adjHash not value.hashCode --- .../datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java index 97e89bfae31..a97ff496466 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java +++ 
b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java @@ -90,7 +90,7 @@ public byte[] encode(CharSequence charSeq) { public final byte[] getUtf8(String value) { CacheEntry[] thisEntries = this.entries; - int adjHash = value.hashCode(); + int adjHash = CacheEntry.adjHash(value); CacheEntry matchingEntry = lookupEntry(thisEntries, adjHash, value); if (matchingEntry != null) { From 3b69e62ff809ee00dd176f523d80e48aba041d88 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 3 Sep 2025 12:17:42 -0400 Subject: [PATCH 14/23] Fixing bug introduced with different probes lengths for eden & tenured --- .../trace/common/writer/ddagent/GenerationalUtf8Cache.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java index a9803168e71..beeb7f698e3 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java @@ -185,7 +185,7 @@ public final byte[] getUtf8(String value, long accessTimeMs) { long lookupTimeMs = this.accessTimeMs; CacheEntry[] tenuredEntries = this.tenuredEntries; - int matchingTenuredIndex = lookupEntryIndex(tenuredEntries, adjHash, value, lookupTimeMs); + int matchingTenuredIndex = lookupEntryIndex(tenuredEntries, MAX_TENURED_PROBES, adjHash, value, lookupTimeMs); if (matchingTenuredIndex != -1) { CacheEntry tenuredEntry = tenuredEntries[matchingTenuredIndex]; From 4102a26af6072b98c0fe843579a70ce21af5d76f Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 3 Sep 2025 13:08:39 -0400 Subject: [PATCH 15/23] More clean-up - more explanatory comments - more naming updates: local -> eden --- .../writer/ddagent/GenerationalUtf8Cache.java | 34 +++++++++++++------ 
.../writer/ddagent/SimpleUtf8Cache.java | 7 +++- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java index beeb7f698e3..5f8c82cbccb 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java @@ -185,7 +185,8 @@ public final byte[] getUtf8(String value, long accessTimeMs) { long lookupTimeMs = this.accessTimeMs; CacheEntry[] tenuredEntries = this.tenuredEntries; - int matchingTenuredIndex = lookupEntryIndex(tenuredEntries, MAX_TENURED_PROBES, adjHash, value, lookupTimeMs); + int matchingTenuredIndex = + lookupEntryIndex(tenuredEntries, MAX_TENURED_PROBES, adjHash, value, lookupTimeMs); if (matchingTenuredIndex != -1) { CacheEntry tenuredEntry = tenuredEntries[matchingTenuredIndex]; @@ -196,7 +197,8 @@ public final byte[] getUtf8(String value, long accessTimeMs) { } CacheEntry[] edenEntries = this.edenEntries; - int matchingEdenIndex = lookupEntryIndex(edenEntries, MAX_EDEN_PROBES, adjHash, value, lookupTimeMs); + int matchingEdenIndex = + lookupEntryIndex(edenEntries, MAX_EDEN_PROBES, adjHash, value, lookupTimeMs); if (matchingEdenIndex != -1) { CacheEntry edenEntry = edenEntries[matchingEdenIndex]; @@ -227,23 +229,27 @@ public final byte[] getUtf8(String value, long accessTimeMs) { newEntry.hit(lookupTimeMs); // search for empty slot or failing that the MFU entry - int localMfuIndex = findFirstAvailableOrMfuIndex(edenEntries, MAX_EDEN_PROBES, adjHash); - CacheEntry localMfuEntry = edenEntries[localMfuIndex]; + int edenMfuIndex = findFirstAvailableOrMfuIndex(edenEntries, MAX_EDEN_PROBES, adjHash); + CacheEntry edenMfuEntry = edenEntries[edenMfuIndex]; // Found an empty slot - fill it - if (localMfuEntry == null) { - 
edenEntries[localMfuIndex] = newEntry; + if (edenMfuEntry == null) { + edenEntries[edenMfuIndex] = newEntry; return newEntry.utf8(); } // See if we can early promote the local MFU entry into the global cache // Early promotion doesn't evict from the global cache - int tenuredAvailableIndex = findAvailableIndex(tenuredEntries, MAX_TENURED_PROBES, localMfuEntry.adjHash()); + + // NOTE: Need to make sure to use hash of the entry being promoted, + // since it may differ from the requested hash + int tenuredAvailableIndex = + findAvailableIndex(tenuredEntries, MAX_TENURED_PROBES, edenMfuEntry.adjHash()); if (tenuredAvailableIndex != -1) { - tenuredEntries[tenuredAvailableIndex] = localMfuEntry; + tenuredEntries[tenuredAvailableIndex] = edenMfuEntry; this.earlyPromotions += 1; - edenEntries[localMfuIndex] = newEntry; + edenEntries[edenMfuIndex] = newEntry; return CacheEntry.utf8(value); } @@ -266,7 +272,8 @@ static final int findAvailableIndex(CacheEntry[] entries, int numProbes, int new return -1; } - static final int findFirstAvailableOrMfuIndex(CacheEntry[] entries, int numProbes, int newAdjHash) { + static final int findFirstAvailableOrMfuIndex( + CacheEntry[] entries, int numProbes, int newAdjHash) { double mfuScore = Double.MIN_VALUE; int mfuIndex = -1; @@ -296,10 +303,15 @@ static final boolean mark(int[] marks, int newAdjHash) { // was a overly permissive in allowing the next request to the same slot // to immediately create a CacheEntry // Third version - used a mark hash that to match exactly, - // that could lead to racy fights over the cache line + // that could lead to access order fights over the cache slot // So this version is a hybrid of 2nd & 3rd, using a bloom filter // that effectively degenerates to a boolean + // This approach provides a nice balance when there's an A-B-A access pattern + // The first A will mark the slot + // Then B will mark the slot with A | B + // Then either A or B can claim and reset the slot + int priorMarkHash = 
marks[index]; boolean match = ((priorMarkHash & newAdjHash) == newAdjHash); if (match) { diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java index a97ff496466..5f3749ee416 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java @@ -170,10 +170,15 @@ static final boolean mark(int[] marks, int newAdjHash) { // was a overly permissive in allowing the next request to the same slot // to immediately create a CacheEntry // Third version - used a mark hash that to match exactly, - // that could lead to racy fights over the cache line + // that could lead to access order fights over the cache slot // So this version is a hybrid of 2nd & 3rd, using a bloom filter // that effectively degenerates to a boolean + // This approach provides a nice balance when there's an A-B-A access pattern + // The first A will mark the slot + // Then B will mark the slot with A | B + // Then either A or B can claim and reset the slot + int priorMarkHash = marks[index]; boolean match = ((priorMarkHash & newAdjHash) == newAdjHash); if (match) { From 9b78df752b9ecb30489982b2738e36a4efa2eaa8 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 3 Sep 2025 14:38:03 -0400 Subject: [PATCH 16/23] Misc fixes - adding protections against storing large strings in cache - fixed errant use of CacheEntry.utf8(String) instead of entry.utf8() - removed unnecessary lookupTimeMs variable --- .../writer/ddagent/GenerationalUtf8Cache.java | 15 +++++++++------ .../common/writer/ddagent/SimpleUtf8Cache.java | 4 ++++ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java 
b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java index 5f8c82cbccb..acdb2545382 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java @@ -1,6 +1,7 @@ package datadog.trace.common.writer.ddagent; import datadog.communication.serialization.EncodingCache; +import datadog.trace.common.writer.ddagent.SimpleUtf8Cache.CacheEntry; import java.nio.charset.StandardCharsets; import java.util.Arrays; @@ -74,6 +75,8 @@ public final class GenerationalUtf8Cache implements EncodingCache { private static final double PURGE_THRESHOLD = 0.25D; private static final double PROMOTION_THRESHOLD_ADJ_FACTOR = 1.5; + private static final int MAX_ENTRY_LEN = 256; + private final CacheEntry[] edenEntries; private final int[] edenMarkers; @@ -181,12 +184,13 @@ public final byte[] getUtf8(String value) { * the specified accessTimeMs is used to update the cache entry */ public final byte[] getUtf8(String value, long accessTimeMs) { + if (value.length() > MAX_ENTRY_LEN) return CacheEntry.utf8(value); + int adjHash = CacheEntry.adjHash(value); long lookupTimeMs = this.accessTimeMs; CacheEntry[] tenuredEntries = this.tenuredEntries; - int matchingTenuredIndex = - lookupEntryIndex(tenuredEntries, MAX_TENURED_PROBES, adjHash, value, lookupTimeMs); + int matchingTenuredIndex = lookupEntryIndex(tenuredEntries, MAX_TENURED_PROBES, adjHash, value); if (matchingTenuredIndex != -1) { CacheEntry tenuredEntry = tenuredEntries[matchingTenuredIndex]; @@ -197,8 +201,7 @@ public final byte[] getUtf8(String value, long accessTimeMs) { } CacheEntry[] edenEntries = this.edenEntries; - int matchingEdenIndex = - lookupEntryIndex(edenEntries, MAX_EDEN_PROBES, adjHash, value, lookupTimeMs); + int matchingEdenIndex = lookupEntryIndex(edenEntries, MAX_EDEN_PROBES, adjHash, value); if (matchingEdenIndex != -1) { CacheEntry edenEntry = 
edenEntries[matchingEdenIndex]; @@ -250,7 +253,7 @@ public final byte[] getUtf8(String value, long accessTimeMs) { this.earlyPromotions += 1; edenEntries[edenMfuIndex] = newEntry; - return CacheEntry.utf8(value); + return newEntry.utf8(); } // No empty slot - or space to promote into the global cache @@ -384,7 +387,7 @@ static final int initialBucketIndex(int[] marks, int adjHash) { } static final int lookupEntryIndex( - CacheEntry[] entries, int numProbes, int adjHash, String value, long lookupTimeMs) { + CacheEntry[] entries, int numProbes, int adjHash, String value) { int initialBucketIndex = initialBucketIndex(entries, adjHash); for (int probe = 0, index = initialBucketIndex; probe < numProbes; ++probe, ++index) { if (index >= entries.length) index = 0; diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java index 5f3749ee416..04c4768fc2c 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java @@ -54,6 +54,8 @@ public final class SimpleUtf8Cache implements EncodingCache { private static final double HIT_DECAY = 0.5D; private static final double PURGE_THRESHOLD = 0.25D; + private static final int MAX_ENTRY_LEN = 128; + protected int hits = 0; protected int evictions = 0; @@ -88,6 +90,8 @@ public byte[] encode(CharSequence charSeq) { /** Returns the UTF-8 encoding of value -- using a cache value if available */ public final byte[] getUtf8(String value) { + if (value.length() > MAX_ENTRY_LEN) return CacheEntry.utf8(value); + CacheEntry[] thisEntries = this.entries; int adjHash = CacheEntry.adjHash(value); From 3c33c38a9fd8c047a0773ee70dd3807361ade6d1 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 3 Sep 2025 14:42:55 -0400 Subject: [PATCH 17/23] Fixing benchmarks brought over from standalone 
prototype --- .../common/writer/ddagent/Utf8Benchmark.java | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java b/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java index 85c452efe00..b087c65af90 100644 --- a/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java +++ b/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java @@ -1,8 +1,5 @@ package datadog.trace.common.writer.ddagent; -import datadog.utf8.GeneratedTagUtf8Cache; -import datadog.utf8.GenerationalUtf8Cache; -import datadog.utf8.SimpleUtf8Cache; import java.nio.charset.StandardCharsets; import java.util.concurrent.ThreadLocalRandom; import org.openjdk.jmh.annotations.Benchmark; @@ -10,6 +7,9 @@ import org.openjdk.jmh.annotations.Mode; import org.openjdk.jmh.infra.Blackhole; +import datadog.trace.common.writer.ddagent.GenerationalUtf8Cache; +import datadog.trace.common.writer.ddagent.SimpleUtf8CacheTest; + /** * This benchmark isn't really intended to used to measure throughput, but rather to be used with * "-prof gc" to check bytes / op. 
@@ -79,16 +79,6 @@ public static final byte[] tagUtf8_nocache() { return tag.getBytes(StandardCharsets.UTF_8); } - @Benchmark - public static final byte[] tagUtf8_w_generatedCache() { - String tag = nextTag(); - - byte[] cache = GeneratedTagUtf8Cache.lookup(tag); - if (cache != null) return cache; - - return tag.getBytes(StandardCharsets.UTF_8); - } - static final SimpleUtf8Cache TAG_CACHE = new SimpleUtf8Cache(); @Benchmark From 0b9f0d0aea3ecdfbc35299e6a11b6e20ab755d0c Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 3 Sep 2025 16:20:27 -0400 Subject: [PATCH 18/23] test & benchmark clean-up Added tests to verify that big strings are not cached --- .../common/writer/ddagent/Utf8Benchmark.java | 3 -- .../ddagent/GenerationalUtf8CacheTest.java | 30 ++++++++++++++++++- .../writer/ddagent/SimpleUtf8CacheTest.java | 29 +++++++++++++++++- 3 files changed, 57 insertions(+), 5 deletions(-) diff --git a/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java b/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java index b087c65af90..f74d57f8336 100644 --- a/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java +++ b/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java @@ -7,9 +7,6 @@ import org.openjdk.jmh.annotations.Mode; import org.openjdk.jmh.infra.Blackhole; -import datadog.trace.common.writer.ddagent.GenerationalUtf8Cache; -import datadog.trace.common.writer.ddagent.SimpleUtf8CacheTest; - /** * This benchmark isn't really intended to used to measure throughput, but rather to be used with * "-prof gc" to check bytes / op. 
diff --git a/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/GenerationalUtf8CacheTest.java b/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/GenerationalUtf8CacheTest.java index cac6e1dce1a..65a470ce915 100644 --- a/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/GenerationalUtf8CacheTest.java +++ b/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/GenerationalUtf8CacheTest.java @@ -1,9 +1,10 @@ package datadog.trace.common.writer.ddagent; import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertNotSame; import static org.junit.Assert.assertSame; -import static org.junit.jupiter.api.Assertions.assertNotEquals; import java.nio.charset.StandardCharsets; import java.util.Random; @@ -110,6 +111,33 @@ public void fuzz() { assertNotEquals(0, promotedHits); } + @Test + public void bigString_dont_cache() { + String lorem = "Lorem ipsum dolor sit amet"; + while (lorem.length() < 500) { + lorem += lorem; + } + byte[] expected = lorem.getBytes(StandardCharsets.UTF_8); + + GenerationalUtf8Cache cache = new GenerationalUtf8Cache(); + byte[] first = cache.getUtf8(lorem); + assertArrayEquals(expected, first); + + byte[] second = cache.getUtf8(lorem); + assertArrayEquals(expected, second); + assertNotSame(first, second); + + for (int i = 0; i < 10; ++i) { + byte[] result = cache.getUtf8(lorem); + assertArrayEquals(expected, result); + + assertNotSame(first, result); + assertNotSame(second, result); + } + assertEquals(0, cache.edenHits); + assertEquals(0, cache.tenuredHits); + } + static final String[] TAGS = {"foo", "bar", "baz"}; static final String[] BASE_STRINGS = {"Hello", "world", "foo", "bar", "baz", "quux"}; diff --git a/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/SimpleUtf8CacheTest.java 
b/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/SimpleUtf8CacheTest.java index 6aeff655a3a..8785252fef8 100644 --- a/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/SimpleUtf8CacheTest.java +++ b/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/SimpleUtf8CacheTest.java @@ -1,9 +1,10 @@ package datadog.trace.common.writer.ddagent; import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertNotSame; import static org.junit.Assert.assertSame; -import static org.junit.jupiter.api.Assertions.assertNotEquals; import java.nio.charset.StandardCharsets; import java.util.Random; @@ -75,6 +76,32 @@ public void fuzz() { assertNotEquals(0, hits); } + @Test + public void bigString_dont_cache() { + String lorem = "Lorem ipsum dolor sit amet"; + while (lorem.length() < 200) { // > MAX_ENTRY_LEN (128), so never cached + lorem += lorem; + } + byte[] expected = lorem.getBytes(StandardCharsets.UTF_8); + + SimpleUtf8Cache cache = new SimpleUtf8Cache(); + byte[] first = cache.getUtf8(lorem); + assertArrayEquals(expected, first); + + byte[] second = cache.getUtf8(lorem); + assertArrayEquals(expected, second); + assertNotSame(first, second); + + for (int i = 0; i < 10; ++i) { + byte[] result = cache.getUtf8(lorem); + assertArrayEquals(expected, result); + + assertNotSame(first, result); + assertNotSame(second, result); + } + assertEquals(0, cache.hits); + } + static final String[] TAGS = {"foo", "bar", "baz"}; static final String[] BASE_STRINGS = {"Hello", "world", "foo", "bar", "baz", "quux"}; From bdc1859909c375ce2f6a147e2e9f4d44d6a7c678 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 3 Sep 2025 16:27:31 -0400 Subject: [PATCH 19/23] Added some explanatory comments --- .../datadog/trace/common/writer/ddagent/Utf8Benchmark.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git
a/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java b/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java index f74d57f8336..438822a7858 100644 --- a/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java +++ b/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java @@ -12,7 +12,10 @@ * "-prof gc" to check bytes / op. * *
<p>
Since {@link String#getBytes(java.nio.charset.Charset)} is intrinsified the caches typically - * perform worse throughput wise, the benefit of the caches is to reduce allocation. + * perform worse throughput wise, the benefit of the caches is to reduce allocation. * Intention of + * this benchmark is to create data that roughly resembles what might be seen in a trace payload. + * Tag names are quite static, tag values are mostly low cardinality, but some tag values have + * infinite cardinality. */ @BenchmarkMode(Mode.Throughput) public class Utf8Benchmark { From 6ab19b08739d19ea352363022901669259d75709 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Thu, 4 Sep 2025 09:06:55 -0400 Subject: [PATCH 20/23] Update dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java Co-authored-by: Brice Dutheil --- .../java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java b/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java index 438822a7858..4177e37ebd0 100644 --- a/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java +++ b/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java @@ -12,7 +12,7 @@ * "-prof gc" to check bytes / op. * *
<p>
Since {@link String#getBytes(java.nio.charset.Charset)} is intrinsified the caches typically - * perform worse throughput wise, the benefit of the caches is to reduce allocation. * Intention of + * perform worse throughput wise, the benefit of the caches is to reduce allocation. Intention of * this benchmark is to create data that roughly resembles what might be seen in a trace payload. * Tag names are quite static, tag values are mostly low cardinality, but some tag values have * infinite cardinality. From 49100cbd782d16170fd437184e3fed075a655caa Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Thu, 4 Sep 2025 12:39:46 -0400 Subject: [PATCH 21/23] Making cache more configurable & clean-up - added ability to configure cache size - for both tag names & values - factored shared code into Caching static utility class - added tests for Caching class & size determination logic --- .../trace/api/config/GeneralConfig.java | 3 +- .../trace/common/writer/ddagent/Caching.java | 81 +++++++++++ .../writer/ddagent/GenerationalUtf8Cache.java | 128 ++++++++---------- .../writer/ddagent/SimpleUtf8Cache.java | 70 +++------- .../writer/ddagent/TraceMapperV0_4.java | 8 +- .../common/writer/ddagent/CachingTest.java | 45 ++++++ .../ddagent/GenerationalUtf8CacheTest.java | 44 +++++- .../writer/ddagent/SimpleUtf8CacheTest.java | 25 +++- .../main/java/datadog/trace/api/Config.java | 16 ++- 9 files changed, 281 insertions(+), 139 deletions(-) create mode 100644 dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/Caching.java create mode 100644 dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/CachingTest.java diff --git a/dd-trace-api/src/main/java/datadog/trace/api/config/GeneralConfig.java b/dd-trace-api/src/main/java/datadog/trace/api/config/GeneralConfig.java index ee24889233a..43c72885ef8 100644 --- a/dd-trace-api/src/main/java/datadog/trace/api/config/GeneralConfig.java +++ b/dd-trace-api/src/main/java/datadog/trace/api/config/GeneralConfig.java @@ 
-105,7 +105,8 @@ public final class GeneralConfig { public static final String JDK_SOCKET_ENABLED = "jdk.socket.enabled"; public static final String OPTIMIZED_MAP_ENABLED = "optimized.map.enabled"; - public static final String UTF8_CACHE_ENABLED = "utf8.cache.enabled"; + public static final String TAG_NAME_UTF8_CACHE_SIZE = "tag.name.utf8.cache.size"; + public static final String TAG_VALUE_UTF8_CACHE_SIZE = "tag.value.utf8.cache.size"; public static final String STACK_TRACE_LENGTH_LIMIT = "stack.trace.length.limit"; public static final String SSI_INJECTION_ENABLED = "injection.enabled"; diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/Caching.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/Caching.java new file mode 100644 index 00000000000..bc61b037784 --- /dev/null +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/Caching.java @@ -0,0 +1,81 @@ +package datadog.trace.common.writer.ddagent; + +import java.util.Arrays; + +/** Some common static functions used by simple & generational caches */ +final class Caching { + private Caching() {} + + /** + * Provides the cache size that holds the requestedCapacity + * + * @param requestedCapacity > 0 + * @return size >= requestedCapacity + */ + static final int cacheSizeFor(int requestedCapacity) { + int pow; + for (pow = 1; pow < requestedCapacity; pow *= 2) ; + return pow; + } + + /** Provides an "adjusted" (e.g. non-zero) hash for the given String */ + static final int adjHash(String value) { + int hash = value.hashCode(); + return (hash == 0) ? 0xDA7AD06 : hash; + } + + /** Resets markers to zero */ + static final void reset(int[] marks) { + Arrays.fill(marks, 0); + } + + /** + * Changes the mark status of the corresponding slot in the marking array. If there was previously + * a matching mark, resets the slot to zero and returns true If there was previously a mismatching + * mark, updates the slot and returns false + * + *
<p>
A return value of true indicates that the requested value has likely been seen previously + * and a cache entry should be created. + */ + static final boolean mark(int[] marks, int newAdjHash) { + int index = bucketIndex(marks, newAdjHash); + + // This is the 4th iteration of the marking strategy + // First version - used a mark entry, but that would prematurely + // burn a slot in the cache + // Second version - used a mark boolean, that worked well, but + // was overly permissive in allowing the next request to the same slot + // to immediately create a CacheEntry + // Third version - used a mark hash that had to match exactly, + // that could lead to access order fights over the cache slot + // So this version is a hybrid of 2nd & 3rd, using a bloom filter + // that effectively degenerates to a boolean + + // This approach provides a nice balance when there's an A-B-A access pattern + // The first A will mark the slot + // Then B will mark the slot with A | B + // Then either A or B can claim and reset the slot + + int priorMarkHash = marks[index]; + boolean match = ((priorMarkHash & newAdjHash) == newAdjHash); + if (match) { + marks[index] = 0; + } else { + marks[index] = priorMarkHash | newAdjHash; + } + return match; + } + + /** Provides the corresponding index into the marking array */ + static final int bucketIndex(int[] marks, int adjHash) { + return adjHash & (marks.length - 1); + } + + /** + * Provides the corresponding index into an entry array. Assumes that array size was determined by + * using {@link Caching#cacheSizeFor} + */ + static final <E> int bucketIndex(E[] entries, int adjHash) { + return adjHash & (entries.length - 1); + } +} diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java index acdb2545382..a488e49bc76 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java
+++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/GenerationalUtf8Cache.java @@ -3,7 +3,6 @@ import datadog.communication.serialization.EncodingCache; import datadog.trace.common.writer.ddagent.SimpleUtf8Cache.CacheEntry; import java.nio.charset.StandardCharsets; -import java.util.Arrays; /** * 2-level generational cache of UTF8 values - primarily intended to be used for tag values @@ -65,6 +64,9 @@ * provide better cache utilization. */ public final class GenerationalUtf8Cache implements EncodingCache { + static final int MAX_EDEN_CAPACITY = 512; + static final int MAX_TENURED_CAPACITY = 1024; + private static final int MAX_EDEN_PROBES = 4; private static final int MAX_TENURED_PROBES = 8; @@ -75,6 +77,9 @@ public final class GenerationalUtf8Cache implements EncodingCache { private static final double PURGE_THRESHOLD = 0.25D; private static final double PROMOTION_THRESHOLD_ADJ_FACTOR = 1.5; + private static final double EDEN_PROPORTION = 1D / 3D; + private static final double TENURED_PROPORTION = 1 - EDEN_PROPORTION; + private static final int MAX_ENTRY_LEN = 256; private final CacheEntry[] edenEntries; @@ -92,15 +97,40 @@ public final class GenerationalUtf8Cache implements EncodingCache { int edenEvictions = 0; int tenuredEvictions = 0; - public GenerationalUtf8Cache() { + public GenerationalUtf8Cache(int capacity) { this.accessTimeMs = System.currentTimeMillis(); + int edenCapacity = (int) (capacity * EDEN_PROPORTION); + int edenSize = Caching.cacheSizeFor(Math.min(edenCapacity, MAX_EDEN_CAPACITY)); + // These sizes must be powers of 2 - this.edenEntries = new CacheEntry[64]; - this.edenMarkers = new int[64]; + this.edenEntries = new CacheEntry[edenSize]; + this.edenMarkers = new int[edenSize]; + + int tenuredCapacity = (int) (capacity * TENURED_PROPORTION); + int tenuredSize = Caching.cacheSizeFor(Math.min(tenuredCapacity, MAX_TENURED_CAPACITY)); // The size must be a power of 2 - this.tenuredEntries = new CacheEntry[128]; + 
this.tenuredEntries = new CacheEntry[tenuredSize]; + } + + public GenerationalUtf8Cache(int edenCapacity, int tenuredCapacity) { + this.accessTimeMs = System.currentTimeMillis(); + + int edenSize = Caching.cacheSizeFor(Math.min(edenCapacity, MAX_EDEN_CAPACITY)); + this.edenEntries = new CacheEntry[edenSize]; + this.edenMarkers = new int[edenSize]; + + int tenuredSize = Caching.cacheSizeFor(Math.min(tenuredCapacity, MAX_TENURED_CAPACITY)); + this.tenuredEntries = new CacheEntry[tenuredSize]; + } + + public int edenCapacity() { + return this.edenEntries.length; + } + + public int tenuredCapacity() { + return this.tenuredEntries.length; } /** Updates access time used @link {@link #getUtf8(String, String)} to the provided value */ @@ -129,25 +159,9 @@ public synchronized void recalibrate() { public void recalibrate(long accessTimeMs) { this.accessTimeMs = accessTimeMs; - CacheEntry[] edenEntries = this.edenEntries; - for (int i = 0; i < edenEntries.length; ++i) { - CacheEntry entry = edenEntries[i]; - if (entry == null) continue; - - boolean purge = entry.decay(); - if (purge) edenEntries[i] = null; - } - - Arrays.fill(this.edenMarkers, 0); - - CacheEntry[] tenuredEntries = this.tenuredEntries; - for (int i = 0; i < tenuredEntries.length; ++i) { - CacheEntry entry = tenuredEntries[i]; - if (entry == null) continue; - - boolean purge = entry.decay(); - if (purge) tenuredEntries[i] = null; - } + recalibrate(this.edenEntries); + Caching.reset(this.edenMarkers); + recalibrate(this.tenuredEntries); int totalPromotions = this.promotions + this.earlyPromotions; if (totalPromotions == 0 && this.promotionThreshold >= MIN_PROMOTION_TRESHOLD) { @@ -164,6 +178,16 @@ public void recalibrate(long accessTimeMs) { this.tenuredEvictions = 0; } + static final void recalibrate(CacheEntry[] entries) { + for (int i = 0; i < entries.length; ++i) { + CacheEntry entry = entries[i]; + if (entry == null) continue; + + boolean purge = entry.decay(); + if (purge) entries[i] = null; + } + } +
@Override public byte[] encode(CharSequence charSeq) { if (charSeq instanceof String) { @@ -186,7 +210,7 @@ public final byte[] getUtf8(String value) { public final byte[] getUtf8(String value, long accessTimeMs) { if (value.length() > MAX_ENTRY_LEN) return CacheEntry.utf8(value); - int adjHash = CacheEntry.adjHash(value); + int adjHash = Caching.adjHash(value); long lookupTimeMs = this.accessTimeMs; CacheEntry[] tenuredEntries = this.tenuredEntries; @@ -220,7 +244,7 @@ public final byte[] getUtf8(String value, long accessTimeMs) { return edenEntry.utf8(); } - boolean wasMarked = mark(this.edenMarkers, adjHash); + boolean wasMarked = Caching.mark(this.edenMarkers, adjHash); // If slot isn't marked, this is likely the first request // Don't create an entry yet @@ -265,7 +289,7 @@ public final byte[] getUtf8(String value, long accessTimeMs) { } static final int findAvailableIndex(CacheEntry[] entries, int numProbes, int newAdjHash) { - int initialBucketIndex = initialBucketIndex(entries, newAdjHash); + int initialBucketIndex = Caching.bucketIndex(entries, newAdjHash); for (int probe = 0, index = initialBucketIndex; probe < numProbes; ++probe, ++index) { if (index >= entries.length) index = 0; @@ -280,7 +304,7 @@ static final int findFirstAvailableOrMfuIndex( double mfuScore = Double.MIN_VALUE; int mfuIndex = -1; - int initialBucketIndex = initialBucketIndex(entries, newAdjHash); + int initialBucketIndex = Caching.bucketIndex(entries, newAdjHash); for (int probe = 0, index = initialBucketIndex; probe < numProbes; ++probe, ++index) { if (index >= entries.length) index = 0; @@ -296,37 +320,8 @@ static final int findFirstAvailableOrMfuIndex( return mfuIndex; } - static final boolean mark(int[] marks, int newAdjHash) { - int index = initialBucketIndex(marks, newAdjHash); - - // This is the 4th iteration of the marking strategy - // First version - used a mark entry, but that would prematurely - // burn a slot in the cache - // Second version - used a mark boolean, that 
worked well, but - // was a overly permissive in allowing the next request to the same slot - // to immediately create a CacheEntry - // Third version - used a mark hash that to match exactly, - // that could lead to access order fights over the cache slot - // So this version is a hybrid of 2nd & 3rd, using a bloom filter - // that effectively degenerates to a boolean - - // This approach provides a nice balance when there's an A-B-A access pattern - // The first A will mark the slot - // Then B will mark the slot with A | B - // Then either A or B can claim and reset the slot - - int priorMarkHash = marks[index]; - boolean match = ((priorMarkHash & newAdjHash) == newAdjHash); - if (match) { - marks[index] = 0; - } else { - marks[index] = priorMarkHash | newAdjHash; - } - return match; - } - static final boolean lfuInsert(CacheEntry[] entries, int numProbes, CacheEntry newEntry) { - int initialBucketIndex = initialBucketIndex(entries, newEntry.adjHash()); + int initialBucketIndex = Caching.bucketIndex(entries, newEntry.adjHash()); // initial scan to see if there's an empty slot or marker entry is already present double lowestScore = Double.MAX_VALUE; @@ -353,7 +348,7 @@ static final boolean lfuInsert(CacheEntry[] entries, int numProbes, CacheEntry n } static final boolean lruInsert(CacheEntry[] entries, int numProbes, CacheEntry newEntry) { - int initialBucketIndex = initialBucketIndex(entries, newEntry.adjHash()); + int initialBucketIndex = Caching.bucketIndex(entries, newEntry.adjHash()); // initial scan to see if there's an empty slot or entry is already present long lowestUsedMs = Long.MAX_VALUE; @@ -378,17 +373,9 @@ static final boolean lruInsert(CacheEntry[] entries, int numProbes, CacheEntry n return true; } - static final int initialBucketIndex(CacheEntry[] entries, int adjHash) { - return adjHash & (entries.length - 1); - } - - static final int initialBucketIndex(int[] marks, int adjHash) { - return adjHash & (marks.length - 1); - } - static final int 
lookupEntryIndex( CacheEntry[] entries, int numProbes, int adjHash, String value) { - int initialBucketIndex = initialBucketIndex(entries, adjHash); + int initialBucketIndex = Caching.bucketIndex(entries, adjHash); for (int probe = 0, index = initialBucketIndex; probe < numProbes; ++probe, ++index) { if (index >= entries.length) index = 0; @@ -456,11 +443,6 @@ boolean isPurgeable() { return (this.score < PURGE_THRESHOLD); } - static final int adjHash(String value) { - int hash = value.hashCode(); - return (hash == 0) ? 0xDA7AD06 : hash; - } - static final byte[] utf8(String value) { return value.getBytes(StandardCharsets.UTF_8); } diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java index 04c4768fc2c..56cb10594a6 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/SimpleUtf8Cache.java @@ -2,7 +2,6 @@ import datadog.communication.serialization.EncodingCache; import java.nio.charset.StandardCharsets; -import java.util.Arrays; /** * A simple UTF8 cache - primarily intended for tag names @@ -44,12 +43,14 @@ * a LFU: least frequently used eviction policy is used to free up a slot. 
*/ public final class SimpleUtf8Cache implements EncodingCache { + static final int MAX_CAPACITY = 1024; + private static final int MAX_PROBES = 4; private final int SIZE = 64; - private final int[] markers = new int[SIZE]; - private final CacheEntry[] entries = new CacheEntry[SIZE]; + private final int[] markers; + private final CacheEntry[] entries; private static final double HIT_DECAY = 0.5D; private static final double PURGE_THRESHOLD = 0.25D; @@ -59,6 +60,17 @@ public final class SimpleUtf8Cache implements EncodingCache { protected int hits = 0; protected int evictions = 0; + public SimpleUtf8Cache(int capacity) { + int size = Caching.cacheSizeFor(Math.min(capacity, MAX_CAPACITY)); + + this.markers = new int[size]; + this.entries = new CacheEntry[size]; + } + + public int capacity() { + return this.entries.length; + } + /** * Recalibrates the cache Applies a decay to existing entries - and purges entries below the * PURGE_THRESHOLD @@ -75,7 +87,7 @@ public synchronized void recalibrate() { if (purge) thisEntries[i] = null; } - Arrays.fill(this.markers, 0); + Caching.reset(this.markers); } @Override @@ -94,7 +106,7 @@ public final byte[] getUtf8(String value) { CacheEntry[] thisEntries = this.entries; - int adjHash = CacheEntry.adjHash(value); + int adjHash = Caching.adjHash(value); CacheEntry matchingEntry = lookupEntry(thisEntries, adjHash, value); if (matchingEntry != null) { @@ -104,7 +116,7 @@ public final byte[] getUtf8(String value) { return matchingEntry.utf8(); } - boolean wasMarked = mark(this.markers, adjHash); + boolean wasMarked = Caching.mark(this.markers, adjHash); if (!wasMarked) return CacheEntry.utf8(value); CacheEntry newEntry = new CacheEntry(adjHash, value); @@ -117,7 +129,7 @@ public final byte[] getUtf8(String value) { } static final CacheEntry lookupEntry(CacheEntry[] entries, int adjHash, String value) { - int initialBucketIndex = initialBucketIndex(entries, adjHash); + int initialBucketIndex = Caching.bucketIndex(entries, adjHash); 
for (int probe = 0, index = initialBucketIndex; probe < MAX_PROBES; ++probe, ++index) { if (index >= entries.length) index = 0; @@ -130,7 +142,7 @@ static final CacheEntry lookupEntry(CacheEntry[] entries, int adjHash, String va } static final boolean lfuInsert(CacheEntry[] entries, CacheEntry newEntry) { - int initialBucketIndex = initialBucketIndex(entries, newEntry.adjHash()); + int initialBucketIndex = Caching.bucketIndex(entries, newEntry.adjHash()); // initial scan to see if there's an empty slot or marker entry is already present double lowestHits = Double.MAX_VALUE; @@ -156,43 +168,6 @@ static final boolean lfuInsert(CacheEntry[] entries, CacheEntry newEntry) { return true; } - static final int initialBucketIndex(CacheEntry[] entries, int adjHash) { - return adjHash & (entries.length - 1); - } - - static final int initialBucketIndex(int[] marks, int adjHash) { - return adjHash & (marks.length - 1); - } - - static final boolean mark(int[] marks, int newAdjHash) { - int index = initialBucketIndex(marks, newAdjHash); - - // This is the 4th iteration of the marking strategy - // First version - used a mark entry, but that would prematurely - // burn a slot in the cache - // Second version - used a mark boolean, that worked well, but - // was a overly permissive in allowing the next request to the same slot - // to immediately create a CacheEntry - // Third version - used a mark hash that to match exactly, - // that could lead to access order fights over the cache slot - // So this version is a hybrid of 2nd & 3rd, using a bloom filter - // that effectively degenerates to a boolean - - // This approach provides a nice balance when there's an A-B-A access pattern - // The first A will mark the slot - // Then B will mark the slot with A | B - // Then either A or B can claim and reset the slot - - int priorMarkHash = marks[index]; - boolean match = ((priorMarkHash & newAdjHash) == newAdjHash); - if (match) { - marks[index] = 0; - } else { - marks[index] = 
priorMarkHash | newAdjHash; - } - return match; - } - static final class CacheEntry { final int adjHash; final String value; @@ -243,11 +218,6 @@ boolean isPurgeable() { return (this.score < PURGE_THRESHOLD); } - static final int adjHash(String value) { - int hash = value.hashCode(); - return (hash == 0) ? 0xDA7AD06 : hash ^ (hash >>> 16); - } - static final byte[] utf8(String value) { return value.getBytes(StandardCharsets.UTF_8); } diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/TraceMapperV0_4.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/TraceMapperV0_4.java index 475fe0dee8d..6c9fca64112 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/TraceMapperV0_4.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/TraceMapperV0_4.java @@ -25,10 +25,14 @@ public final class TraceMapperV0_4 implements TraceMapper { static final SimpleUtf8Cache TAG_CACHE = - Config.get().isUtf8CacheEnabled() ? new SimpleUtf8Cache() : null; + Config.get().getTagNameUtf8CacheSize() > 0 + ? new SimpleUtf8Cache(Config.get().getTagNameUtf8CacheSize()) + : null; static final GenerationalUtf8Cache VALUE_CACHE = - Config.get().isUtf8CacheEnabled() ? new GenerationalUtf8Cache() : null; + Config.get().getTagValueUtf8CacheSize() > 0 + ? 
new GenerationalUtf8Cache(Config.get().getTagValueUtf8CacheSize()) + : null; private final int size; diff --git a/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/CachingTest.java b/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/CachingTest.java new file mode 100644 index 00000000000..87e709d8910 --- /dev/null +++ b/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/CachingTest.java @@ -0,0 +1,45 @@ +package datadog.trace.common.writer.ddagent; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import org.junit.Test; + +public class CachingTest { + @Test + public void capacity() { + // exact + assertEquals(64, Caching.cacheSizeFor(64)); + assertEquals(128, Caching.cacheSizeFor(128)); + + // next power of 2 + assertEquals(64, Caching.cacheSizeFor(63)); + assertEquals(64, Caching.cacheSizeFor(33)); + } + + @Test + public void marking_exact() { + int[] marks = new int[Caching.cacheSizeFor(32)]; + + assertFalse(Caching.mark(marks, 31)); + assertTrue(Caching.mark(marks, 31)); + + // should have been reset + assertFalse(Caching.mark(marks, 31)); + } + + @Test + public void marking_collision() { + // deliberately using tiny array to force collision + int[] marks = new int[1]; + + // powers of 2 to reduce false positives in test + assertFalse(Caching.mark(marks, 128)); + assertFalse(Caching.mark(marks, 64)); + + assertTrue(Caching.mark(marks, 128)); + // should now be reset + assertFalse(Caching.mark(marks, 64)); + } +} diff --git a/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/GenerationalUtf8CacheTest.java b/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/GenerationalUtf8CacheTest.java index 65a470ce915..3412a40dc13 100644 --- a/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/GenerationalUtf8CacheTest.java +++ 
b/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/GenerationalUtf8CacheTest.java @@ -14,10 +14,14 @@ import org.junit.jupiter.params.provider.ValueSource; public class GenerationalUtf8CacheTest { + static final GenerationalUtf8Cache create() { + return new GenerationalUtf8Cache(64, 128); + } + @ParameterizedTest @ValueSource(strings = {"foo", "bar", "baz", "quux"}) public void getUtf8(String value) { - GenerationalUtf8Cache cache = new GenerationalUtf8Cache(); + GenerationalUtf8Cache cache = create(); for (int i = 0; i < 10; ++i) { byte[] valueUtf8 = cache.getUtf8(value); @@ -25,9 +29,39 @@ public void getUtf8(String value) { } } + @Test + public void capacity() { + GenerationalUtf8Cache cache = new GenerationalUtf8Cache(192); + assertEquals(64, cache.edenCapacity()); + assertEquals(128, cache.tenuredCapacity()); + } + + @Test + public void maxCapacity() { + GenerationalUtf8Cache cache = + new GenerationalUtf8Cache( + GenerationalUtf8Cache.MAX_EDEN_CAPACITY + 1, + GenerationalUtf8Cache.MAX_TENURED_CAPACITY + 1); + + assertEquals(GenerationalUtf8Cache.MAX_EDEN_CAPACITY, cache.edenCapacity()); + assertEquals(GenerationalUtf8Cache.MAX_TENURED_CAPACITY, cache.tenuredCapacity()); + } + + @Test + public void maxCapacity_combined() { + GenerationalUtf8Cache cache = + new GenerationalUtf8Cache( + GenerationalUtf8Cache.MAX_EDEN_CAPACITY + + GenerationalUtf8Cache.MAX_TENURED_CAPACITY + + 2); + + assertEquals(GenerationalUtf8Cache.MAX_EDEN_CAPACITY, cache.edenCapacity()); + assertEquals(GenerationalUtf8Cache.MAX_TENURED_CAPACITY, cache.tenuredCapacity()); + } + @Test public void caching() { - GenerationalUtf8Cache cache = new GenerationalUtf8Cache(); + GenerationalUtf8Cache cache = create(); String value = "bar"; byte[] expected = value.getBytes(StandardCharsets.UTF_8); @@ -50,7 +84,7 @@ public void caching() { @Test public void promotion() { - GenerationalUtf8Cache cache = new GenerationalUtf8Cache(); + GenerationalUtf8Cache cache = create(); String value = 
"bar"; byte[] expected = value.getBytes(StandardCharsets.UTF_8); @@ -87,7 +121,7 @@ public void fuzz() { int edenHits = 0; int promotedHits = 0; - GenerationalUtf8Cache cache = new GenerationalUtf8Cache(); + GenerationalUtf8Cache cache = create(); for (int i = 0; i < 1_000; ++i) { cache.recalibrate(); @@ -119,7 +153,7 @@ public void bigString_dont_cache() { } byte[] expected = lorem.getBytes(StandardCharsets.UTF_8); - GenerationalUtf8Cache cache = new GenerationalUtf8Cache(); + GenerationalUtf8Cache cache = create(); byte[] first = cache.getUtf8(lorem); assertArrayEquals(expected, first); diff --git a/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/SimpleUtf8CacheTest.java b/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/SimpleUtf8CacheTest.java index 8785252fef8..5921a751cd5 100644 --- a/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/SimpleUtf8CacheTest.java +++ b/dd-trace-core/src/test/java/datadog/trace/common/writer/ddagent/SimpleUtf8CacheTest.java @@ -14,10 +14,27 @@ import org.junit.jupiter.params.provider.ValueSource; public class SimpleUtf8CacheTest { + static final SimpleUtf8Cache create() { + return new SimpleUtf8Cache(64); + } + + @Test + public void capacity() { + SimpleUtf8Cache cache = new SimpleUtf8Cache(128); + assertEquals(128, cache.capacity()); + } + + @Test + public void maxCapacity() { + SimpleUtf8Cache cache = new SimpleUtf8Cache(SimpleUtf8Cache.MAX_CAPACITY + 1); + + assertEquals(SimpleUtf8Cache.MAX_CAPACITY, cache.capacity()); + } + @ParameterizedTest @ValueSource(strings = {"foo", "bar", "baz", "quux"}) public void getUtf8(String value) { - SimpleUtf8Cache cache = new SimpleUtf8Cache(); + SimpleUtf8Cache cache = create(); for (int i = 0; i < 10; ++i) { byte[] valueUtf8 = cache.getUtf8(value); @@ -27,7 +44,7 @@ public void getUtf8(String value) { @Test public void caching() { - SimpleUtf8Cache cache = new SimpleUtf8Cache(); + SimpleUtf8Cache cache = create(); String value = "bar"; byte[] 
expected = value.getBytes(StandardCharsets.UTF_8); @@ -54,7 +71,7 @@ public void fuzz() { int hits = 0; - SimpleUtf8Cache cache = new SimpleUtf8Cache(); + SimpleUtf8Cache cache = create(); for (int i = 0; i < 1_000; ++i) { cache.recalibrate(); @@ -84,7 +101,7 @@ public void bigString_dont_cache() { } byte[] expected = lorem.getBytes(StandardCharsets.UTF_8); - SimpleUtf8Cache cache = new SimpleUtf8Cache(); + SimpleUtf8Cache cache = create(); byte[] first = cache.getUtf8(lorem); assertArrayEquals(expected, first); diff --git a/internal-api/src/main/java/datadog/trace/api/Config.java b/internal-api/src/main/java/datadog/trace/api/Config.java index 8a6c7d9e420..db135c64703 100644 --- a/internal-api/src/main/java/datadog/trace/api/Config.java +++ b/internal-api/src/main/java/datadog/trace/api/Config.java @@ -1226,7 +1226,8 @@ public static String getHostName() { private final boolean jdkSocketEnabled; private final boolean optimizedMapEnabled; - private final boolean utf8CacheEnabled; + private final int tagNameUtf8CacheSize; + private final int tagValueUtf8CacheSize; private final int stackTraceLengthLimit; private final RumInjectorConfig rumInjectorConfig; @@ -2734,7 +2735,10 @@ PROFILING_DATADOG_PROFILER_ENABLED, isDatadogProfilerSafeInCurrentEnvironment()) this.optimizedMapEnabled = configProvider.getBoolean(GeneralConfig.OPTIMIZED_MAP_ENABLED, false); - this.utf8CacheEnabled = configProvider.getBoolean(GeneralConfig.UTF8_CACHE_ENABLED, true); + this.tagNameUtf8CacheSize = + Math.max(configProvider.getInteger(GeneralConfig.TAG_NAME_UTF8_CACHE_SIZE, 128), 0); + this.tagValueUtf8CacheSize = + Math.max(configProvider.getInteger(GeneralConfig.TAG_VALUE_UTF8_CACHE_SIZE, 384), 0); int defaultStackTraceLengthLimit = instrumenterConfig.isCiVisibilityEnabled() @@ -4442,8 +4446,12 @@ public boolean isOptimizedMapEnabled() { return optimizedMapEnabled; } - public boolean isUtf8CacheEnabled() { - return utf8CacheEnabled; + public int getTagNameUtf8CacheSize() { + return 
tagNameUtf8CacheSize; + } + + public int getTagValueUtf8CacheSize() { + return tagValueUtf8CacheSize; } public int getStackTraceLengthLimit() { From bd17af9d4946f9fddc48b5153f6efe7312abdc9d Mon Sep 17 00:00:00 2001 From: Brice Dutheil Date: Fri, 5 Sep 2025 09:14:29 +0200 Subject: [PATCH 22/23] fix: small compilation fix --- .../datadog/trace/common/writer/ddagent/Utf8Benchmark.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java b/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java index 4177e37ebd0..1fa592de539 100644 --- a/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java +++ b/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java @@ -102,7 +102,7 @@ public static final void valueUtf8_baseline(Blackhole bh) { } } - static final GenerationalUtf8Cache VALUE_CACHE = new GenerationalUtf8Cache(); + static final GenerationalUtf8Cache VALUE_CACHE = new GenerationalUtf8Cache(128); @Benchmark public static final void valueUtf8_cache_generational(Blackhole bh) { @@ -118,7 +118,7 @@ public static final void valueUtf8_cache_generational(Blackhole bh) { } } - static final SimpleUtf8Cache SIMPLE_VALUE_CACHE = new SimpleUtf8Cache(); + static final SimpleUtf8Cache SIMPLE_VALUE_CACHE = new SimpleUtf8Cache(128); @Benchmark public static final void valueUtf8_cache_simple(Blackhole bh) { From c923194dea7b7fe4e37854e08b0e3cab596002d1 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Fri, 5 Sep 2025 10:15:57 -0400 Subject: [PATCH 23/23] Adding missing size parameters to benchmark --- .../datadog/trace/common/writer/ddagent/Utf8Benchmark.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java b/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java index
1fa592de539..37d63e9e783 100644 --- a/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java +++ b/dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java @@ -79,7 +79,7 @@ public static final byte[] tagUtf8_nocache() { return tag.getBytes(StandardCharsets.UTF_8); } - static final SimpleUtf8Cache TAG_CACHE = new SimpleUtf8Cache(); + static final SimpleUtf8Cache TAG_CACHE = new SimpleUtf8Cache(128); @Benchmark public static final byte[] tagUtf8_w_cache() { @@ -102,7 +102,7 @@ public static final void valueUtf8_baseline(Blackhole bh) { } } - static final GenerationalUtf8Cache VALUE_CACHE = new GenerationalUtf8Cache(128); + static final GenerationalUtf8Cache VALUE_CACHE = new GenerationalUtf8Cache(64, 128); @Benchmark public static final void valueUtf8_cache_generational(Blackhole bh) {