Skip to content

Commit 8203a22

Browse files
authored
Creating HashingUtils as alternative to Objects.hash (#10628)
* Creating HashingUtils Adding HashingUtils which is a replacement for the hashing routines of java.util.Objects. HashingUtils is specifically designed to avoid var-args allocation which can lead to lots of allocation if hashCode is hot. To demonstrate the potential impact see HashingBenchmark * Renamed Objects -> HashingUtils * Fix-up after renaming
1 parent a93dac2 commit 8203a22

3 files changed

Lines changed: 537 additions & 0 deletions

File tree

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
package datadog.trace.util;
2+
3+
import java.util.concurrent.ThreadLocalRandom;
4+
import java.util.function.Supplier;
5+
import org.openjdk.jmh.annotations.Benchmark;
6+
import org.openjdk.jmh.annotations.Fork;
7+
import org.openjdk.jmh.annotations.Measurement;
8+
import org.openjdk.jmh.annotations.Threads;
9+
import org.openjdk.jmh.annotations.Warmup;
10+
11+
/**
12+
* In contrast to java.util.Objects.hash, datadog.trace.util.HashingUtils.hash has overloads for different
13+
* parameter counts that allow most callers to avoid calling the var-arg version. This avoids the
14+
* common situation where the JIT's escape analysis is unable to elide the var-arg array allocation.
15+
*
16+
* <p>This results in 3-4x throughput, but more importantly no allocation as compared to GiBs / sec
17+
* with var-args. <code>
18+
* MacBook M1 using 8 threads/cores with -prof gc
19+
*
20+
* Benchmark Mode Cnt Score Error Units
21+
*
22+
* HashingBenchmark.hash2 thrpt 6 3365779949.250 ± 270198455.226 ops/s
23+
* HashingBenchmark.hash2:gc.alloc.rate thrpt 6 0.001 ± 0.001 MB/sec
24+
*
25+
* HashingBenchmark.hash2_varargs thrpt 6 1194884232.767 ± 39724408.823 ops/s
26+
* HashingBenchmark.hash2_varargs:gc.alloc.rate thrpt 6 27330.473 ± 909.029 MB/sec
27+
*
28+
*
29+
* HashingBenchmark.hash3 thrpt 6 2314013984.714 ± 181952393.469 ops/s
30+
* HashingBenchmark.hash3:gc.alloc.rate thrpt 6 0.001 ± 0.001 MB/sec
31+
*
32+
* HashingBenchmark.hash3_varags thrpt 6 869246242.250 ± 121680442.505 ops/s
33+
* HashingBenchmark.hash3_varags:gc.alloc.rate thrpt 6 26514.569 ± 3709.819 MB/sec
34+
*
35+
*
36+
* HashingBenchmark.hash4 thrpt 6 1866997193.226 ± 181198915.326 ops/s
37+
* HashingBenchmark.hash4:gc.alloc.rate thrpt 6 0.001 ± 0.001 MB/sec
38+
*
39+
* HashingBenchmark.hash4_varargs thrpt 6 702697142.147 ± 24458612.481 ops/s
40+
* HashingBenchmark.hash4_varargs:gc.alloc.rate thrpt 6 21437.996 ± 748.911 MB/sec
41+
*
42+
*
43+
* HashingBenchmark.hash5 thrpt 6 1803117534.112 ± 242918817.144 ops/s
44+
* HashingBenchmark.hash5:gc.alloc.rate thrpt 6 0.001 ± 0.001 MB/sec
45+
*
46+
* HashingBenchmark.hash5_varargs thrpt 6 579139583.196 ± 29525483.594 ops/s
47+
* HashingBenchmark.hash5_varargs:gc.alloc.rate thrpt 6 22082.357 ± 1125.413 MB/sec
48+
* </code>
49+
*/
50+
@Fork(2)
51+
@Warmup(iterations = 2)
52+
@Measurement(iterations = 3)
53+
@Threads(8)
54+
public class HashingBenchmark {
55+
static <T> T init(Supplier<T> supplier) {
56+
return supplier.get();
57+
}
58+
59+
// strings used in hashing are set up ahead of time, so that the only allocation is from var-args
60+
static String[] TEST_STRINGS =
61+
init(
62+
() -> {
63+
ThreadLocalRandom random = ThreadLocalRandom.current();
64+
65+
String[] strings = new String[1024];
66+
for (int i = 0; i < strings.length; ++i) {
67+
strings[i] = Double.toString(random.nextDouble());
68+
}
69+
return strings;
70+
});
71+
72+
static {
73+
Thread updaterThread =
74+
new Thread(
75+
() -> {
76+
ThreadLocalRandom random = ThreadLocalRandom.current();
77+
78+
while (!Thread.interrupted()) {
79+
str0 = TEST_STRINGS[random.nextInt(0, TEST_STRINGS.length)];
80+
str1 = TEST_STRINGS[random.nextInt(0, TEST_STRINGS.length)];
81+
str2 = TEST_STRINGS[random.nextInt(0, TEST_STRINGS.length)];
82+
str3 = TEST_STRINGS[random.nextInt(0, TEST_STRINGS.length)];
83+
str4 = TEST_STRINGS[random.nextInt(0, TEST_STRINGS.length)];
84+
}
85+
});
86+
updaterThread.setDaemon(true);
87+
updaterThread.start();
88+
}
89+
90+
static String str0;
91+
static String str1;
92+
static String str2;
93+
static String str3;
94+
static String str4;
95+
96+
@Benchmark
97+
public int hash2() {
98+
return datadog.trace.util.HashingUtils.hash(str0, str1);
99+
}
100+
101+
@Benchmark
102+
public int hash2_varargs() {
103+
return java.util.Objects.hash(str0, str1);
104+
}
105+
106+
@Benchmark
107+
public int hash3() {
108+
return datadog.trace.util.HashingUtils.hash(str0, str1, str2);
109+
}
110+
111+
@Benchmark
112+
public int hash3_varags() {
113+
return java.util.Objects.hash(str0, str1, str2);
114+
}
115+
116+
@Benchmark
117+
public int hash4() {
118+
return datadog.trace.util.HashingUtils.hash(str0, str1, str2, str3);
119+
}
120+
121+
@Benchmark
122+
public int hash4_varargs() {
123+
return java.util.Objects.hash(str0, str1, str2, str3);
124+
}
125+
126+
@Benchmark
127+
public int hash5() {
128+
return datadog.trace.util.HashingUtils.hash(str0, str1, str2, str3, str4);
129+
}
130+
131+
@Benchmark
132+
public int hash5_varargs() {
133+
return java.util.Objects.hash(str0, str1, str2, str3, str4);
134+
}
135+
}
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
package datadog.trace.util;
2+
3+
/**
 * Replacement for the hashing portions of {@code java.util.Objects}.
 *
 * <p>This class provides convenience methods for hashing primitives and includes overloads of
 * <code>hash</code> for many argument counts, so that callers can avoid var-args array
 * allocation.
 *
 * <p>All multi-value hashes are the polynomial {@code 31^(n-1) * h0 + ... + 31 * h(n-2) + h(n-1)}
 * with an accumulator that starts at 0. {@code java.util.Objects.hash} seeds its accumulator with
 * 1, so the numeric results of the two APIs differ even though the null handling is the same.
 */
public final class HashingUtils {
  private HashingUtils() {}

  /**
   * Null-safe {@link Object#hashCode()}.
   *
   * @param obj object to hash, may be {@code null}
   * @return {@code obj.hashCode()}, or 0 when {@code obj} is {@code null}
   */
  public static final int hashCode(Object obj) {
    return obj != null ? obj.hashCode() : 0;
  }

  /** Returns {@link Boolean#hashCode(boolean)} of {@code value}. */
  public static final int hash(boolean value) {
    return Boolean.hashCode(value);
  }

  /** Returns {@link Character#hashCode(char)} of {@code value}. */
  public static final int hash(char value) {
    return Character.hashCode(value);
  }

  /** Returns {@link Byte#hashCode(byte)} of {@code value}. */
  public static final int hash(byte value) {
    return Byte.hashCode(value);
  }

  /** Returns {@link Short#hashCode(short)} of {@code value}. */
  public static final int hash(short value) {
    return Short.hashCode(value);
  }

  /** Returns {@link Integer#hashCode(int)} of {@code value}. */
  public static final int hash(int value) {
    return Integer.hashCode(value);
  }

  /** Returns {@link Long#hashCode(long)} of {@code value}. */
  public static final int hash(long value) {
    return Long.hashCode(value);
  }

  /** Returns {@link Float#hashCode(float)} of {@code value}. */
  public static final int hash(float value) {
    return Float.hashCode(value);
  }

  /** Returns {@link Double#hashCode(double)} of {@code value}. */
  public static final int hash(double value) {
    return Double.hashCode(value);
  }

  /**
   * Null-safe hash of a single object; same result as {@link #hashCode(Object)}.
   *
   * @param obj object to hash, may be {@code null}
   * @return {@code obj.hashCode()}, or 0 when {@code obj} is {@code null}
   */
  public static final int hash(Object obj) {
    return hashCode(obj);
  }

  /**
   * Combines the null-safe hash codes of two objects.
   *
   * @return {@code 31 * hashCode(obj0) + hashCode(obj1)}
   */
  public static final int hash(Object obj0, Object obj1) {
    return hash(hashCode(obj0), hashCode(obj1));
  }

  /**
   * Combines two already-computed hash codes.
   *
   * @return {@code 31 * hash0 + hash1}
   */
  public static final int hash(int hash0, int hash1) {
    return 31 * hash0 + hash1;
  }

  /** Combines the null-safe hash codes of three objects. */
  public static final int hash(Object obj0, Object obj1, Object obj2) {
    return hash(hashCode(obj0), hashCode(obj1), hashCode(obj2));
  }

  /**
   * Combines three already-computed hash codes.
   *
   * @return {@code 31^2 * hash0 + 31 * hash1 + hash2}
   */
  public static final int hash(int hash0, int hash1, int hash2) {
    // DQH - Micro-optimizing: written as an expanded polynomial rather than nested
    // 31 * (...) steps. The constant powers of 31 (e.g. 31 * 31) are folded at compile time and
    // the multiplications do not depend on one another, which is intended to make better use
    // of the core's execution ports.
    return 31 * 31 * hash0 + 31 * hash1 + hash2;
  }

  /** Combines the null-safe hash codes of four objects. */
  public static final int hash(Object obj0, Object obj1, Object obj2, Object obj3) {
    return hash(hashCode(obj0), hashCode(obj1), hashCode(obj2), hashCode(obj3));
  }

  /**
   * Combines four already-computed hash codes.
   *
   * @return {@code 31^3 * hash0 + 31^2 * hash1 + 31 * hash2 + hash3}
   */
  public static final int hash(int hash0, int hash1, int hash2, int hash3) {
    // DQH - Micro-optimizing: expanded polynomial with compile-time folded powers of 31 and
    // mutually independent multiplications (see the three-argument overload).
    return 31 * 31 * 31 * hash0 + 31 * 31 * hash1 + 31 * hash2 + hash3;
  }

  /** Combines the null-safe hash codes of five objects. */
  public static final int hash(Object obj0, Object obj1, Object obj2, Object obj3, Object obj4) {
    // All five values must be forwarded: passing only four would select the four-int overload
    // and silently ignore obj4.
    return hash(hashCode(obj0), hashCode(obj1), hashCode(obj2), hashCode(obj3), hashCode(obj4));
  }

  /**
   * Combines five already-computed hash codes.
   *
   * @return {@code 31^4 * hash0 + 31^3 * hash1 + 31^2 * hash2 + 31 * hash3 + hash4}
   */
  public static final int hash(int hash0, int hash1, int hash2, int hash3, int hash4) {
    // DQH - Micro-optimizing: expanded polynomial with compile-time folded powers of 31 and
    // mutually independent multiplications (see the three-argument overload).
    return 31 * 31 * 31 * 31 * hash0 + 31 * 31 * 31 * hash1 + 31 * 31 * hash2 + 31 * hash3 + hash4;
  }

  /**
   * Combines an array of already-computed hash codes, starting from 0 and applying {@link
   * #addToHash(int, int)} to each element in order.
   *
   * @param hashes hash codes to combine; must not be {@code null}
   * @return the combined hash, 0 for an empty array
   * @deprecated callers have to allocate an array; prefer a fixed-arity {@code hash} overload or
   *     chained calls to {@code addToHash}.
   */
  @Deprecated
  public static final int hash(int[] hashes) {
    int result = 0;
    for (int hash : hashes) {
      result = addToHash(result, hash);
    }
    return result;
  }

  /**
   * Folds one more hash code into a running hash.
   *
   * @param hash the running hash so far
   * @param value the hash code to add
   * @return {@code 31 * hash + value}
   */
  public static final int addToHash(int hash, int value) {
    return 31 * hash + value;
  }

  /** Folds the null-safe hash code of {@code obj} into the running {@code hash}. */
  public static final int addToHash(int hash, Object obj) {
    return addToHash(hash, hashCode(obj));
  }

  /** Folds {@link Boolean#hashCode(boolean)} of {@code value} into the running {@code hash}. */
  public static final int addToHash(int hash, boolean value) {
    return addToHash(hash, Boolean.hashCode(value));
  }

  /** Folds {@link Character#hashCode(char)} of {@code value} into the running {@code hash}. */
  public static final int addToHash(int hash, char value) {
    return addToHash(hash, Character.hashCode(value));
  }

  /** Folds {@link Byte#hashCode(byte)} of {@code value} into the running {@code hash}. */
  public static final int addToHash(int hash, byte value) {
    return addToHash(hash, Byte.hashCode(value));
  }

  /** Folds {@link Short#hashCode(short)} of {@code value} into the running {@code hash}. */
  public static final int addToHash(int hash, short value) {
    return addToHash(hash, Short.hashCode(value));
  }

  /** Folds {@link Long#hashCode(long)} of {@code value} into the running {@code hash}. */
  public static final int addToHash(int hash, long value) {
    return addToHash(hash, Long.hashCode(value));
  }

  /** Folds {@link Float#hashCode(float)} of {@code value} into the running {@code hash}. */
  public static final int addToHash(int hash, float value) {
    return addToHash(hash, Float.hashCode(value));
  }

  /** Folds {@link Double#hashCode(double)} of {@code value} into the running {@code hash}. */
  public static final int addToHash(int hash, double value) {
    return addToHash(hash, Double.hashCode(value));
  }

  /**
   * Combines the null-safe hash codes of all elements, in iteration order.
   *
   * @param objs elements to hash; must not be {@code null}, individual elements may be
   * @return the combined hash, 0 when {@code objs} yields no elements
   */
  public static final int hash(Iterable<?> objs) {
    int result = 0;
    for (Object obj : objs) {
      result = addToHash(result, obj);
    }
    return result;
  }

  /**
   * Combines the null-safe hash codes of all array elements, in order.
   *
   * <p>Calling this array version can result in large amounts of allocation (see
   * HashingBenchmark). Rather than calling this method, add another overload of hash that handles
   * a larger number of arguments or use calls to addToHash.
   *
   * @param objs elements to hash; must not be {@code null}, individual elements may be
   * @return the combined hash, 0 for an empty array
   * @deprecated prefer a fixed-arity {@code hash} overload or chained calls to {@code addToHash}.
   */
  @Deprecated
  public static final int hash(Object[] objs) {
    int result = 0;
    for (Object obj : objs) {
      result = addToHash(result, obj);
    }
    return result;
  }
}

0 commit comments

Comments
 (0)