Skip to content

Commit c18559c

Browse files
authored
Merge pull request #236 from blemale/bastien.lemale/perf_do_not_allocate_computing_metadata_size
perf: Do not allocate when computing metadata size
2 parents 20b6515 + 1a461da commit c18559c

3 files changed

Lines changed: 130 additions & 2 deletions

File tree

src/main/java/com/timgroup/statsd/NonBlockingDirectStatsDClient.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,10 @@ public final boolean writeTo(StringBuilder builder, int capacity, String contain
6464
private int metadataSize(StringBuilder builder, String containerID) {
6565
if (metadataSize == -1) {
6666
final int previousLength = builder.length();
67-
final int previousEncodedLength = builder.toString().getBytes(UTF_8).length;
67+
final int previousEncodedLength = Utf8.encodedLength(builder);
6868
writeHeadMetadata(builder);
6969
writeTailMetadata(builder, containerID);
70-
metadataSize = builder.toString().getBytes(UTF_8).length - previousEncodedLength;
70+
metadataSize = Utf8.encodedLength(builder) - previousEncodedLength;
7171
builder.setLength(previousLength);
7272
}
7373
return metadataSize;
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
/*
2+
* Copyright (C) 2013 The Guava Authors
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5+
* in compliance with the License. You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software distributed under the License
10+
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11+
* or implied. See the License for the specific language governing permissions and limitations under
12+
* the License.
13+
*/
14+
15+
package com.timgroup.statsd;
16+
17+
import static java.lang.Character.MAX_SURROGATE;
18+
import static java.lang.Character.MIN_SURROGATE;
19+
20+
import java.nio.charset.StandardCharsets;
21+
22+
/**
 * This class is a partial copy of the {@code com.google.common.base.Utf8}
 * <a href="https://github.com/google/guava/blob/v33.0.0/guava/src/com/google/common/base/Utf8.java">class</a>
 * from the Guava library.
 * It is copied here to avoid a dependency on Guava.
 *
 * <p>In this copy an unpaired surrogate is not rejected: it is counted as
 * {@link #UTF8_REPLACEMENT_LENGTH} bytes, i.e. the length of the replacement sequence reported by
 * the JDK UTF-8 encoder.
 */
final class Utf8 {

    /** Length, in bytes, of the replacement sequence of a fresh JDK UTF-8 encoder. */
    private static final int UTF8_REPLACEMENT_LENGTH = StandardCharsets.UTF_8.newEncoder().replacement().length;

    private Utf8() {
    }

    /**
     * Returns the number of bytes in the UTF-8-encoded form of {@code sequence}. For a string, this
     * method is equivalent to {@code string.getBytes(UTF_8).length}, but is more efficient in both
     * time and space.
     *
     * <p>Ill-formed UTF-16 (an unpaired surrogate) does not cause an exception; every unpaired
     * surrogate contributes the length of the UTF-8 encoder's replacement sequence.
     *
     * @param sequence the characters to measure
     * @return the UTF-8 encoded length of {@code sequence}, in bytes
     * @throws IllegalArgumentException if the UTF-8 length does not fit in an {@code int}
     */
    public static int encodedLength(CharSequence sequence) {
        // Warning to maintainers: this implementation is highly optimized.
        int utf16Length = sequence.length();
        // Start from one byte per char; the loops below only add the extra bytes.
        int utf8Length = utf16Length;
        int index = 0;

        // This loop optimizes for pure ASCII.
        while (index < utf16Length && sequence.charAt(index) < 0x80) {
            index++;
        }

        // This loop optimizes for chars less than 0x800.
        for (; index < utf16Length; index++) {
            char character = sequence.charAt(index);
            if (character < 0x800) {
                // (0x7f - character) is negative exactly when character > 0x7f, so the unsigned
                // shift yields 1 extra byte for two-byte chars and 0 for ASCII. Branch free!
                utf8Length += ((0x7f - character) >>> 31);
            } else {
                // First char needing three or more bytes: hand the rest to the general routine.
                utf8Length += encodedLengthGeneral(sequence, index);
                break;
            }
        }

        if (utf8Length < utf16Length) {
            // Necessary and sufficient condition for overflow because of maximum 3x expansion
            throw new IllegalArgumentException(
                    "UTF-8 length does not fit in int: " + (utf8Length + (1L << 32)));
        }
        return utf8Length;
    }

    /**
     * Returns the number of bytes, beyond one byte per char, needed to UTF-8 encode the chars of
     * {@code sequence} from {@code start} to the end.
     *
     * @param sequence the characters to measure
     * @param start index of the first char to consider
     * @return the extra byte count to add to the caller's one-byte-per-char baseline
     */
    private static int encodedLengthGeneral(CharSequence sequence, int start) {
        int utf16Length = sequence.length();
        int utf8Length = 0;
        for (int index = start; index < utf16Length; index++) {
            char character = sequence.charAt(index);
            if (character < 0x800) {
                utf8Length += (0x7f - character) >>> 31; // branch free!
            } else {
                // Three-byte char: two bytes on top of the baseline.
                utf8Length += 2;
                // jdk7+: if (Character.isSurrogate(character)) {
                if (MIN_SURROGATE <= character && character <= MAX_SURROGATE) {
                    // codePointAt returns the char itself unless it is a high surrogate followed
                    // by a low surrogate, so equality means the surrogate is unpaired.
                    if (Character.codePointAt(sequence, index) == character) {
                        // Bad input: take back the 2 added above, then add the replacement length
                        // minus the 1 byte already in the caller's baseline.
                        utf8Length += -2 + UTF8_REPLACEMENT_LENGTH - 1;
                    } else {
                        // Well-formed pair: 2 baseline bytes + 2 extra = 4 bytes; skip the low
                        // surrogate so it is not examined on its own.
                        index++;
                    }
                }
            }
        }
        return utf8Length;
    }
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
package com.timgroup.statsd;
2+
3+
import org.junit.Test;
4+
5+
import java.nio.ByteBuffer;
6+
import java.nio.CharBuffer;
7+
import java.nio.charset.CharacterCodingException;
8+
import java.nio.charset.CharsetEncoder;
9+
import java.nio.charset.CodingErrorAction;
10+
import java.nio.charset.StandardCharsets;
11+
12+
import static java.lang.Character.MIN_SURROGATE;
13+
import static org.hamcrest.MatcherAssert.assertThat;
14+
import static org.hamcrest.Matchers.equalTo;
15+
16+
public class Utf8Test {
17+
18+
@Test
19+
public void should_handle_malformed_inputs() throws CharacterCodingException {
20+
shouldHandleMalformedInput("foo" + MIN_SURROGATE + "bar");
21+
shouldHandleMalformedInput("🍻☀️😎🏖️" + MIN_SURROGATE + "🍻☀️😎🏖️");
22+
}
23+
24+
private static void shouldHandleMalformedInput(String malformedInput) throws CharacterCodingException {
25+
CharsetEncoder utf8Encoder = StandardCharsets.UTF_8.newEncoder()
26+
.onMalformedInput(CodingErrorAction.REPLACE)
27+
.onUnmappableCharacter(CodingErrorAction.REPLACE);
28+
ByteBuffer encoded = utf8Encoder.encode(CharBuffer.wrap(malformedInput));
29+
30+
assertThat(Utf8.encodedLength(malformedInput), equalTo(encoded.limit()));
31+
}
32+
}

0 commit comments

Comments
 (0)