Skip to content

Commit d39b583

Browse files
klueverError Prone Team
authored andcommitted
Introduce PreferCharsetOverload, a new Error Prone check.
This check suggests using overloads that accept `java.nio.charset.Charset` instead of those that accept a `String` encoding name. The checker identifies method and constructor invocations where a `String` parameter is likely used to specify a charset (based on parameter names like `charset`, `encoding`, `cs`). It then looks for an overload where this `String` parameter is replaced by a `Charset`. The suggested fixes include: * Replacing constant charset names (e.g., `"UTF-8"`) with the corresponding `StandardCharsets` constant (e.g., `UTF_8`). * Removing redundant calls to `Charset.name()`, `displayName()`, or `toString()` when a `Charset` instance is already available. * Wrapping other `String` expressions with `Charset.forName()` as a fallback. PiperOrigin-RevId: 929813068
1 parent a1c7a7c commit d39b583

4 files changed

Lines changed: 795 additions & 0 deletions

File tree

Lines changed: 318 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,318 @@
1+
/*
2+
* Copyright 2026 The Error Prone Authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.google.errorprone.bugpatterns;
17+
18+
import static com.google.errorprone.BugPattern.SeverityLevel.WARNING;
19+
import static com.google.errorprone.matchers.method.MethodMatchers.instanceMethod;
20+
import static com.google.errorprone.suppliers.Suppliers.typeFromString;
21+
import static com.google.errorprone.util.ASTHelpers.constValue;
22+
import static com.google.errorprone.util.ASTHelpers.enclosingClass;
23+
import static com.google.errorprone.util.ASTHelpers.findMatchingMethods;
24+
import static com.google.errorprone.util.ASTHelpers.getReceiver;
25+
import static com.google.errorprone.util.ASTHelpers.getSymbol;
26+
import static com.google.errorprone.util.ASTHelpers.isSameType;
27+
import static java.util.Comparator.comparingInt;
28+
29+
import com.google.common.base.Ascii;
30+
import com.google.common.base.CaseFormat;
31+
import com.google.common.collect.ImmutableList;
32+
import com.google.common.collect.ImmutableSet;
33+
import com.google.errorprone.BugPattern;
34+
import com.google.errorprone.VisitorState;
35+
import com.google.errorprone.bugpatterns.BugChecker.MethodInvocationTreeMatcher;
36+
import com.google.errorprone.bugpatterns.BugChecker.NewClassTreeMatcher;
37+
import com.google.errorprone.bugpatterns.argumentselectiondefects.NamedParameterComment;
38+
import com.google.errorprone.fixes.SuggestedFix;
39+
import com.google.errorprone.fixes.SuggestedFixes;
40+
import com.google.errorprone.matchers.Description;
41+
import com.google.errorprone.matchers.Matcher;
42+
import com.google.errorprone.suppliers.Supplier;
43+
import com.sun.source.tree.ExpressionTree;
44+
import com.sun.source.tree.MethodInvocationTree;
45+
import com.sun.source.tree.NewClassTree;
46+
import com.sun.source.tree.Tree.Kind;
47+
import com.sun.tools.javac.code.Symbol.MethodSymbol;
48+
import com.sun.tools.javac.code.Symbol.VarSymbol;
49+
import com.sun.tools.javac.code.Type;
50+
import java.util.Arrays;
51+
import java.util.List;
52+
import java.util.stream.Stream;
53+
import org.jspecify.annotations.Nullable;
54+
55+
/** A {@link BugChecker}; see the associated {@link BugPattern} annotation for details. */
56+
@BugPattern(
57+
summary =
58+
"Prefer calling overloads that accept a Charset over those that accept a String"
59+
+ " encoding name.",
60+
severity = WARNING)
61+
public final class PreferCharsetOverload extends BugChecker
62+
implements MethodInvocationTreeMatcher, NewClassTreeMatcher {
63+
64+
// Explicitly look for common charset parameter name tokens.
65+
private static final ImmutableSet<String> TARGET_PARAM_NAMES =
66+
ImmutableSet.of("charset", "cs", "csn", "enc", "encoding");
67+
68+
// Matches Charset decomposition calls like charset.name(), charset.displayName().
69+
private static final Matcher<ExpressionTree> CHARSET_DECOMPOSITION_MATCHER =
70+
instanceMethod()
71+
.onDescendantOf("java.nio.charset.Charset")
72+
.namedAnyOf("name", "displayName", "toString");
73+
74+
@Override
75+
public Description matchMethodInvocation(MethodInvocationTree tree, VisitorState state) {
76+
return match(tree, tree.getArguments(), getSymbol(tree), state);
77+
}
78+
79+
@Override
80+
public Description matchNewClass(NewClassTree tree, VisitorState state) {
81+
return match(tree, tree.getArguments(), getSymbol(tree), state);
82+
}
83+
84+
private Description match(
85+
ExpressionTree tree,
86+
List<? extends ExpressionTree> arguments,
87+
@Nullable MethodSymbol methodSymbol,
88+
VisitorState state) {
89+
if (methodSymbol == null) {
90+
return Description.NO_MATCH;
91+
}
92+
93+
// 1. Find indices of String parameters that are potential charset names.
94+
ImmutableList<Integer> candidateIndices = findCharsetCandidates(methodSymbol, state);
95+
if (candidateIndices.isEmpty()) {
96+
return Description.NO_MATCH;
97+
}
98+
99+
// 2. Find the best overload that accepts Charset.
100+
MethodSymbol bestOverload = findBestCharsetOverload(methodSymbol, candidateIndices, state);
101+
if (bestOverload == null) {
102+
return Description.NO_MATCH;
103+
}
104+
105+
// 3. Build the fix to use the Charset overload.
106+
SuggestedFix fix = buildFix(arguments, bestOverload, candidateIndices, state);
107+
return fix != null ? describeMatch(tree, fix) : Description.NO_MATCH;
108+
}
109+
110+
/**
111+
* Identifies indices of String parameters in the given method that are likely to represent a
112+
* charset or encoding.
113+
*/
114+
private static ImmutableList<Integer> findCharsetCandidates(
115+
MethodSymbol methodSymbol, VisitorState state) {
116+
// Skip methods/constructors with synthetic parameter names (e.g. inner-class constructor
117+
// implicit parameters like this$0, or when real parameter names are not available).
118+
if (NamedParameterComment.containsSyntheticParameterName(methodSymbol)) {
119+
return ImmutableList.of();
120+
}
121+
122+
ImmutableList.Builder<Integer> candidateIndicesBuilder = ImmutableList.builder();
123+
List<VarSymbol> parameters = methodSymbol.getParameters();
124+
// Find indices of String parameters with charset-like word tokens.
125+
for (int i = 0; i < parameters.size(); i++) {
126+
VarSymbol param = parameters.get(i);
127+
if (isSameType(param.asType(), state.getSymtab().stringType, state)) {
128+
String paramName = param.getSimpleName().toString();
129+
if (tokenize(paramName).anyMatch(TARGET_PARAM_NAMES::contains)) {
130+
candidateIndicesBuilder.add(i);
131+
}
132+
}
133+
}
134+
return candidateIndicesBuilder.build();
135+
}
136+
137+
/**
138+
* Generates a {@link SuggestedFix} to call the {@code bestOverload} instead of the original
139+
* method, by converting the string arguments at {@code candidateIndices} to Charset types.
140+
* Returns null if no fix can be generated.
141+
*/
142+
private static @Nullable SuggestedFix buildFix(
143+
List<? extends ExpressionTree> arguments,
144+
MethodSymbol bestOverload,
145+
ImmutableList<Integer> candidateIndices,
146+
VisitorState state) {
147+
SuggestedFix.Builder fix = SuggestedFix.builder();
148+
List<VarSymbol> bestParams = bestOverload.getParameters();
149+
boolean fixedAnyParam = false;
150+
Type charsetType = CHARSET_TYPE.get(state);
151+
for (int i : candidateIndices) {
152+
if (i >= arguments.size()) {
153+
continue; // Skip if argument is not provided (e.g., varargs).
154+
}
155+
if (isSameType(bestParams.get(i).asType(), charsetType, state)) {
156+
if (!suggestParamFix(arguments.get(i), state, fix)) {
157+
return null;
158+
}
159+
fixedAnyParam = true;
160+
}
161+
}
162+
return fixedAnyParam ? fix.build() : null;
163+
}
164+
165+
private static Stream<String> tokenize(String s) {
166+
return Arrays.stream(CaseFormat.LOWER_CAMEL.to(CaseFormat.LOWER_UNDERSCORE, s).split("_"));
167+
}
168+
169+
private static @Nullable MethodSymbol findBestCharsetOverload(
170+
MethodSymbol methodSymbol, ImmutableList<Integer> candidateIndices, VisitorState state) {
171+
Type startClass = enclosingClass(methodSymbol).asType();
172+
173+
// Find all potential overloads across the class hierarchy.
174+
ImmutableSet<MethodSymbol> matchingMethods =
175+
findMatchingMethods(
176+
methodSymbol.name,
177+
candidate -> isValidCharsetOverload(candidate, methodSymbol, candidateIndices, state),
178+
startClass,
179+
state.getTypes());
180+
181+
// If multiple overloads match, pick the one that replaces the most parameters.
182+
return matchingMethods.stream()
183+
.max(comparingInt(m -> countCharsetParams(m, candidateIndices, state)))
184+
.orElse(null);
185+
}
186+
187+
/**
188+
* Checks if the given {@code candidate} method is a valid {@code Charset}-based overload of the
189+
* given {@code methodSymbol}.
190+
*/
191+
private static boolean isValidCharsetOverload(
192+
MethodSymbol candidate,
193+
MethodSymbol methodSymbol,
194+
ImmutableList<Integer> candidateIndices,
195+
VisitorState state) {
196+
if (candidate.equals(methodSymbol)) {
197+
return false;
198+
}
199+
if (methodSymbol.isConstructor() && !candidate.owner.equals(methodSymbol.owner)) {
200+
return false;
201+
}
202+
if (candidate.isStatic() != methodSymbol.isStatic()) {
203+
return false;
204+
}
205+
if (candidate.isVarArgs() != methodSymbol.isVarArgs()) {
206+
return false;
207+
}
208+
List<VarSymbol> parameters = methodSymbol.getParameters();
209+
List<VarSymbol> candidateParams = candidate.getParameters();
210+
if (parameters.size() != candidateParams.size()) {
211+
return false;
212+
}
213+
214+
Type charsetType = CHARSET_TYPE.get(state);
215+
boolean hasCharsetImprovement = false;
216+
// Check if candidate parameters can be replaced by Charset while keeping
217+
// non-candidate parameters exactly the same.
218+
for (int i = 0; i < parameters.size(); i++) {
219+
Type expectedType = parameters.get(i).asType();
220+
Type candidateParamType = candidateParams.get(i).asType();
221+
222+
if (candidateIndices.contains(i)) {
223+
if (isSameType(candidateParamType, charsetType, state)) {
224+
hasCharsetImprovement = true;
225+
continue;
226+
}
227+
if (isSameType(candidateParamType, state.getSymtab().stringType, state)) {
228+
continue;
229+
}
230+
return false; // Neither String nor Charset
231+
} else {
232+
if (!isSameType(candidateParamType, expectedType, state)) {
233+
return false;
234+
}
235+
}
236+
}
237+
return hasCharsetImprovement
238+
&& isSameType(candidate.getReturnType(), methodSymbol.getReturnType(), state);
239+
}
240+
241+
/** Counts the number of candidate parameters that are Charset types in the given method. */
242+
private static int countCharsetParams(
243+
MethodSymbol method, ImmutableList<Integer> indices, VisitorState state) {
244+
int count = 0;
245+
List<VarSymbol> params = method.getParameters();
246+
Type charsetType = CHARSET_TYPE.get(state);
247+
for (int i : indices) {
248+
if (isSameType(params.get(i).asType(), charsetType, state)) {
249+
count++;
250+
}
251+
}
252+
return count;
253+
}
254+
255+
/**
256+
* Suggests a replacement strategy for a String argument to convert it to a Charset.
257+
*
258+
* <p>The replacement strategy is as follows:
259+
*
260+
* <ol>
261+
* <li>static import {@code StandardCharsets} constants (e.g., {@code UTF_8}, {@code
262+
* ISO_8859_1}, etc.)
263+
* <li>unwrap decomposed {@code Charset} calls (e.g., {@code charset.name()}, {@code
264+
* charset.displayName()}, etc.)
265+
* <li>fallback to {@code Charset.forName()}
266+
* </ol>
267+
*
268+
* @return true if a replacement was successfully added to the {@code fix} builder, or false if no
269+
* fix could be determined (e.g., for null literals).
270+
*/
271+
private static boolean suggestParamFix(
272+
ExpressionTree stringExpr, VisitorState state, SuggestedFix.Builder fix) {
273+
if (stringExpr.getKind() == Kind.NULL_LITERAL) {
274+
return false;
275+
}
276+
277+
String constantName = getStandardCharsetConstant(constValue(stringExpr, String.class));
278+
if (constantName != null) {
279+
fix.addStaticImport("java.nio.charset.StandardCharsets." + constantName);
280+
fix.replace(stringExpr, constantName);
281+
return true;
282+
}
283+
284+
if (CHARSET_DECOMPOSITION_MATCHER.matches(stringExpr, state)) {
285+
fix.replace(stringExpr, state.getSourceForNode(getReceiver(stringExpr)));
286+
return true;
287+
}
288+
289+
String charset = SuggestedFixes.qualifyType(state, fix, "java.nio.charset.Charset");
290+
fix.replace(stringExpr, charset + ".forName(" + state.getSourceForNode(stringExpr) + ")");
291+
return true;
292+
}
293+
294+
/**
295+
* Returns the name of the {@link java.nio.charset.StandardCharsets} constant if the given charset
296+
* name matches a known standard charset (otherwise returns {@code null}).
297+
*/
298+
private static @Nullable String getStandardCharsetConstant(@Nullable String charsetName) {
299+
if (charsetName == null) {
300+
return null;
301+
}
302+
303+
// Normalize the charset name to handle various common formats like "UTF-8", "UTF8", and
304+
// "utf_8" consistently.
305+
String normalized = Ascii.toUpperCase(charsetName).replace("-", "").replace("_", "");
306+
return switch (normalized) {
307+
case "UTF8" -> "UTF_8";
308+
case "ISO88591" -> "ISO_8859_1";
309+
case "USASCII", "ASCII" -> "US_ASCII";
310+
case "UTF16" -> "UTF_16";
311+
case "UTF16BE" -> "UTF_16BE";
312+
case "UTF16LE" -> "UTF_16LE";
313+
default -> null;
314+
};
315+
}
316+
317+
private static final Supplier<Type> CHARSET_TYPE = typeFromString("java.nio.charset.Charset");
318+
}

core/src/main/java/com/google/errorprone/scanner/BuiltInCheckerSuppliers.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,7 @@
329329
import com.google.errorprone.bugpatterns.PatternMatchingInstanceof;
330330
import com.google.errorprone.bugpatterns.PreconditionsCheckNotNullRepeated;
331331
import com.google.errorprone.bugpatterns.PreconditionsInvalidPlaceholder;
332+
import com.google.errorprone.bugpatterns.PreferCharsetOverload;
332333
import com.google.errorprone.bugpatterns.PreferInstanceofOverGetKind;
333334
import com.google.errorprone.bugpatterns.PreferTestParameter;
334335
import com.google.errorprone.bugpatterns.PreferredInterfaceType;
@@ -1131,6 +1132,7 @@ public static ScannerSupplier warningChecks() {
11311132
ParameterName.class,
11321133
PatternMatchingInstanceof.class,
11331134
PreconditionsCheckNotNullRepeated.class,
1135+
PreferCharsetOverload.class,
11341136
PreferInstanceofOverGetKind.class,
11351137
PreferTestParameter.class,
11361138
PreferThrowsTag.class,

0 commit comments

Comments
 (0)