|
| 1 | +/* |
| 2 | + * SonarQube Python Plugin |
| 3 | + * Copyright (C) SonarSource Sàrl |
| 4 | + * mailto:info AT sonarsource DOT com |
| 5 | + * |
| 6 | + * You can redistribute and/or modify this program under the terms of |
| 7 | + * the Sonar Source-Available License Version 1, as published by SonarSource Sàrl. |
| 8 | + * |
| 9 | + * This program is distributed in the hope that it will be useful, |
| 10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
| 12 | + * See the Sonar Source-Available License for more details. |
| 13 | + * |
| 14 | + * You should have received a copy of the Sonar Source-Available License |
| 15 | + * along with this program; if not, see https://sonarsource.com/license/ssal/ |
| 16 | + */ |
| 17 | +package org.sonar.python.checks; |
| 18 | + |
| 19 | +import java.util.List; |
| 20 | +import java.util.Optional; |
| 21 | +import org.sonar.check.Rule; |
| 22 | +import org.sonar.plugins.python.api.PythonSubscriptionCheck; |
| 23 | +import org.sonar.plugins.python.api.SubscriptionContext; |
| 24 | +import org.sonar.plugins.python.api.quickfix.PythonQuickFix; |
| 25 | +import org.sonar.plugins.python.api.symbols.v2.SymbolV2; |
| 26 | +import org.sonar.plugins.python.api.symbols.v2.UsageV2; |
| 27 | +import org.sonar.plugins.python.api.tree.ArgList; |
| 28 | +import org.sonar.plugins.python.api.tree.CallExpression; |
| 29 | +import org.sonar.plugins.python.api.tree.ComprehensionFor; |
| 30 | +import org.sonar.plugins.python.api.tree.Expression; |
| 31 | +import org.sonar.plugins.python.api.tree.ForStatement; |
| 32 | +import org.sonar.plugins.python.api.tree.Name; |
| 33 | +import org.sonar.plugins.python.api.tree.RegularArgument; |
| 34 | +import org.sonar.plugins.python.api.tree.Tree; |
| 35 | +import org.sonar.plugins.python.api.types.v2.matchers.TypeMatcher; |
| 36 | +import org.sonar.plugins.python.api.types.v2.matchers.TypeMatchers; |
| 37 | +import org.sonar.python.quickfix.TextEditUtils; |
| 38 | +import org.sonar.python.tree.TreeUtils; |
| 39 | + |
| 40 | +@Rule(key = "S8516") |
| 41 | +public class GroupByIteratorReuseCheck extends PythonSubscriptionCheck { |
| 42 | + |
| 43 | + private static final String MESSAGE = "Convert this group iterator to a list."; |
| 44 | + private static final String QUICK_FIX_MESSAGE = "Wrap with \"list()\""; |
| 45 | + |
| 46 | + private static final TypeMatcher GROUPBY_MATCHER = TypeMatchers.isType("itertools.groupby"); |
| 47 | + |
| 48 | + // SAFE_CONSUMER_MATCHER and CLASS_MATCHER are matched leniently (TRUE or UNKNOWN both pass) so |
| 49 | + // unresolved callees don't trigger false positives. |
| 50 | + private static final TypeMatcher SAFE_CONSUMER_MATCHER = TypeMatchers.any( |
| 51 | + TypeMatchers.isType("list"), |
| 52 | + TypeMatchers.isType("tuple"), |
| 53 | + TypeMatchers.isType("set"), |
| 54 | + TypeMatchers.isType("frozenset"), |
| 55 | + TypeMatchers.isType("sorted"), |
| 56 | + TypeMatchers.isType("sum"), |
| 57 | + TypeMatchers.isType("max"), |
| 58 | + TypeMatchers.isType("min"), |
| 59 | + TypeMatchers.isType("any"), |
| 60 | + TypeMatchers.isType("all") |
| 61 | + ); |
| 62 | + |
| 63 | + // Any class constructor that accepts an iterable invariably materializes it inside __init__ |
| 64 | + private static final TypeMatcher CLASS_MATCHER = TypeMatchers.isObjectOfType("type"); |
| 65 | + |
| 66 | + @Override |
| 67 | + public void initialize(Context context) { |
| 68 | + context.registerSyntaxNodeConsumer(Tree.Kind.FOR_STMT, GroupByIteratorReuseCheck::checkForStatement); |
| 69 | + } |
| 70 | + |
| 71 | + private static void checkForStatement(SubscriptionContext ctx) { |
| 72 | + ForStatement forStatement = (ForStatement) ctx.syntaxNode(); |
| 73 | + |
| 74 | + if (forStatement.testExpressions().size() != 1) { |
| 75 | + return; |
| 76 | + } |
| 77 | + |
| 78 | + if (!(forStatement.testExpressions().get(0) instanceof CallExpression callExpr)) { |
| 79 | + return; |
| 80 | + } |
| 81 | + |
| 82 | + if (!GROUPBY_MATCHER.isTrueFor(callExpr.callee(), ctx)) { |
| 83 | + return; |
| 84 | + } |
| 85 | + |
| 86 | + if (forStatement.expressions().size() != 2) { |
| 87 | + return; |
| 88 | + } |
| 89 | + |
| 90 | + if (!(forStatement.expressions().get(1) instanceof Name groupName)) { |
| 91 | + return; |
| 92 | + } |
| 93 | + |
| 94 | + SymbolV2 groupSymbol = groupName.symbolV2(); |
| 95 | + if (groupSymbol == null) { |
| 96 | + return; |
| 97 | + } |
| 98 | + |
| 99 | + Tree loopBody = forStatement.body(); |
| 100 | + |
| 101 | + // If `group` is rebound anywhere in the loop body (e.g. `group = list(group)`), we can't |
| 102 | + // tell from the AST alone which reads see the original iterator. We conservatively skip. |
| 103 | + boolean isReboundInLoopBody = groupSymbol.usages().stream() |
| 104 | + .filter(usage -> usage.kind() == UsageV2.Kind.ASSIGNMENT_LHS) |
| 105 | + .map(UsageV2::tree) |
| 106 | + .flatMap(TreeUtils.toStreamInstanceOfMapper(Name.class)) |
| 107 | + .anyMatch(reboundName -> isInside(reboundName, loopBody)); |
| 108 | + if (isReboundInLoopBody) { |
| 109 | + return; |
| 110 | + } |
| 111 | + |
| 112 | + List<Name> loopBodyReads = groupSymbol.usages().stream() |
| 113 | + .filter(usage -> !usage.isBindingUsage()) |
| 114 | + .map(UsageV2::tree) |
| 115 | + .flatMap(TreeUtils.toStreamInstanceOfMapper(Name.class)) |
| 116 | + .filter(nameUsage -> isInside(nameUsage, loopBody)) |
| 117 | + .toList(); |
| 118 | + |
| 119 | + List<Name> unsafeReads = loopBodyReads.stream() |
| 120 | + .filter(nameUsage -> !isSafeUsage(nameUsage, forStatement, ctx)) |
| 121 | + .toList(); |
| 122 | + |
| 123 | + // The quickfix wraps a single occurrence in `list(...)`. We only attach it when there is |
| 124 | + // exactly one read of `group` in the loop body — this does not affecting any other consumer. |
| 125 | + boolean canOfferQuickFix = loopBodyReads.size() == 1 && unsafeReads.size() == 1; |
| 126 | + for (Name nameUsage : unsafeReads) { |
| 127 | + var issue = ctx.addIssue(nameUsage, MESSAGE); |
| 128 | + if (canOfferQuickFix) { |
| 129 | + PythonQuickFix quickFix = PythonQuickFix.newQuickFix(QUICK_FIX_MESSAGE) |
| 130 | + .addTextEdit(TextEditUtils.insertBefore(nameUsage, "list(")) |
| 131 | + .addTextEdit(TextEditUtils.insertAfter(nameUsage, ")")) |
| 132 | + .build(); |
| 133 | + issue.addQuickFix(quickFix); |
| 134 | + } |
| 135 | + } |
| 136 | + } |
| 137 | + |
| 138 | + private static boolean isSafeUsage(Name nameUsage, ForStatement enclosingForStatement, SubscriptionContext ctx) { |
| 139 | + // A usage inside a nested function or lambda defined in the loop body is always unsafe |
| 140 | + if (isInsideNestedFunctionOrLambda(nameUsage, enclosingForStatement)) { |
| 141 | + return false; |
| 142 | + } |
| 143 | + |
| 144 | + if (nameUsage.parent() instanceof ComprehensionFor compFor && compFor.iterable() == nameUsage) { |
| 145 | + return true; |
| 146 | + } |
| 147 | + |
| 148 | + if (nameUsage.parent() instanceof ForStatement nestedFor |
| 149 | + && nestedFor.testExpressions().stream().anyMatch(e -> e == nameUsage)) { |
| 150 | + return true; |
| 151 | + } |
| 152 | + |
| 153 | + if (nameUsage.parent() instanceof RegularArgument regularArg && regularArg.keywordArgument() == null) { |
| 154 | + return hasSafeConsumerAncestor(regularArg, ctx); |
| 155 | + } |
| 156 | + |
| 157 | + return false; |
| 158 | + } |
| 159 | + |
| 160 | + // We raise when the group iterator escapes the current iteration and is read after the outer |
| 161 | + // `groupby` advances. A positional-arg call chain ending in a safe consumer cannot escape. |
| 162 | + private static boolean hasSafeConsumerAncestor(RegularArgument regularArg, SubscriptionContext ctx) { |
| 163 | + Optional<CallExpression> currentCall = owningCall(regularArg); |
| 164 | + while (currentCall.isPresent()) { |
| 165 | + CallExpression call = currentCall.get(); |
| 166 | + if (isSafeConsumerCallee(call.callee(), ctx)) { |
| 167 | + return true; |
| 168 | + } |
| 169 | + if (!(call.parent() instanceof RegularArgument outerArg) || outerArg.keywordArgument() != null) { |
| 170 | + return false; |
| 171 | + } |
| 172 | + currentCall = owningCall(outerArg); |
| 173 | + } |
| 174 | + return false; |
| 175 | + } |
| 176 | + |
| 177 | + private static boolean isSafeConsumerCallee(Expression callee, SubscriptionContext ctx) { |
| 178 | + return !SAFE_CONSUMER_MATCHER.evaluateFor(callee, ctx).isFalse() |
| 179 | + || !CLASS_MATCHER.evaluateFor(callee, ctx).isFalse(); |
| 180 | + } |
| 181 | + |
| 182 | + private static boolean isInsideNestedFunctionOrLambda(Name nameUsage, ForStatement enclosingForStatement) { |
| 183 | + Tree functionLikeAncestor = TreeUtils.firstAncestorOfKind(nameUsage, Tree.Kind.FUNCDEF, Tree.Kind.LAMBDA); |
| 184 | + return functionLikeAncestor != null |
| 185 | + && isInside(functionLikeAncestor, enclosingForStatement.body()); |
| 186 | + } |
| 187 | + |
| 188 | + private static boolean isInside(Tree tree, Tree container) { |
| 189 | + return TreeUtils.firstAncestor(tree, ancestor -> ancestor == container) != null; |
| 190 | + } |
| 191 | + |
| 192 | + private static Optional<CallExpression> owningCall(RegularArgument regularArg) { |
| 193 | + if (regularArg.parent() instanceof ArgList argList && argList.parent() instanceof CallExpression callExpr) { |
| 194 | + return Optional.of(callExpr); |
| 195 | + } |
| 196 | + return Optional.empty(); |
| 197 | + } |
| 198 | +} |
0 commit comments