1818
1919import java .util .List ;
2020import java .util .Optional ;
21+ import java .util .Set ;
22+ import java .util .function .Predicate ;
23+ import java .util .stream .Stream ;
2124import org .sonar .check .Rule ;
2225import org .sonar .plugins .python .api .PythonSubscriptionCheck ;
2326import org .sonar .plugins .python .api .SubscriptionContext ;
2427import org .sonar .plugins .python .api .quickfix .PythonQuickFix ;
2528import org .sonar .plugins .python .api .symbols .v2 .SymbolV2 ;
2629import org .sonar .plugins .python .api .symbols .v2 .UsageV2 ;
2730import org .sonar .plugins .python .api .tree .ArgList ;
31+ import org .sonar .plugins .python .api .tree .AssignmentStatement ;
2832import org .sonar .plugins .python .api .tree .CallExpression ;
29- import org .sonar .plugins .python .api .tree .ComprehensionFor ;
3033import org .sonar .plugins .python .api .tree .Expression ;
3134import org .sonar .plugins .python .api .tree .ForStatement ;
3235import org .sonar .plugins .python .api .tree .Name ;
36+ import org .sonar .plugins .python .api .tree .QualifiedExpression ;
3337import org .sonar .plugins .python .api .tree .RegularArgument ;
3438import org .sonar .plugins .python .api .tree .Tree ;
39+ import org .sonar .plugins .python .api .tree .YieldExpression ;
40+ import org .sonar .plugins .python .api .tree .YieldStatement ;
3541import org .sonar .plugins .python .api .types .v2 .matchers .TypeMatcher ;
3642import org .sonar .plugins .python .api .types .v2 .matchers .TypeMatchers ;
3743import org .sonar .python .quickfix .TextEditUtils ;
4046@ Rule (key = "S8516" )
4147public class GroupByIteratorReuseCheck extends PythonSubscriptionCheck {
4248
43- private static final String MESSAGE = "Convert this group iterator to a list ." ;
49+ private static final String MESSAGE = "Consume this group iterator inside the loop, or materialize it into a collection ." ;
4450 private static final String QUICK_FIX_MESSAGE = "Wrap with \" list()\" " ;
4551
4652 private static final TypeMatcher GROUPBY_MATCHER = TypeMatchers .isType ("itertools.groupby" );
4753
48- // SAFE_CONSUMER_MATCHER and CLASS_MATCHER are matched leniently (TRUE or UNKNOWN both pass) so
49- // unresolved callees don't trigger false positives.
5054 private static final TypeMatcher SAFE_CONSUMER_MATCHER = TypeMatchers .any (
5155 TypeMatchers .isType ("list" ),
5256 TypeMatchers .isType ("tuple" ),
@@ -57,11 +61,21 @@ public class GroupByIteratorReuseCheck extends PythonSubscriptionCheck {
5761 TypeMatchers .isType ("max" ),
5862 TypeMatchers .isType ("min" ),
5963 TypeMatchers .isType ("any" ),
60- TypeMatchers .isType ("all" )
64+ TypeMatchers .isType ("all" ),
65+ TypeMatchers .isType ("next" ),
66+ TypeMatchers .isType ("len" ),
67+ TypeMatchers .isType ("str.join" ),
68+ TypeMatchers .isType ("bytes.join" )
6169 );
6270
63- // Any class constructor that accepts an iterable invariably materializes it inside __init__
64- private static final TypeMatcher CLASS_MATCHER = TypeMatchers .isObjectOfType ("type" );
71+ // Matches class objects produced at runtime via `type(...)` (e.g. `Cls = type(obj); Cls(group)`).
72+ // Direct class references (`MyClass`) are NOT matched here.
73+ private static final TypeMatcher RUNTIME_CLASS_OBJECT_MATCHER = TypeMatchers .isObjectOfType ("type" );
74+
75+ // Container methods that store their argument *as a single element* without iterating it.
76+ private static final Set <String > STORING_METHOD_NAMES = Set .of (
77+ "append" , "add" , "setdefault"
78+ );
6579
6680 @ Override
6781 public void initialize (Context context ) {
@@ -70,58 +84,33 @@ public void initialize(Context context) {
7084
7185 private static void checkForStatement (SubscriptionContext ctx ) {
7286 ForStatement forStatement = (ForStatement ) ctx .syntaxNode ();
73-
74- if (forStatement .testExpressions ().size () != 1 ) {
75- return ;
76- }
77-
78- if (!(forStatement .testExpressions ().get (0 ) instanceof CallExpression callExpr )) {
87+ Name groupName = extractGroupByLoopVariable (forStatement , ctx ).orElse (null );
88+ if (groupName == null ) {
7989 return ;
8090 }
81-
82- if (!GROUPBY_MATCHER .isTrueFor (callExpr .callee (), ctx )) {
83- return ;
84- }
85-
86- if (forStatement .expressions ().size () != 2 ) {
87- return ;
88- }
89-
90- if (!(forStatement .expressions ().get (1 ) instanceof Name groupName )) {
91- return ;
92- }
93-
9491 SymbolV2 groupSymbol = groupName .symbolV2 ();
9592 if (groupSymbol == null ) {
9693 return ;
9794 }
9895
9996 Tree loopBody = forStatement .body ();
10097
101- // If `group` is rebound anywhere in the loop body (e.g. `group = list(group)`), we can't
102- // tell from the AST alone which reads see the original iterator. We conservatively skip.
103- boolean isReboundInLoopBody = groupSymbol .usages ().stream ()
104- .filter (usage -> usage .kind () == UsageV2 .Kind .ASSIGNMENT_LHS )
105- .map (UsageV2 ::tree )
106- .flatMap (TreeUtils .toStreamInstanceOfMapper (Name .class ))
107- .anyMatch (reboundName -> isInside (reboundName , loopBody ));
98+ // Bail out on any rebinding of `group` in the loop body: telling which reads still see the original iterator would require a control-flow graph (CFG).
99+ boolean isReboundInLoopBody = namesInLoopBody (groupSymbol , loopBody ,
100+ usage -> usage .kind () == UsageV2 .Kind .ASSIGNMENT_LHS ).findAny ().isPresent ();
108101 if (isReboundInLoopBody ) {
109102 return ;
110103 }
111104
112- List <Name > loopBodyReads = groupSymbol .usages ().stream ()
113- .filter (usage -> !usage .isBindingUsage ())
114- .map (UsageV2 ::tree )
115- .flatMap (TreeUtils .toStreamInstanceOfMapper (Name .class ))
116- .filter (nameUsage -> isInside (nameUsage , loopBody ))
117- .toList ();
105+ List <Name > loopBodyReads = namesInLoopBody (groupSymbol , loopBody ,
106+ usage -> !usage .isBindingUsage ()).toList ();
118107
119108 List <Name > unsafeReads = loopBodyReads .stream ()
120- .filter (nameUsage -> ! isSafeUsage (nameUsage , forStatement , ctx ))
109+ .filter (nameUsage -> isUnsafeRead (nameUsage , forStatement , ctx ))
121110 .toList ();
122111
123- // The quickfix wraps a single occurrence in `list(...)`. We only attach it when there is
124- // exactly one read of `group` in the loop body — this does not affecting any other consumer .
112+ // Quickfix only when there is a single read in the body: wrapping `group` in `list()`
113+ // consumes the iterator and would silently break any other read .
125114 boolean canOfferQuickFix = loopBodyReads .size () == 1 && unsafeReads .size () == 1 ;
126115 for (Name nameUsage : unsafeReads ) {
127116 var issue = ctx .addIssue (nameUsage , MESSAGE );
@@ -135,54 +124,80 @@ private static void checkForStatement(SubscriptionContext ctx) {
135124 }
136125 }
137126
138- private static boolean isSafeUsage (Name nameUsage , ForStatement enclosingForStatement , SubscriptionContext ctx ) {
139- // A usage inside a nested function or lambda defined in the loop body is always unsafe
140- if (isInsideNestedFunctionOrLambda (nameUsage , enclosingForStatement )) {
141- return false ;
127+ // Matches `for key, group in groupby(...):` and returns the `group` name
128+ private static Optional <Name > extractGroupByLoopVariable (ForStatement forStatement , SubscriptionContext ctx ) {
129+ if (forStatement .testExpressions ().size () != 1
130+ || !(forStatement .testExpressions ().get (0 ) instanceof CallExpression callExpr )
131+ || !GROUPBY_MATCHER .isTrueFor (callExpr .callee (), ctx )
132+ || forStatement .expressions ().size () != 2
133+ || !(forStatement .expressions ().get (1 ) instanceof Name groupName )) {
134+ return Optional .empty ();
142135 }
136+ return Optional .of (groupName );
137+ }
143138
144- if (nameUsage .parent () instanceof ComprehensionFor compFor && compFor .iterable () == nameUsage ) {
139+ // Recognized escape sinks: lambda/nested-function capture, assignment rvalue, yield, and
140+ // positional argument of a known storing-method. Anything else is treated as safe.
141+ private static boolean isUnsafeRead (Name nameUsage , ForStatement enclosingForStatement , SubscriptionContext ctx ) {
142+ if (isCapturedByNestedFunctionOrLambda (nameUsage , enclosingForStatement )) {
145143 return true ;
146144 }
145+ return reachesSink (nameUsage , ctx );
146+ }
147147
148- if (nameUsage .parent () instanceof ForStatement nestedFor
149- && nestedFor .testExpressions ().stream ().anyMatch (e -> e == nameUsage )) {
148+ private static boolean reachesSink (Expression expression , SubscriptionContext ctx ) {
149+ Tree parent = expression .parent ();
150+ if (parent instanceof AssignmentStatement assign && assign .assignedValue () == expression ) {
150151 return true ;
151152 }
152-
153- if (nameUsage .parent () instanceof RegularArgument regularArg && regularArg .keywordArgument () == null ) {
154- return hasSafeConsumerAncestor (regularArg , ctx );
153+ if (parent instanceof YieldExpression || parent instanceof YieldStatement ) {
154+ return true ;
155+ }
156+ // Keyword arguments are skipped (treated as safe): mapping them to the callee's parameter would
157+ // require signature resolution, and iterators are overwhelmingly passed positionally in practice.
158+ if (parent instanceof RegularArgument regularArg && regularArg .keywordArgument () == null ) {
159+ return chainReachesSink (regularArg , ctx );
155160 }
156-
157161 return false ;
158162 }
159163
160- // We raise when the group iterator escapes the current iteration and is read after the outer
161- // `groupby` advances. A positional-arg call chain ending in a safe consumer cannot escape.
162- private static boolean hasSafeConsumerAncestor (RegularArgument regularArg , SubscriptionContext ctx ) {
163- Optional <CallExpression > currentCall = owningCall (regularArg );
164- while (currentCall .isPresent ()) {
165- CallExpression call = currentCall .get ();
166- if (isSafeConsumerCallee (call .callee (), ctx )) {
167- return true ;
168- }
169- if (!(call .parent () instanceof RegularArgument outerArg ) || outerArg .keywordArgument () != null ) {
170- return false ;
171- }
172- currentCall = owningCall (outerArg );
164+ private static boolean chainReachesSink (RegularArgument arg , SubscriptionContext ctx ) {
165+ CallExpression call = owningCall (arg ).orElse (null );
166+ if (call == null ) {
167+ return false ;
173168 }
174- return false ;
169+ if (isSafeConsumerCallee (call .callee (), ctx )) {
170+ return false ;
171+ }
172+ if (isStoringMethodCall (call )) {
173+ return true ;
174+ }
175+ return reachesSink (call , ctx );
175176 }
176177
177178 private static boolean isSafeConsumerCallee (Expression callee , SubscriptionContext ctx ) {
178179 return !SAFE_CONSUMER_MATCHER .evaluateFor (callee , ctx ).isFalse ()
179- || !CLASS_MATCHER .evaluateFor (callee , ctx ).isFalse ();
180+ || !RUNTIME_CLASS_OBJECT_MATCHER .evaluateFor (callee , ctx ).isFalse ();
181+ }
182+
183+ // Name-only on purpose: gating on the receiver type would silently miss the case where the
184+ // receiver's type cannot be resolved. This is a deliberate trade-off: accept some false-positive risk in order to keep raising real issues.
185+ private static boolean isStoringMethodCall (CallExpression call ) {
186+ return call .callee () instanceof QualifiedExpression qualified
187+ && STORING_METHOD_NAMES .contains (qualified .name ().name ());
180188 }
181189
182- private static boolean isInsideNestedFunctionOrLambda (Name nameUsage , ForStatement enclosingForStatement ) {
190+ private static boolean isCapturedByNestedFunctionOrLambda (Name nameUsage , ForStatement enclosingForStatement ) {
183191 Tree functionLikeAncestor = TreeUtils .firstAncestorOfKind (nameUsage , Tree .Kind .FUNCDEF , Tree .Kind .LAMBDA );
184- return functionLikeAncestor != null
185- && isInside (functionLikeAncestor , enclosingForStatement .body ());
192+ return functionLikeAncestor != null && isInside (functionLikeAncestor , enclosingForStatement .body ());
193+ }
194+
195+ private static Stream <Name > namesInLoopBody (SymbolV2 symbol , Tree loopBody , Predicate <UsageV2 > usageFilter ) {
196+ return symbol .usages ().stream ()
197+ .filter (usageFilter )
198+ .map (UsageV2 ::tree )
199+ .flatMap (TreeUtils .toStreamInstanceOfMapper (Name .class ))
200+ .filter (name -> isInside (name , loopBody ));
186201 }
187202
188203 private static boolean isInside (Tree tree , Tree container ) {
0 commit comments