Skip to content

Commit c2c96b9

Browse files
committed
Python: Use global data-flow for ContainsNonContainer.ql
Replaces the `classTracker`-based approach with one based on global data-flow. To make it easy to share across queries, this is implemented as a parameterised module. The data-flow configuration itself keeps track of two flow states: whether we're tracking a reference to a class or a reference to an instance.
1 parent a2ec5b3 commit c2c96b9

File tree

3 files changed

+169
-59
lines changed

3 files changed

+169
-59
lines changed
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
/**
2+
* Provides a reusable data-flow configuration for tracking class instances
3+
* through global data-flow with full path support.
4+
*
5+
* This module is designed for quality queries that check whether instances
6+
* of certain classes reach operations that require a specific interface
7+
* (e.g., `__contains__`, `__iter__`, `__hash__`).
8+
*
9+
* The configuration uses two flow states:
10+
* - `TrackingClass`: tracking a reference to the class itself
11+
* - `TrackingInstance`: tracking an instance of the class
12+
*
13+
* At instantiation points (e.g., `cls()`), the state transitions from
14+
* `TrackingClass` to `TrackingInstance`. Sinks are only matched in the
15+
* `TrackingInstance` state.
16+
*/
17+
18+
private import python
19+
import semmle.python.dataflow.new.DataFlow
20+
private import semmle.python.dataflow.new.internal.DataFlowDispatch
21+
private import semmle.python.ApiGraphs
22+
23+
/** A flow state for tracking class references and their instances. */
24+
abstract class ClassInstanceFlowState extends string {
25+
bindingset[this]
26+
ClassInstanceFlowState() { any() }
27+
}
28+
29+
/** A state signifying that the tracked value is a reference to the class itself. */
30+
class TrackingClass extends ClassInstanceFlowState {
31+
TrackingClass() { this = "TrackingClass" }
32+
}
33+
34+
/** A state signifying that the tracked value is an instance of the class. */
35+
class TrackingInstance extends ClassInstanceFlowState {
36+
TrackingInstance() { this = "TrackingInstance" }
37+
}
38+
39+
/**
40+
* Signature module for parameterizing `ClassInstanceFlow` per query.
41+
*/
42+
signature module ClassInstanceFlowSig {
43+
/** Holds if `cls` is a class whose instances should be tracked to sinks. */
44+
predicate isRelevantClass(Class cls);
45+
46+
/** Holds if `sink` is a location where reaching instances indicate a violation. */
47+
predicate isInstanceSink(DataFlow::Node sink);
48+
49+
/**
50+
* Holds if an `isinstance` check against `checkedType` should act as a barrier,
51+
* suppressing alerts when the instance has been verified to have the expected interface.
52+
*/
53+
predicate isGuardType(DataFlow::Node checkedType);
54+
}
55+
56+
/**
57+
* Constructs a global data-flow configuration for tracking instances of
58+
* relevant classes from their definition to violation sinks.
59+
*/
60+
module ClassInstanceFlow<ClassInstanceFlowSig Sig> {
61+
/**
62+
* Holds if `guard` is an `isinstance` call checking `node` against a type
63+
* that should suppress the alert.
64+
*/
65+
private predicate isinstanceGuard(DataFlow::GuardNode guard, ControlFlowNode node, boolean branch) {
66+
exists(DataFlow::CallCfgNode isinstance_call |
67+
isinstance_call = API::builtin("isinstance").getACall() and
68+
isinstance_call.getArg(0).asCfgNode() = node and
69+
(
70+
Sig::isGuardType(isinstance_call.getArg(1))
71+
or
72+
// Also handle tuples of types: isinstance(x, (T1, T2))
73+
Sig::isGuardType(DataFlow::exprNode(isinstance_call.getArg(1).asExpr().(Tuple).getAnElt()))
74+
) and
75+
guard = isinstance_call.asCfgNode() and
76+
branch = true
77+
)
78+
}
79+
80+
private module Config implements DataFlow::StateConfigSig {
81+
class FlowState = ClassInstanceFlowState;
82+
83+
predicate isSource(DataFlow::Node source, FlowState state) {
84+
exists(ClassExpr ce |
85+
Sig::isRelevantClass(ce.getInnerScope()) and
86+
source.asExpr() = ce and
87+
state instanceof TrackingClass
88+
)
89+
}
90+
91+
predicate isSink(DataFlow::Node sink, FlowState state) {
92+
Sig::isInstanceSink(sink) and
93+
state instanceof TrackingInstance
94+
}
95+
96+
predicate isBarrier(DataFlow::Node node) {
97+
node = DataFlow::BarrierGuard<isinstanceGuard/3>::getABarrierNode()
98+
}
99+
100+
predicate isAdditionalFlowStep(
101+
DataFlow::Node nodeFrom, FlowState stateFrom, DataFlow::Node nodeTo, FlowState stateTo
102+
) {
103+
// Instantiation: class reference at the call function position
104+
// flows to the call result as an instance.
105+
stateFrom instanceof TrackingClass and
106+
stateTo instanceof TrackingInstance and
107+
exists(CallNode call |
108+
nodeFrom.asCfgNode() = call.getFunction() and
109+
nodeTo.asCfgNode() = call and
110+
// Exclude decorator applications, where the result is a proxy
111+
// rather than a typical instance.
112+
not call.getNode() = any(FunctionExpr fe).getADecoratorCall()
113+
)
114+
}
115+
}
116+
117+
module Flow = DataFlow::GlobalWithState<Config>;
118+
}
Lines changed: 29 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/**
22
* @name Membership test with a non-container
33
* @description A membership test, such as 'item in sequence', with a non-container on the right hand side will raise a 'TypeError'.
4-
* @kind problem
4+
* @kind path-problem
55
* @tags quality
66
* reliability
77
* correctness
@@ -14,6 +14,7 @@
1414
import python
1515
import semmle.python.dataflow.new.DataFlow
1616
private import semmle.python.dataflow.new.internal.DataFlowDispatch
17+
private import semmle.python.dataflow.new.internal.ClassInstanceFlow
1718
private import semmle.python.ApiGraphs
1819

1920
predicate rhs_in_expr(Expr rhs, Compare cmp) {
@@ -22,65 +23,36 @@ predicate rhs_in_expr(Expr rhs, Compare cmp) {
2223
)
2324
}
2425

25-
/**
26-
* Holds if `origin` is the result of applying a class as a decorator to a function.
27-
* Such decorator classes act as proxies, and the runtime value of the decorated
28-
* attribute may be of a different type than the decorator class itself.
29-
*/
30-
predicate isDecoratorApplication(DataFlow::LocalSourceNode origin) {
31-
exists(FunctionExpr fe | origin.asExpr() = fe.getADecoratorCall())
32-
}
26+
module ContainsNonContainerSig implements ClassInstanceFlowSig {
27+
predicate isRelevantClass(Class cls) {
28+
not DuckTyping::isContainer(cls) and
29+
not DuckTyping::hasUnresolvedBase(getADirectSuperclass*(cls)) and
30+
not exists(CallNode setattr_call |
31+
setattr_call.getFunction().(NameNode).getId() = "setattr" and
32+
setattr_call.getArg(0).(NameNode).getId() = cls.getName() and
33+
setattr_call.getScope() = cls.getScope()
34+
)
35+
}
3336

34-
/**
35-
* Holds if `cls` has methods dynamically added via `setattr`, so we cannot
36-
* statically determine its full interface.
37-
*/
38-
predicate hasDynamicMethods(Class cls) {
39-
exists(CallNode setattr_call |
40-
setattr_call.getFunction().(NameNode).getId() = "setattr" and
41-
setattr_call.getArg(0).(NameNode).getId() = cls.getName() and
42-
setattr_call.getScope() = cls.getScope()
43-
)
44-
}
37+
predicate isInstanceSink(DataFlow::Node sink) { rhs_in_expr(sink.asExpr(), _) }
4538

46-
/**
47-
* Holds if `cls_arg` references a known container builtin type, either directly
48-
* or as an element of a tuple.
49-
*/
50-
private predicate isContainerTypeArg(DataFlow::Node cls_arg) {
51-
cls_arg =
52-
API::builtin(["list", "tuple", "set", "frozenset", "dict", "str", "bytes", "bytearray"])
53-
.getAValueReachableFromSource()
54-
or
55-
isContainerTypeArg(DataFlow::exprNode(cls_arg.asExpr().(Tuple).getAnElt()))
39+
predicate isGuardType(DataFlow::Node checkedType) {
40+
checkedType =
41+
API::builtin(["list", "tuple", "set", "frozenset", "dict", "str", "bytes", "bytearray"])
42+
.getAValueReachableFromSource()
43+
}
5644
}
5745

58-
/**
59-
* Holds if `rhs` is guarded by an `isinstance` check that tests for
60-
* a container type.
61-
*/
62-
predicate guardedByIsinstanceContainer(DataFlow::Node rhs) {
63-
exists(
64-
DataFlow::GuardNode guard, DataFlow::CallCfgNode isinstance_call, DataFlow::LocalSourceNode src
65-
|
66-
isinstance_call = API::builtin("isinstance").getACall() and
67-
src.flowsTo(isinstance_call.getArg(0)) and
68-
src.flowsTo(rhs) and
69-
isContainerTypeArg(isinstance_call.getArg(1)) and
70-
guard = isinstance_call.asCfgNode() and
71-
guard.controlsBlock(rhs.asCfgNode().getBasicBlock(), true)
72-
)
73-
}
46+
module ContainsNonContainerFlow = ClassInstanceFlow<ContainsNonContainerSig>;
47+
48+
import ContainsNonContainerFlow::Flow::PathGraph
7449

75-
from Compare cmp, DataFlow::LocalSourceNode origin, DataFlow::Node rhs, Class cls
50+
from
51+
ContainsNonContainerFlow::Flow::PathNode source, ContainsNonContainerFlow::Flow::PathNode sink,
52+
ClassExpr ce
7653
where
77-
origin = classInstanceTracker(cls) and
78-
origin.flowsTo(rhs) and
79-
not DuckTyping::isContainer(cls) and
80-
not DuckTyping::hasUnresolvedBase(getADirectSuperclass*(cls)) and
81-
not isDecoratorApplication(origin) and
82-
not hasDynamicMethods(cls) and
83-
not guardedByIsinstanceContainer(rhs) and
84-
rhs_in_expr(rhs.asExpr(), cmp)
85-
select cmp, "This test may raise an Exception as the $@ may be of non-container class $@.", origin,
86-
"target", cls, cls.getName()
54+
ContainsNonContainerFlow::Flow::flowPath(source, sink) and
55+
source.getNode().asExpr() = ce
56+
select sink.getNode(), source, sink,
57+
"This test may raise an Exception as the $@ may be of non-container class $@.", source.getNode(),
58+
"target", ce.getInnerScope(), ce.getInnerScope().getName()
Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,22 @@
1-
| expressions_test.py:89:8:89:15 | Compare | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | target | expressions_test.py:77:1:77:20 | Class XIter | XIter |
2-
| expressions_test.py:91:8:91:19 | Compare | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | target | expressions_test.py:77:1:77:20 | Class XIter | XIter |
1+
edges
2+
| expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | expressions_test.py:77:7:77:11 | ControlFlowNode for XIter | provenance | |
3+
| expressions_test.py:77:7:77:11 | ControlFlowNode for XIter | expressions_test.py:88:11:88:15 | ControlFlowNode for XIter | provenance | |
4+
| expressions_test.py:88:5:88:7 | ControlFlowNode for seq | expressions_test.py:89:13:89:15 | ControlFlowNode for seq | provenance | |
5+
| expressions_test.py:88:5:88:7 | ControlFlowNode for seq | expressions_test.py:91:17:91:19 | ControlFlowNode for seq | provenance | |
6+
| expressions_test.py:88:5:88:7 | ControlFlowNode for seq | expressions_test.py:91:17:91:19 | ControlFlowNode for seq | provenance | |
7+
| expressions_test.py:88:11:88:15 | ControlFlowNode for XIter | expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | provenance | Config |
8+
| expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | expressions_test.py:88:5:88:7 | ControlFlowNode for seq | provenance | |
9+
nodes
10+
| expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | semmle.label | ControlFlowNode for ClassExpr |
11+
| expressions_test.py:77:7:77:11 | ControlFlowNode for XIter | semmle.label | ControlFlowNode for XIter |
12+
| expressions_test.py:88:5:88:7 | ControlFlowNode for seq | semmle.label | ControlFlowNode for seq |
13+
| expressions_test.py:88:11:88:15 | ControlFlowNode for XIter | semmle.label | ControlFlowNode for XIter |
14+
| expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | semmle.label | ControlFlowNode for XIter() |
15+
| expressions_test.py:89:13:89:15 | ControlFlowNode for seq | semmle.label | ControlFlowNode for seq |
16+
| expressions_test.py:91:17:91:19 | ControlFlowNode for seq | semmle.label | ControlFlowNode for seq |
17+
| expressions_test.py:91:17:91:19 | ControlFlowNode for seq | semmle.label | ControlFlowNode for seq |
18+
subpaths
19+
#select
20+
| expressions_test.py:89:13:89:15 | ControlFlowNode for seq | expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | expressions_test.py:89:13:89:15 | ControlFlowNode for seq | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | target | expressions_test.py:77:1:77:20 | Class XIter | XIter |
21+
| expressions_test.py:91:17:91:19 | ControlFlowNode for seq | expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | expressions_test.py:91:17:91:19 | ControlFlowNode for seq | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | target | expressions_test.py:77:1:77:20 | Class XIter | XIter |
22+
| expressions_test.py:91:17:91:19 | ControlFlowNode for seq | expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | expressions_test.py:91:17:91:19 | ControlFlowNode for seq | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | target | expressions_test.py:77:1:77:20 | Class XIter | XIter |

0 commit comments

Comments
 (0)