Skip to content

Commit caa680c

Browse files
authored
Merge pull request #4149 from RasmusWL/python-more-additional-taint-steps
Python: more additional taint steps
2 parents b9a6183 + c5e3333 commit caa680c

17 files changed

Lines changed: 813 additions & 123 deletions

File tree

python/ql/src/experimental/dataflow/internal/TaintTrackingPrivate.qll

Lines changed: 104 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,16 @@ predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeT
3030
subscriptStep(nodeFrom, nodeTo)
3131
or
3232
stringManipulation(nodeFrom, nodeTo)
33+
or
34+
jsonStep(nodeFrom, nodeTo)
35+
or
36+
containerStep(nodeFrom, nodeTo)
37+
or
38+
copyStep(nodeFrom, nodeTo)
39+
or
40+
forStep(nodeFrom, nodeTo)
41+
or
42+
unpackingAssignmentStep(nodeFrom, nodeTo)
3343
}
3444

3545
/**
@@ -118,8 +128,101 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
118128
)
119129
or
120130
// f-strings
121-
nodeTo.getNode().getNode().(Fstring).getAValue() = nodeFrom.getNode().getNode()
131+
nodeTo.asExpr().(Fstring).getAValue() = nodeFrom.asExpr()
122132
// TODO: Handle encode/decode from base64/quopri
123133
// TODO: Handle os.path.join
124134
// TODO: Handle functions in https://docs.python.org/3/library/binascii.html
125135
}
136+
137+
/**
138+
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to JSON encoding/decoding.
139+
*/
140+
predicate jsonStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
141+
exists(CallNode call | call = nodeTo.getNode() |
142+
call.getFunction().(AttrNode).getObject(["load", "loads", "dumps"]).(NameNode).getId() = "json" and
143+
call.getArg(0) = nodeFrom.getNode()
144+
)
145+
}
146+
147+
/**
148+
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to containers
149+
* (lists/sets/dictionaries): literals, constructor invocation, methods. Note that this
150+
* is currently very imprecise, as an example, since we model `dict.get`, we treat any
151+
* `<tainted object>.get(<arg>)` will be tainted, whether it's true or not.
152+
*/
153+
predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
154+
// construction by literal
155+
// TODO: Not limiting the content argument here feels like a BIG hack, but we currently get nothing for free :|
156+
storeStep(nodeFrom, _, nodeTo)
157+
or
158+
// constructor call
159+
exists(CallNode call | call = nodeTo.asCfgNode() |
160+
call.getFunction().(NameNode).getId() in ["list", "set", "frozenset", "dict", "defaultdict",
161+
"tuple"] and
162+
call.getArg(0) = nodeFrom.getNode()
163+
)
164+
or
165+
// functions operating on collections
166+
exists(CallNode call | call = nodeTo.asCfgNode() |
167+
call.getFunction().(NameNode).getId() in ["sorted", "reversed", "iter", "next"] and
168+
call.getArg(0) = nodeFrom.getNode()
169+
)
170+
or
171+
// methods
172+
exists(CallNode call, string name | call = nodeTo.asCfgNode() |
173+
name in ["copy",
174+
// general
175+
"pop",
176+
// dict
177+
"values", "items", "get", "popitem"] and
178+
call.getFunction().(AttrNode).getObject(name) = nodeFrom.asCfgNode()
179+
)
180+
or
181+
// list.append, set.add
182+
exists(CallNode call, string name |
183+
name in ["append", "add"] and
184+
call.getFunction().(AttrNode).getObject(name) =
185+
nodeTo.(PostUpdateNode).getPreUpdateNode().asCfgNode() and
186+
call.getArg(0) = nodeFrom.getNode()
187+
)
188+
}
189+
190+
/**
191+
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to copying.
192+
*/
193+
predicate copyStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
194+
exists(CallNode call | call = nodeTo.getNode() |
195+
// Fully qualified: copy.copy, copy.deepcopy
196+
(
197+
call.getFunction().(NameNode).getId() in ["copy", "deepcopy"]
198+
or
199+
call.getFunction().(AttrNode).getObject(["copy", "deepcopy"]).(NameNode).getId() = "copy"
200+
) and
201+
call.getArg(0) = nodeFrom.getNode()
202+
)
203+
}
204+
205+
/**
206+
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to `for`-iteration,
207+
* for example `for x in xs`, or `for x,y in points`.
208+
*/
209+
predicate forStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
210+
exists(EssaNodeDefinition defn, For for |
211+
for.getTarget().getAChildNode*() = defn.getDefiningNode().getNode() and
212+
nodeTo.getVar() = defn and
213+
nodeFrom.asExpr() = for.getIter()
214+
)
215+
}
216+
217+
/**
218+
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to iterable unpacking.
219+
* Only handles normal assignment (`x,y = calc_point()`), since `for x,y in points` is handled by `forStep`.
220+
*/
221+
predicate unpackingAssignmentStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
222+
// `a, b = myiterable` or `head, *tail = myiterable` (only Python 3)
223+
exists(MultiAssignmentDefinition defn, Assign assign |
224+
assign.getATarget().contains(defn.getDefiningNode().getNode()) and
225+
nodeTo.getVar() = defn and
226+
nodeFrom.asExpr() = assign.getValue()
227+
)
228+
}

python/ql/test/experimental/dataflow/tainttracking/TestTaintLib.qll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ class TestTaintTrackingConfiguration extends TaintTracking::Configuration {
66
TestTaintTrackingConfiguration() { this = "TestTaintTrackingConfiguration" }
77

88
override predicate isSource(DataFlow::Node source) {
9-
source.(DataFlow::CfgNode).getNode().(NameNode).getId() in ["TAINTED_STRING", "TAINTED_BYTES"]
9+
source.(DataFlow::CfgNode).getNode().(NameNode).getId() in ["TAINTED_STRING", "TAINTED_BYTES",
10+
"TAINTED_LIST", "TAINTED_DICT"]
1011
}
1112

1213
override predicate isSink(DataFlow::Node sink) {
@@ -44,7 +45,8 @@ private string repr(Expr e) {
4445

4546
query predicate test_taint(string arg_location, string test_res, string function_name, string repr) {
4647
exists(Call call, Expr arg, boolean expected_taint, boolean has_taint |
47-
call.getLocation().getFile().getShortName() = "test.py" and
48+
// only consider files that are extracted as part of the test
49+
exists(call.getLocation().getFile().getRelativePath()) and
4850
(
4951
call.getFunc().(Name).getId() = "ensure_tainted" and
5052
expected_taint = true
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
| test_collections.py:16 | ok | test_access | tainted_list.copy() |
2+
| test_collections.py:24 | ok | list_clear | tainted_list |
3+
| test_collections.py:27 | fail | list_clear | tainted_list |
4+
| test_string.py:17 | ok | str_methods | ts.casefold() |
5+
| test_string.py:19 | ok | str_methods | ts.format_map(..) |
6+
| test_string.py:20 | ok | str_methods | "{unsafe}".format_map(..) |
7+
| test_string.py:31 | fail | binary_decode_encode | base64.a85encode(..) |
8+
| test_string.py:32 | fail | binary_decode_encode | base64.a85decode(..) |
9+
| test_string.py:35 | fail | binary_decode_encode | base64.b85encode(..) |
10+
| test_string.py:36 | fail | binary_decode_encode | base64.b85decode(..) |
11+
| test_string.py:39 | fail | binary_decode_encode | base64.encodebytes(..) |
12+
| test_string.py:40 | fail | binary_decode_encode | base64.decodebytes(..) |
13+
| test_string.py:48 | ok | f_strings | Fstring |
14+
| test_unpacking.py:18 | ok | extended_unpacking | first |
15+
| test_unpacking.py:18 | ok | extended_unpacking | last |
16+
| test_unpacking.py:18 | ok | extended_unpacking | rest |
17+
| test_unpacking.py:23 | ok | also_allowed | a |
18+
| test_unpacking.py:31 | ok | also_allowed | b |
19+
| test_unpacking.py:31 | ok | also_allowed | c |
20+
| test_unpacking.py:39 | ok | nested | x |
21+
| test_unpacking.py:39 | ok | nested | xs |
22+
| test_unpacking.py:39 | ok | nested | ys |

python/ql/test/experimental/dataflow/tainttracking/string-py3/TestTaint.ql renamed to python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep-py3/TestTaint.ql

File renamed without changes.

python/ql/test/experimental/dataflow/tainttracking/string-py3/options renamed to python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep-py3/options

File renamed without changes.
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Add taintlib to PATH so it can be imported during runtime without any hassle
2+
import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
3+
from taintlib import *
4+
5+
# This has no runtime impact, but allows autocomplete to work
6+
from typing import TYPE_CHECKING
7+
if TYPE_CHECKING:
8+
from ..taintlib import *
9+
10+
# Actual tests
11+
12+
def test_access():
13+
tainted_list = TAINTED_LIST
14+
15+
ensure_tainted(
16+
tainted_list.copy(),
17+
)
18+
19+
20+
def list_clear():
21+
tainted_string = TAINTED_STRING
22+
tainted_list = [tainted_string]
23+
24+
ensure_tainted(tainted_list)
25+
26+
tainted_list.clear()
27+
ensure_not_tainted(tainted_list)
28+
29+
# Make tests runable
30+
31+
test_access()
32+
list_clear()

python/ql/test/experimental/dataflow/tainttracking/string-py3/test.py renamed to python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep-py3/test_string.py

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,11 @@
1-
# Python 3 specific taint tracking for string
2-
3-
TAINTED_STRING = "TAINTED_STRING"
4-
TAINTED_BYTES = b"TAINTED_BYTES"
5-
6-
7-
def ensure_tainted(*args):
8-
print("- ensure_tainted")
9-
for i, arg in enumerate(args):
10-
print("arg {}: {!r}".format(i, arg))
11-
12-
13-
def ensure_not_tainted(*args):
14-
print("- ensure_not_tainted")
15-
for i, arg in enumerate(args):
16-
print("arg {}: {!r}".format(i, arg))
17-
1+
# Add taintlib to PATH so it can be imported during runtime without any hassle
2+
import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
3+
from taintlib import *
4+
5+
# This has no runtime impact, but allows autocomplete to work
6+
from typing import TYPE_CHECKING
7+
if TYPE_CHECKING:
8+
from ..taintlib import *
189

1910
# Actual tests
2011

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# Add taintlib to PATH so it can be imported during runtime without any hassle
2+
import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
3+
from taintlib import *
4+
5+
# This has no runtime impact, but allows autocomplete to work
6+
from typing import TYPE_CHECKING
7+
if TYPE_CHECKING:
8+
from ..taintlib import *
9+
10+
# Actual tests
11+
12+
# Extended Iterable Unpacking -- PEP 3132
13+
# https://www.python.org/dev/peps/pep-3132/
14+
15+
16+
def extended_unpacking():
17+
first, *rest, last = TAINTED_LIST
18+
ensure_tainted(first, rest, last)
19+
20+
21+
def also_allowed():
22+
*a, = TAINTED_LIST
23+
ensure_tainted(a)
24+
25+
# for b, *c in [(1, 2, 3), (4, 5, 6, 7)]:
26+
# print(c)
27+
# i=0; c=[2,3]
28+
# i=1; c=[5,6,7]
29+
30+
for b, *c in [TAINTED_LIST, TAINTED_LIST]:
31+
ensure_tainted(b, c)
32+
33+
34+
def nested():
35+
l = TAINTED_LIST
36+
ll = [l,l]
37+
38+
[[x, *xs], ys] = ll
39+
ensure_tainted(x, xs, ys)
40+
41+
42+
# Make tests runable
43+
44+
extended_unpacking()
45+
also_allowed()
46+
nested()

0 commit comments

Comments
 (0)