@@ -30,6 +30,16 @@ predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeT
3030 subscriptStep ( nodeFrom , nodeTo )
3131 or
3232 stringManipulation ( nodeFrom , nodeTo )
33+ or
34+ jsonStep ( nodeFrom , nodeTo )
35+ or
36+ containerStep ( nodeFrom , nodeTo )
37+ or
38+ copyStep ( nodeFrom , nodeTo )
39+ or
40+ forStep ( nodeFrom , nodeTo )
41+ or
42+ unpackingAssignmentStep ( nodeFrom , nodeTo )
3343}
3444
3545/**
@@ -118,8 +128,101 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
118128 )
119129 or
120130 // f-strings
121- nodeTo .getNode ( ) .getNode ( ) . ( Fstring ) .getAValue ( ) = nodeFrom .getNode ( ) . getNode ( )
131+ nodeTo .asExpr ( ) .( Fstring ) .getAValue ( ) = nodeFrom .asExpr ( )
122132 // TODO: Handle encode/decode from base64/quopri
123133 // TODO: Handle os.path.join
124134 // TODO: Handle functions in https://docs.python.org/3/library/binascii.html
125135}
136+
/**
 * Holds if `nodeTo` is a call of the form `json.load(..)`, `json.loads(..)` or
 * `json.dumps(..)` whose first positional argument is `nodeFrom`, so that taint is
 * carried across JSON encoding/decoding.
 */
predicate jsonStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
  exists(CallNode jsonCall, AttrNode callee, string funcName |
    funcName in ["load", "loads", "dumps"] and
    jsonCall = nodeTo.getNode() and
    callee = jsonCall.getFunction() and
    // the attribute must be looked up on something literally named `json`
    callee.getObject(funcName).(NameNode).getId() = "json" and
    jsonCall.getArg(0) = nodeFrom.getNode()
  )
}
146+
/**
 * Holds if taint can flow from `nodeFrom` to `nodeTo` through a container
 * (lists/sets/dictionaries/tuples): literals, constructor calls, functions taking a
 * collection, and container methods.
 *
 * Note that this modeling is currently very imprecise because it only matches on names.
 * For example, since `dict.get` is modeled, the result of any
 * `<tainted object>.get(<arg>)` call is treated as tainted, regardless of whether that
 * is actually the case.
 */
predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
  // construction by literal
  // TODO: Leaving the content argument unconstrained here feels like a BIG hack, but we currently get nothing for free :|
  storeStep(nodeFrom, _, nodeTo)
  or
  // call to a plain name, with the tainted value as first positional argument
  exists(CallNode call, string callee |
    call = nodeTo.asCfgNode() and
    callee = call.getFunction().(NameNode).getId() and
    call.getArg(0) = nodeFrom.getNode()
  |
    // constructor call
    callee in ["list", "set", "frozenset", "dict", "defaultdict", "tuple"]
    or
    // functions operating on collections
    callee in ["sorted", "reversed", "iter", "next"]
  )
  or
  // methods invoked on the tainted object; the call result becomes tainted
  exists(CallNode call, AttrNode method, string name |
    call = nodeTo.asCfgNode() and
    method = call.getFunction() and
    method.getObject(name) = nodeFrom.asCfgNode()
  |
    name = "copy"
    or
    // general
    name = "pop"
    or
    // dict
    name in ["values", "items", "get", "popitem"]
  )
  or
  // list.append, set.add: the argument flows to the post-update node of the receiver
  exists(CallNode call, AttrNode method |
    method = call.getFunction() and
    method.getObject(["append", "add"]) =
      nodeTo.(PostUpdateNode).getPreUpdateNode().asCfgNode() and
    call.getArg(0) = nodeFrom.getNode()
  )
}
189+
/**
 * Holds if `nodeTo` is a call to `copy`/`deepcopy` (either as a bare name or as an
 * attribute of something named `copy`) whose first positional argument is `nodeFrom`,
 * so that taint flows from the original object to its copy.
 */
predicate copyStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
  exists(CallNode copyCall, ControlFlowNode callee |
    copyCall = nodeTo.getNode() and
    callee = copyCall.getFunction() and
    copyCall.getArg(0) = nodeFrom.getNode()
  |
    // Bare name: copy(..), deepcopy(..)
    callee.(NameNode).getId() in ["copy", "deepcopy"]
    or
    // Fully qualified: copy.copy(..), copy.deepcopy(..)
    callee.(AttrNode).getObject(["copy", "deepcopy"]).(NameNode).getId() = "copy"
  )
}
204+
/**
 * Holds if taint can flow from `nodeFrom` to `nodeTo` because of `for`-iteration:
 * `nodeFrom` is the iterable of a `for` statement and `nodeTo` is an ESSA variable
 * defined at the loop target or at any node nested within it. This covers both
 * `for x in xs` and `for x,y in points`.
 */
predicate forStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
  exists(For loop, EssaNodeDefinition targetDef |
    nodeFrom.asExpr() = loop.getIter() and
    targetDef = nodeTo.getVar() and
    // reflexive-transitive closure: the target itself, or any node nested inside it
    targetDef.getDefiningNode().getNode() = loop.getTarget().getAChildNode*()
  )
}
216+
/**
 * Holds if taint can flow from `nodeFrom` to `nodeTo` because of iterable unpacking in
 * an ordinary assignment such as `x,y = calc_point()`: `nodeFrom` is the assigned value
 * and `nodeTo` is an ESSA variable defined inside one of the assignment targets.
 * Unpacking in `for x,y in points` is not covered here, since `forStep` handles it.
 */
predicate unpackingAssignmentStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
  // `a, b = myiterable` or `head, *tail = myiterable` (only Python 3)
  exists(Assign unpacking, MultiAssignmentDefinition targetDef, Expr target |
    nodeFrom.asExpr() = unpacking.getValue() and
    target = unpacking.getATarget() and
    target.contains(targetDef.getDefiningNode().getNode()) and
    targetDef = nodeTo.getVar()
  )
}
0 commit comments