@@ -11,8 +11,11 @@ private import experimental.semmle.python.Concepts
1111
1212/** Provides models for the Python standard library. */
1313private module Stdlib {
14+ // ---------------------------------------------------------------------------
15+ // os
16+ // ---------------------------------------------------------------------------
1417 /** Gets a reference to the `os` module. */
15- DataFlow:: Node os ( DataFlow:: TypeTracker t ) {
18+ private DataFlow:: Node os ( DataFlow:: TypeTracker t ) {
1619 t .start ( ) and
1720 result = DataFlow:: importModule ( "os" )
1821 or
@@ -22,53 +25,60 @@ private module Stdlib {
2225 /** Gets a reference to the `os` module. */
2326 DataFlow:: Node os ( ) { result = os ( DataFlow:: TypeTracker:: end ( ) ) }
2427
25- /** Provides models for the `os` module. */
26- module os {
27- /** Gets a reference to the `os.system` function. */
28- DataFlow:: Node system ( DataFlow:: TypeTracker t ) {
29- t .start ( ) and
30- result = DataFlow:: importMember ( "os" , "system" )
31- or
32- t .startInAttr ( "system" ) and
33- result = os ( )
34- or
35- exists ( DataFlow:: TypeTracker t2 | result = os:: system ( t2 ) .track ( t2 , t ) )
36- }
37-
38- /** Gets a reference to the `os.system` function. */
39- DataFlow:: Node system ( ) { result = os:: system ( DataFlow:: TypeTracker:: end ( ) ) }
40-
41- /** Gets a reference to the `os.popen` function. */
42- DataFlow:: Node popen ( DataFlow:: TypeTracker t ) {
28+ /**
29+ * Gets a reference to the attribute `attr_name` of the `os` module.
30+ * WARNING: Only holds for a few predefined attributes.
31+ *
32+ * For example, using `attr_name = "system"` will get all uses of `os.system`.
33+ */
34+ private DataFlow:: Node os_attr ( DataFlow:: TypeTracker t , string attr_name ) {
35+ attr_name in [ "system" , "popen" ,
36+ // exec
37+ "execl" , "execle" , "execlp" , "execlpe" , "execv" , "execve" , "execvp" , "execvpe" ,
38+ // spawn
39+ "spawnl" , "spawnle" , "spawnlp" , "spawnlpe" , "spawnv" , "spawnve" , "spawnvp" , "spawnvpe" ,
40+ "posix_spawn" , "posix_spawnp" ,
41+ // modules
42+ "path" ] and
43+ (
4344 t .start ( ) and
44- result = DataFlow:: importMember ( "os" , "popen" )
45+ result = DataFlow:: importMember ( "os" , attr_name )
4546 or
46- t .startInAttr ( "popen" ) and
47- result = os ( )
48- or
49- exists ( DataFlow:: TypeTracker t2 | result = os:: popen ( t2 ) .track ( t2 , t ) )
50- }
47+ t .startInAttr ( attr_name ) and
48+ result = DataFlow:: importModule ( "os" )
49+ )
50+ or
51+ // Due to bad performance when using normal setup with `os_attr(t2, attr_name).track(t2, t)`
52+ // we have inlined that code and forced a join
53+ exists ( DataFlow:: TypeTracker t2 |
54+ exists ( DataFlow:: StepSummary summary |
55+ os_attr_first_join ( t2 , attr_name , result , summary ) and
56+ t = t2 .append ( summary )
57+ )
58+ )
59+ }
5160
52- /** Gets a reference to the `os.popen` function. */
53- DataFlow:: Node popen ( ) { result = os:: popen ( DataFlow:: TypeTracker:: end ( ) ) }
61+ pragma [ nomagic]
62+ private predicate os_attr_first_join (
63+ DataFlow:: TypeTracker t2 , string attr_name , DataFlow:: Node res , DataFlow:: StepSummary summary
64+ ) {
65+ DataFlow:: StepSummary:: step ( os_attr ( t2 , attr_name ) , res , summary )
66+ }
5467
55- /** Gets a reference to the `os.path` module. */
56- private DataFlow:: Node path ( DataFlow:: TypeTracker t ) {
57- t .start ( ) and
58- (
59- result = DataFlow:: importMember ( "os" , "path" )
60- or
61- result = DataFlow:: importModule ( "os.path" )
62- )
63- or
64- t .startInAttr ( "path" ) and
65- result = os ( )
66- or
67- exists ( DataFlow:: TypeTracker t2 | result = path ( t2 ) .track ( t2 , t ) )
68- }
68+ /**
69+ * Gets a reference to the attribute `attr_name` of the `os` module.
70+ * WARNING: Only holds for a few predefined attributes.
71+ *
72+ * For example, using `"system"` will get all uses of `os.system`.
73+ */
74+ private DataFlow:: Node os_attr ( string attr_name ) {
75+ result = os_attr ( DataFlow:: TypeTracker:: end ( ) , attr_name )
76+ }
6977
78+ /** Provides models for the `os` module. */
79+ module os {
7080 /** Gets a reference to the `os.path` module. */
71- DataFlow:: Node path ( ) { result = path ( DataFlow :: TypeTracker :: end ( ) ) }
81+ DataFlow:: Node path ( ) { result = os_attr ( "path" ) }
7282
7383 /** Provides models for the `os.path` module */
7484 module path {
@@ -83,7 +93,7 @@ private module Stdlib {
8393 exists ( DataFlow:: TypeTracker t2 | result = join ( t2 ) .track ( t2 , t ) )
8494 }
8595
86- /** Gets a reference to the `os.join` module . */
96+ /** Gets a reference to the `os.path. join` function . */
8797 DataFlow:: Node join ( ) { result = join ( DataFlow:: TypeTracker:: end ( ) ) }
8898 }
8999 }
@@ -93,7 +103,7 @@ private module Stdlib {
93103 * See https://docs.python.org/3/library/os.html#os.system
94104 */
95105 private class OsSystemCall extends SystemCommandExecution:: Range {
96- OsSystemCall ( ) { this .asCfgNode ( ) .( CallNode ) .getFunction ( ) = os :: system ( ) .asCfgNode ( ) }
106+ OsSystemCall ( ) { this .asCfgNode ( ) .( CallNode ) .getFunction ( ) = os_attr ( "system" ) .asCfgNode ( ) }
97107
98108 override DataFlow:: Node getCommand ( ) {
99109 result .asCfgNode ( ) = this .asCfgNode ( ) .( CallNode ) .getArg ( 0 )
@@ -105,7 +115,57 @@ private module Stdlib {
105115 * See https://docs.python.org/3/library/os.html#os.popen
106116 */
107117 private class OsPopenCall extends SystemCommandExecution:: Range {
108- OsPopenCall ( ) { this .asCfgNode ( ) .( CallNode ) .getFunction ( ) = os:: popen ( ) .asCfgNode ( ) }
118+ OsPopenCall ( ) { this .asCfgNode ( ) .( CallNode ) .getFunction ( ) = os_attr ( "popen" ) .asCfgNode ( ) }
119+
120+ override DataFlow:: Node getCommand ( ) {
121+ result .asCfgNode ( ) = this .asCfgNode ( ) .( CallNode ) .getArg ( 0 )
122+ }
123+ }
124+
125+ /**
126+ * A call to any of the `os.exec*` functions
127+ * See https://docs.python.org/3.8/library/os.html#os.execl
128+ */
129+ private class OsExecCall extends SystemCommandExecution:: Range {
130+ OsExecCall ( ) {
131+ exists ( string name |
132+ name in [ "execl" , "execle" , "execlp" , "execlpe" , "execv" , "execve" , "execvp" , "execvpe" ] and
133+ this .asCfgNode ( ) .( CallNode ) .getFunction ( ) = os_attr ( name ) .asCfgNode ( )
134+ )
135+ }
136+
137+ override DataFlow:: Node getCommand ( ) {
138+ result .asCfgNode ( ) = this .asCfgNode ( ) .( CallNode ) .getArg ( 0 )
139+ }
140+ }
141+
142+ /**
143+ * A call to any of the `os.spawn*` functions
144+ * See https://docs.python.org/3.8/library/os.html#os.spawnl
145+ */
146+ private class OsSpawnCall extends SystemCommandExecution:: Range {
147+ OsSpawnCall ( ) {
148+ exists ( string name |
149+ name in [ "spawnl" , "spawnle" , "spawnlp" , "spawnlpe" , "spawnv" , "spawnve" , "spawnvp" ,
150+ "spawnvpe" ] and
151+ this .asCfgNode ( ) .( CallNode ) .getFunction ( ) = os_attr ( name ) .asCfgNode ( )
152+ )
153+ }
154+
155+ override DataFlow:: Node getCommand ( ) {
156+ result .asCfgNode ( ) = this .asCfgNode ( ) .( CallNode ) .getArg ( 1 )
157+ }
158+ }
159+
160+ /**
161+ * A call to any of the `os.posix_spawn*` functions
162+ * See https://docs.python.org/3.8/library/os.html#os.posix_spawn
163+ */
164+ private class OsPosixSpawnCall extends SystemCommandExecution:: Range {
165+ OsPosixSpawnCall ( ) {
166+ this .asCfgNode ( ) .( CallNode ) .getFunction ( ) =
167+ os_attr ( [ "posix_spawn" , "posix_spawnp" ] ) .asCfgNode ( )
168+ }
109169
110170 override DataFlow:: Node getCommand ( ) {
111171 result .asCfgNode ( ) = this .asCfgNode ( ) .( CallNode ) .getArg ( 0 )
@@ -123,4 +183,148 @@ private module Stdlib {
123183 // TODO: Handle pathlib (like we do for os.path.join)
124184 }
125185 }
186+
187+ // ---------------------------------------------------------------------------
188+ // subprocess
189+ // ---------------------------------------------------------------------------
190+ /** Gets a reference to the `subprocess` module. */
191+ private DataFlow:: Node subprocess ( DataFlow:: TypeTracker t ) {
192+ t .start ( ) and
193+ result = DataFlow:: importModule ( "subprocess" )
194+ or
195+ exists ( DataFlow:: TypeTracker t2 | result = subprocess ( t2 ) .track ( t2 , t ) )
196+ }
197+
198+ /** Gets a reference to the `subprocess` module. */
199+ DataFlow:: Node subprocess ( ) { result = subprocess ( DataFlow:: TypeTracker:: end ( ) ) }
200+
201+ /**
202+ * Gets a reference to the attribute `attr_name` of the `subprocess` module.
203+ * WARNING: Only holds for a few predefined attributes.
204+ *
205+ * For example, using `attr_name = "Popen"` will get all uses of `subprocess.Popen`.
206+ */
207+ private DataFlow:: Node subprocess_attr ( DataFlow:: TypeTracker t , string attr_name ) {
208+ attr_name in [ "Popen" , "call" , "check_call" , "check_output" , "run" ] and
209+ (
210+ t .start ( ) and
211+ result = DataFlow:: importMember ( "subprocess" , attr_name )
212+ or
213+ t .startInAttr ( attr_name ) and
214+ result = DataFlow:: importModule ( "subprocess" )
215+ )
216+ or
217+ // Due to bad performance when using normal setup with `subprocess_attr(t2, attr_name).track(t2, t)`
218+ // we have inlined that code and forced a join
219+ exists ( DataFlow:: TypeTracker t2 |
220+ exists ( DataFlow:: StepSummary summary |
221+ subprocess_attr_first_join ( t2 , attr_name , result , summary ) and
222+ t = t2 .append ( summary )
223+ )
224+ )
225+ }
226+
227+ pragma [ nomagic]
228+ private predicate subprocess_attr_first_join (
229+ DataFlow:: TypeTracker t2 , string attr_name , DataFlow:: Node res , DataFlow:: StepSummary summary
230+ ) {
231+ DataFlow:: StepSummary:: step ( subprocess_attr ( t2 , attr_name ) , res , summary )
232+ }
233+
234+ /**
235+ * Gets a reference to the attribute `attr_name` of the `subprocess` module.
236+ * WARNING: Only holds for a few predefined attributes.
237+ *
238+ * For example, using `attr_name = "Popen"` will get all uses of `subprocess.Popen`.
239+ */
240+ private DataFlow:: Node subprocess_attr ( string attr_name ) {
241+ result = subprocess_attr ( DataFlow:: TypeTracker:: end ( ) , attr_name )
242+ }
243+
244+ /**
245+ * A call to `subprocess.Popen` or helper functions (call, check_call, check_output, run)
246+ * See https://docs.python.org/3.8/library/subprocess.html#subprocess.Popen
247+ */
248+ private class SubprocessPopenCall extends SystemCommandExecution:: Range {
249+ CallNode call ;
250+
251+ SubprocessPopenCall ( ) {
252+ call = this .asCfgNode ( ) and
253+ exists ( string name |
254+ name in [ "Popen" , "call" , "check_call" , "check_output" , "run" ] and
255+ call .getFunction ( ) = subprocess_attr ( name ) .asCfgNode ( )
256+ )
257+ }
258+
259+ /** Gets the ControlFlowNode for the `args` argument, if any. */
260+ private ControlFlowNode get_args_arg ( ) {
261+ result = call .getArg ( 0 )
262+ or
263+ result = call .getArgByName ( "args" )
264+ }
265+
266+ /** Gets the ControlFlowNode for the `shell` argument, if any. */
267+ private ControlFlowNode get_shell_arg ( ) {
268+ result = call .getArg ( 8 )
269+ or
270+ result = call .getArgByName ( "shell" )
271+ }
272+
273+ private boolean get_shell_arg_value ( ) {
274+ not exists ( this .get_shell_arg ( ) ) and
275+ result = false
276+ or
277+ exists ( ControlFlowNode shell_arg | shell_arg = this .get_shell_arg ( ) |
278+ result = shell_arg .getNode ( ) .( ImmutableLiteral ) .booleanValue ( )
279+ or
280+ // TODO: Track the "shell" argument to determine possible values
281+ not shell_arg .getNode ( ) instanceof ImmutableLiteral and
282+ (
283+ result = true
284+ or
285+ result = false
286+ )
287+ )
288+ }
289+
290+ /** Gets the ControlFlowNode for the `executable` argument, if any. */
291+ private ControlFlowNode get_executable_arg ( ) {
292+ result = call .getArg ( 2 )
293+ or
294+ result = call .getArgByName ( "executable" )
295+ }
296+
297+ override DataFlow:: Node getCommand ( ) {
298+ // TODO: Track arguments ("args" and "shell")
299+ // TODO: Handle using `args=["sh", "-c", <user-input>]`
300+ result .asCfgNode ( ) = this .get_executable_arg ( )
301+ or
302+ exists ( ControlFlowNode arg_args , boolean shell |
303+ arg_args = get_args_arg ( ) and
304+ shell = get_shell_arg_value ( )
305+ |
306+ // When "executable" argument is set, and "shell" argument is `False`, the
307+ // "args" argument will only be used to set the program name and arguments to
308+ // the program, so we should not consider any of them as command execution.
309+ not (
310+ exists ( this .get_executable_arg ( ) ) and
311+ shell = false
312+ ) and
313+ (
314+ // When the "args" argument is an iterable, first element is the command to
315+ // run, so if we're able to, we only mark the first element as the command
316+ // (and not the arguments to the command).
317+ //
318+ result .asCfgNode ( ) = arg_args .( SequenceNode ) .getElement ( 0 )
319+ or
320+ // Either the "args" argument is not a sequence (which is valid) or we where
321+ // just not able to figure it out. Simply mark the "args" argument as the
322+ // command.
323+ //
324+ not arg_args instanceof SequenceNode and
325+ result .asCfgNode ( ) = arg_args
326+ )
327+ )
328+ }
329+ }
126330}
0 commit comments