Skip to content

Commit fce76e2

Browse files
authored
Merge pull request #4354 from RasmusWL/python-command-execution-modeling
Python: Better command execution modeling
2 parents 2e4a614 + e5b9ac8 commit fce76e2

File tree

10 files changed

+518
-53
lines changed

10 files changed

+518
-53
lines changed

python/ql/src/experimental/Security-new-dataflow/CWE-078/CommandInjection.ql

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,34 @@ class CommandInjectionConfiguration extends TaintTracking::Configuration {
2727
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
2828

2929
override predicate isSink(DataFlow::Node sink) {
30-
sink = any(SystemCommandExecution e).getCommand()
30+
sink = any(SystemCommandExecution e).getCommand() and
31+
// Since the implementation of standard library functions such `os.popen` looks like
32+
// ```py
33+
// def popen(cmd, mode="r", buffering=-1):
34+
// ...
35+
// proc = subprocess.Popen(cmd, ...)
36+
// ```
37+
// any time we would report flow to the `os.popen` sink, we can ALSO report the flow
38+
// from the `cmd` parameter to the `subprocess.Popen` sink -- obviously we don't
39+
// want that.
40+
//
41+
// However, simply removing taint edges out of a sink is not a good enough solution,
42+
// since we would only flag one of the `os.system` calls in the following example
43+
// due to use-use flow
44+
// ```py
45+
// os.system(cmd)
46+
// os.system(cmd)
47+
// ```
48+
//
49+
// Best solution I could come up with is to exclude all sinks inside the `os` and
50+
// `subprocess` modules. This does have a downside: If we have overlooked a function
51+
// in any of these, that internally runs a command, we no longer give an alert :|
52+
//
53+
// This does not only affect `os.popen`, but also the helper functions in
54+
// `subprocess`. See:
55+
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/os.py#L974
56+
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/subprocess.py#L341
57+
not sink.getScope().getEnclosingModule().getName() in ["os", "subprocess"]
3158
}
3259
}
3360

python/ql/src/experimental/semmle/python/frameworks/Stdlib.qll

Lines changed: 249 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,11 @@ private import experimental.semmle.python.Concepts
1111

1212
/** Provides models for the Python standard library. */
1313
private module Stdlib {
14+
// ---------------------------------------------------------------------------
15+
// os
16+
// ---------------------------------------------------------------------------
1417
/** Gets a reference to the `os` module. */
15-
DataFlow::Node os(DataFlow::TypeTracker t) {
18+
private DataFlow::Node os(DataFlow::TypeTracker t) {
1619
t.start() and
1720
result = DataFlow::importModule("os")
1821
or
@@ -22,53 +25,60 @@ private module Stdlib {
2225
/** Gets a reference to the `os` module. */
2326
DataFlow::Node os() { result = os(DataFlow::TypeTracker::end()) }
2427

25-
/** Provides models for the `os` module. */
26-
module os {
27-
/** Gets a reference to the `os.system` function. */
28-
DataFlow::Node system(DataFlow::TypeTracker t) {
29-
t.start() and
30-
result = DataFlow::importMember("os", "system")
31-
or
32-
t.startInAttr("system") and
33-
result = os()
34-
or
35-
exists(DataFlow::TypeTracker t2 | result = os::system(t2).track(t2, t))
36-
}
37-
38-
/** Gets a reference to the `os.system` function. */
39-
DataFlow::Node system() { result = os::system(DataFlow::TypeTracker::end()) }
40-
41-
/** Gets a reference to the `os.popen` function. */
42-
DataFlow::Node popen(DataFlow::TypeTracker t) {
28+
/**
29+
* Gets a reference to the attribute `attr_name` of the `os` module.
30+
* WARNING: Only holds for a few predefined attributes.
31+
*
32+
* For example, using `attr_name = "system"` will get all uses of `os.system`.
33+
*/
34+
private DataFlow::Node os_attr(DataFlow::TypeTracker t, string attr_name) {
35+
attr_name in ["system", "popen",
36+
// exec
37+
"execl", "execle", "execlp", "execlpe", "execv", "execve", "execvp", "execvpe",
38+
// spawn
39+
"spawnl", "spawnle", "spawnlp", "spawnlpe", "spawnv", "spawnve", "spawnvp", "spawnvpe",
40+
"posix_spawn", "posix_spawnp",
41+
// modules
42+
"path"] and
43+
(
4344
t.start() and
44-
result = DataFlow::importMember("os", "popen")
45+
result = DataFlow::importMember("os", attr_name)
4546
or
46-
t.startInAttr("popen") and
47-
result = os()
48-
or
49-
exists(DataFlow::TypeTracker t2 | result = os::popen(t2).track(t2, t))
50-
}
47+
t.startInAttr(attr_name) and
48+
result = DataFlow::importModule("os")
49+
)
50+
or
51+
// Due to bad performance when using normal setup with `os_attr(t2, attr_name).track(t2, t)`
52+
// we have inlined that code and forced a join
53+
exists(DataFlow::TypeTracker t2 |
54+
exists(DataFlow::StepSummary summary |
55+
os_attr_first_join(t2, attr_name, result, summary) and
56+
t = t2.append(summary)
57+
)
58+
)
59+
}
5160

52-
/** Gets a reference to the `os.popen` function. */
53-
DataFlow::Node popen() { result = os::popen(DataFlow::TypeTracker::end()) }
61+
pragma[nomagic]
62+
private predicate os_attr_first_join(
63+
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
64+
) {
65+
DataFlow::StepSummary::step(os_attr(t2, attr_name), res, summary)
66+
}
5467

55-
/** Gets a reference to the `os.path` module. */
56-
private DataFlow::Node path(DataFlow::TypeTracker t) {
57-
t.start() and
58-
(
59-
result = DataFlow::importMember("os", "path")
60-
or
61-
result = DataFlow::importModule("os.path")
62-
)
63-
or
64-
t.startInAttr("path") and
65-
result = os()
66-
or
67-
exists(DataFlow::TypeTracker t2 | result = path(t2).track(t2, t))
68-
}
68+
/**
69+
* Gets a reference to the attribute `attr_name` of the `os` module.
70+
* WARNING: Only holds for a few predefined attributes.
71+
*
72+
* For example, using `"system"` will get all uses of `os.system`.
73+
*/
74+
private DataFlow::Node os_attr(string attr_name) {
75+
result = os_attr(DataFlow::TypeTracker::end(), attr_name)
76+
}
6977

78+
/** Provides models for the `os` module. */
79+
module os {
7080
/** Gets a reference to the `os.path` module. */
71-
DataFlow::Node path() { result = path(DataFlow::TypeTracker::end()) }
81+
DataFlow::Node path() { result = os_attr("path") }
7282

7383
/** Provides models for the `os.path` module */
7484
module path {
@@ -83,7 +93,7 @@ private module Stdlib {
8393
exists(DataFlow::TypeTracker t2 | result = join(t2).track(t2, t))
8494
}
8595

86-
/** Gets a reference to the `os.join` module. */
96+
/** Gets a reference to the `os.path.join` function. */
8797
DataFlow::Node join() { result = join(DataFlow::TypeTracker::end()) }
8898
}
8999
}
@@ -93,7 +103,7 @@ private module Stdlib {
93103
* See https://docs.python.org/3/library/os.html#os.system
94104
*/
95105
private class OsSystemCall extends SystemCommandExecution::Range {
96-
OsSystemCall() { this.asCfgNode().(CallNode).getFunction() = os::system().asCfgNode() }
106+
OsSystemCall() { this.asCfgNode().(CallNode).getFunction() = os_attr("system").asCfgNode() }
97107

98108
override DataFlow::Node getCommand() {
99109
result.asCfgNode() = this.asCfgNode().(CallNode).getArg(0)
@@ -105,7 +115,57 @@ private module Stdlib {
105115
* See https://docs.python.org/3/library/os.html#os.popen
106116
*/
107117
private class OsPopenCall extends SystemCommandExecution::Range {
108-
OsPopenCall() { this.asCfgNode().(CallNode).getFunction() = os::popen().asCfgNode() }
118+
OsPopenCall() { this.asCfgNode().(CallNode).getFunction() = os_attr("popen").asCfgNode() }
119+
120+
override DataFlow::Node getCommand() {
121+
result.asCfgNode() = this.asCfgNode().(CallNode).getArg(0)
122+
}
123+
}
124+
125+
/**
126+
* A call to any of the `os.exec*` functions
127+
* See https://docs.python.org/3.8/library/os.html#os.execl
128+
*/
129+
private class OsExecCall extends SystemCommandExecution::Range {
130+
OsExecCall() {
131+
exists(string name |
132+
name in ["execl", "execle", "execlp", "execlpe", "execv", "execve", "execvp", "execvpe"] and
133+
this.asCfgNode().(CallNode).getFunction() = os_attr(name).asCfgNode()
134+
)
135+
}
136+
137+
override DataFlow::Node getCommand() {
138+
result.asCfgNode() = this.asCfgNode().(CallNode).getArg(0)
139+
}
140+
}
141+
142+
/**
143+
* A call to any of the `os.spawn*` functions
144+
* See https://docs.python.org/3.8/library/os.html#os.spawnl
145+
*/
146+
private class OsSpawnCall extends SystemCommandExecution::Range {
147+
OsSpawnCall() {
148+
exists(string name |
149+
name in ["spawnl", "spawnle", "spawnlp", "spawnlpe", "spawnv", "spawnve", "spawnvp",
150+
"spawnvpe"] and
151+
this.asCfgNode().(CallNode).getFunction() = os_attr(name).asCfgNode()
152+
)
153+
}
154+
155+
override DataFlow::Node getCommand() {
156+
result.asCfgNode() = this.asCfgNode().(CallNode).getArg(1)
157+
}
158+
}
159+
160+
/**
161+
* A call to any of the `os.posix_spawn*` functions
162+
* See https://docs.python.org/3.8/library/os.html#os.posix_spawn
163+
*/
164+
private class OsPosixSpawnCall extends SystemCommandExecution::Range {
165+
OsPosixSpawnCall() {
166+
this.asCfgNode().(CallNode).getFunction() =
167+
os_attr(["posix_spawn", "posix_spawnp"]).asCfgNode()
168+
}
109169

110170
override DataFlow::Node getCommand() {
111171
result.asCfgNode() = this.asCfgNode().(CallNode).getArg(0)
@@ -123,4 +183,148 @@ private module Stdlib {
123183
// TODO: Handle pathlib (like we do for os.path.join)
124184
}
125185
}
186+
187+
// ---------------------------------------------------------------------------
188+
// subprocess
189+
// ---------------------------------------------------------------------------
190+
/** Gets a reference to the `subprocess` module. */
191+
private DataFlow::Node subprocess(DataFlow::TypeTracker t) {
192+
t.start() and
193+
result = DataFlow::importModule("subprocess")
194+
or
195+
exists(DataFlow::TypeTracker t2 | result = subprocess(t2).track(t2, t))
196+
}
197+
198+
/** Gets a reference to the `subprocess` module. */
199+
DataFlow::Node subprocess() { result = subprocess(DataFlow::TypeTracker::end()) }
200+
201+
/**
202+
* Gets a reference to the attribute `attr_name` of the `subprocess` module.
203+
* WARNING: Only holds for a few predefined attributes.
204+
*
205+
* For example, using `attr_name = "Popen"` will get all uses of `subprocess.Popen`.
206+
*/
207+
private DataFlow::Node subprocess_attr(DataFlow::TypeTracker t, string attr_name) {
208+
attr_name in ["Popen", "call", "check_call", "check_output", "run"] and
209+
(
210+
t.start() and
211+
result = DataFlow::importMember("subprocess", attr_name)
212+
or
213+
t.startInAttr(attr_name) and
214+
result = DataFlow::importModule("subprocess")
215+
)
216+
or
217+
// Due to bad performance when using normal setup with `subprocess_attr(t2, attr_name).track(t2, t)`
218+
// we have inlined that code and forced a join
219+
exists(DataFlow::TypeTracker t2 |
220+
exists(DataFlow::StepSummary summary |
221+
subprocess_attr_first_join(t2, attr_name, result, summary) and
222+
t = t2.append(summary)
223+
)
224+
)
225+
}
226+
227+
pragma[nomagic]
228+
private predicate subprocess_attr_first_join(
229+
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
230+
) {
231+
DataFlow::StepSummary::step(subprocess_attr(t2, attr_name), res, summary)
232+
}
233+
234+
/**
235+
* Gets a reference to the attribute `attr_name` of the `subprocess` module.
236+
* WARNING: Only holds for a few predefined attributes.
237+
*
238+
* For example, using `attr_name = "Popen"` will get all uses of `subprocess.Popen`.
239+
*/
240+
private DataFlow::Node subprocess_attr(string attr_name) {
241+
result = subprocess_attr(DataFlow::TypeTracker::end(), attr_name)
242+
}
243+
244+
/**
245+
* A call to `subprocess.Popen` or helper functions (call, check_call, check_output, run)
246+
* See https://docs.python.org/3.8/library/subprocess.html#subprocess.Popen
247+
*/
248+
private class SubprocessPopenCall extends SystemCommandExecution::Range {
249+
CallNode call;
250+
251+
SubprocessPopenCall() {
252+
call = this.asCfgNode() and
253+
exists(string name |
254+
name in ["Popen", "call", "check_call", "check_output", "run"] and
255+
call.getFunction() = subprocess_attr(name).asCfgNode()
256+
)
257+
}
258+
259+
/** Gets the ControlFlowNode for the `args` argument, if any. */
260+
private ControlFlowNode get_args_arg() {
261+
result = call.getArg(0)
262+
or
263+
result = call.getArgByName("args")
264+
}
265+
266+
/** Gets the ControlFlowNode for the `shell` argument, if any. */
267+
private ControlFlowNode get_shell_arg() {
268+
result = call.getArg(8)
269+
or
270+
result = call.getArgByName("shell")
271+
}
272+
273+
private boolean get_shell_arg_value() {
274+
not exists(this.get_shell_arg()) and
275+
result = false
276+
or
277+
exists(ControlFlowNode shell_arg | shell_arg = this.get_shell_arg() |
278+
result = shell_arg.getNode().(ImmutableLiteral).booleanValue()
279+
or
280+
// TODO: Track the "shell" argument to determine possible values
281+
not shell_arg.getNode() instanceof ImmutableLiteral and
282+
(
283+
result = true
284+
or
285+
result = false
286+
)
287+
)
288+
}
289+
290+
/** Gets the ControlFlowNode for the `executable` argument, if any. */
291+
private ControlFlowNode get_executable_arg() {
292+
result = call.getArg(2)
293+
or
294+
result = call.getArgByName("executable")
295+
}
296+
297+
override DataFlow::Node getCommand() {
298+
// TODO: Track arguments ("args" and "shell")
299+
// TODO: Handle using `args=["sh", "-c", <user-input>]`
300+
result.asCfgNode() = this.get_executable_arg()
301+
or
302+
exists(ControlFlowNode arg_args, boolean shell |
303+
arg_args = get_args_arg() and
304+
shell = get_shell_arg_value()
305+
|
306+
// When "executable" argument is set, and "shell" argument is `False`, the
307+
// "args" argument will only be used to set the program name and arguments to
308+
// the program, so we should not consider any of them as command execution.
309+
not (
310+
exists(this.get_executable_arg()) and
311+
shell = false
312+
) and
313+
(
314+
// When the "args" argument is an iterable, first element is the command to
315+
// run, so if we're able to, we only mark the first element as the command
316+
// (and not the arguments to the command).
317+
//
318+
result.asCfgNode() = arg_args.(SequenceNode).getElement(0)
319+
or
320+
// Either the "args" argument is not a sequence (which is valid) or we where
321+
// just not able to figure it out. Simply mark the "args" argument as the
322+
// command.
323+
//
324+
not arg_args instanceof SequenceNode and
325+
result.asCfgNode() = arg_args
326+
)
327+
)
328+
}
329+
}
126330
}

python/ql/test/experimental/library-tests/frameworks/stdlib/ConceptsTest.expected

Whitespace-only changes.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
import python
2+
import experimental.meta.ConceptsTest

0 commit comments

Comments
 (0)