diff --git a/src/microsoft/Likely Bugs/Format/NonConstantFormat-1-bad.c b/src/microsoft/Likely Bugs/Format/NonConstantFormat-1-bad.c new file mode 100644 index 00000000..d01df0f8 --- /dev/null +++ b/src/microsoft/Likely Bugs/Format/NonConstantFormat-1-bad.c @@ -0,0 +1,6 @@ +#include +int main(int argc, char** argv) { + for(int i = 1; i < argc; ++i) { + printf(argv[i]); + } +} \ No newline at end of file diff --git a/src/microsoft/Likely Bugs/Format/NonConstantFormat-1-good.c b/src/microsoft/Likely Bugs/Format/NonConstantFormat-1-good.c new file mode 100644 index 00000000..33fff968 --- /dev/null +++ b/src/microsoft/Likely Bugs/Format/NonConstantFormat-1-good.c @@ -0,0 +1,6 @@ +#include +int main(int argc, char** argv) { + for(int i = 1; i < argc; ++i) { + printf("%s", argv[i]); + } +} \ No newline at end of file diff --git a/src/microsoft/Likely Bugs/Format/NonConstantFormat-2-bad.c b/src/microsoft/Likely Bugs/Format/NonConstantFormat-2-bad.c new file mode 100644 index 00000000..f43c1af1 --- /dev/null +++ b/src/microsoft/Likely Bugs/Format/NonConstantFormat-2-bad.c @@ -0,0 +1,13 @@ +void log_with_timestamp(const char* message) { + struct tm now; + time(&now); + printf("[%s] ", asctime(now)); + printf(message); +} + +int main(int argc, char** argv) { + log_with_timestamp("Application is starting...\n"); + /* ... */ + log_with_timestamp("Application is closing...\n"); + return 0; +} \ No newline at end of file diff --git a/src/microsoft/Likely Bugs/Format/NonConstantFormat-2-good.c b/src/microsoft/Likely Bugs/Format/NonConstantFormat-2-good.c new file mode 100644 index 00000000..82802e9a --- /dev/null +++ b/src/microsoft/Likely Bugs/Format/NonConstantFormat-2-good.c @@ -0,0 +1,16 @@ +void log_with_timestamp(const char* message, ...) { + va_list args; + va_start(args, message); + struct tm now; + time(&now); + printf("[%s] ", asctime(now)); + vprintf(message, args); + va_end(args); +} + +int main(int argc, char** argv) { + log_with_timestamp("%s is starting...\n", argv[0]); + /* ... */ + log_with_timestamp("%s is closing...\n", argv[0]); + return 0; +} \ No newline at end of file diff --git a/src/microsoft/Likely Bugs/Format/NonConstantFormat-2-ok.c b/src/microsoft/Likely Bugs/Format/NonConstantFormat-2-ok.c new file mode 100644 index 00000000..576654af --- /dev/null +++ b/src/microsoft/Likely Bugs/Format/NonConstantFormat-2-ok.c @@ -0,0 +1,12 @@ +void log_with_timestamp(const char* message) { + struct tm now; + time(&now); + printf("[%s] %s", asctime(now), message); +} + +int main(int argc, char** argv) { + log_with_timestamp("Application is starting...\n"); + /* ... */ + log_with_timestamp("Application is closing...\n"); + return 0; +} \ No newline at end of file diff --git a/src/microsoft/Likely Bugs/Format/NonConstantFormat.md b/src/microsoft/Likely Bugs/Format/NonConstantFormat.md new file mode 100644 index 00000000..11571d4e --- /dev/null +++ b/src/microsoft/Likely Bugs/Format/NonConstantFormat.md @@ -0,0 +1,98 @@ +# Non-constant format string +The `printf` function, related functions like `sprintf` and `fprintf`, and other functions built atop `vprintf` all accept a format string as one of their arguments. When such format strings are literal constants, it is easy for the programmer (and static analysis tools) to verify that the format specifiers (such as `%s` and `%02x`) in the format string are compatible with the trailing arguments of the function call. When such format strings are not literal constants, it is more difficult to maintain the program: programmers (and static analysis tools) must perform non-local data-flow analysis to deduce what values the format string argument might take. + + +## Recommendation +If the argument passed as a format string is meant to be a plain string rather than a format string, then pass `%s` as the format string, and pass the original argument as the sole trailing argument. + +If the argument passed as a format string is a parameter to the enclosing function, then consider redesigning the enclosing function's API to be less brittle. + + +## Example +The following program is meant to echo its command line arguments: + + +```c +#include +int main(int argc, char** argv) { + for(int i = 1; i < argc; ++i) { + printf(argv[i]); + } +} +``` +The above program behaves as expected in most cases, but breaks when one of its command line arguments contains a percent character. In such cases, the behavior of the program is undefined: it might echo garbage, it might crash, or it might give a malicious attacker root access. One way of addressing the problem is to use a constant `%s` format string, as in the following program: + + +```c +#include +int main(int argc, char** argv) { + for(int i = 1; i < argc; ++i) { + printf("%s", argv[i]); + } +} +``` + +## Example +The following program defines a `log_with_timestamp` function: + + +```c +void log_with_timestamp(const char* message) { + struct tm now; + time(&now); + printf("[%s] ", asctime(now)); + printf(message); +} + +int main(int argc, char** argv) { + log_with_timestamp("Application is starting...\n"); + /* ... */ + log_with_timestamp("Application is closing...\n"); + return 0; +} +``` +In the code that is visible, the reader can verify that `log_with_timestamp` is never called with a log message containing a percent character, but even if all current calls are correct, this presents an ongoing maintenance burden to ensure that newly-introduced calls don't contain percent characters. As in the previous example, one solution is to make the log message a trailing argument of the function call: + + +```c +void log_with_timestamp(const char* message) { + struct tm now; + time(&now); + printf("[%s] %s", asctime(now), message); +} + +int main(int argc, char** argv) { + log_with_timestamp("Application is starting...\n"); + /* ... */ + log_with_timestamp("Application is closing...\n"); + return 0; +} +``` +An alternative solution is to allow `log_with_timestamp` to accept format arguments: + + +```c +void log_with_timestamp(const char* message, ...) { + va_list args; + va_start(args, message); + struct tm now; + time(&now); + printf("[%s] ", asctime(now)); + vprintf(message, args); + va_end(args); +} + +int main(int argc, char** argv) { + log_with_timestamp("%s is starting...\n", argv[0]); + /* ... */ + log_with_timestamp("%s is closing...\n", argv[0]); + return 0; +} +``` +In this formulation, the non-constant format string to `printf` has been replaced with a non-constant format string to `vprintf`. The analysis will no longer consider the body of `log_with_timestamp` to be a problem, and will instead check that every call to `log_with_timestamp` passes a constant format string. + + +## References +* CERT C Coding Standard: [FIO30-C. Exclude user input from format strings](https://www.securecoding.cert.org/confluence/display/c/FIO30-C.+Exclude+user+input+from+format+strings). +* M. Howard, D. Leblanc, J. Viega, *19 Deadly Sins of Software Security: Programming Flaws and How to Fix Them*. +* Common Weakness Enumeration: [CWE-134](https://cwe.mitre.org/data/definitions/134.html). diff --git a/src/microsoft/Likely Bugs/Format/NonConstantFormat.qhelp b/src/microsoft/Likely Bugs/Format/NonConstantFormat.qhelp new file mode 100644 index 00000000..6994e499 --- /dev/null +++ b/src/microsoft/Likely Bugs/Format/NonConstantFormat.qhelp @@ -0,0 +1,59 @@ + + + +

The printf function, related functions like sprintf and fprintf, +and other functions built atop vprintf all accept a format string as one of their arguments. +When such format strings are literal constants, it is easy for the programmer (and static analysis tools) +to verify that the format specifiers (such as %s and %02x) in the format string +are compatible with the trailing arguments of the function call. When such format strings are not literal +constants, it is more difficult to maintain the program: programmers (and static analysis tools) must +perform non-local data-flow analysis to deduce what values the format string argument might take.

+ +
+ +

If the argument passed as a format string is meant to be a plain string rather than a format string, +then pass %s as the format string, and pass the original argument as the sole trailing +argument.

+ +

If the argument passed as a format string is a parameter to the enclosing function, then consider +redesigning the enclosing function's API to be less brittle.

+ +
+ +

The following program is meant to echo its command line arguments:

+ +

The above program behaves as expected in most cases, but breaks when one of its command line arguments +contains a percent character. In such cases, the behavior of the program is undefined: it might echo +garbage, it might crash, or it might give a malicious attacker root access. One way of addressing +the problem is to use a constant %s format string, as in the following program:

+ + +
+ +

The following program defines a log_with_timestamp function:

+ +

In the code that is visible, the reader can verify that log_with_timestamp is never called +with a log message containing a percent character, but even if all current calls are correct, this presents +an ongoing maintenance burden to ensure that newly-introduced calls don't contain percent characters. As +in the previous example, one solution is to make the log message a trailing argument of the function call:

+ +

An alternative solution is to allow log_with_timestamp to accept format arguments:

+ +

In this formulation, the non-constant format string to printf has been replaced with +a non-constant format string to vprintf. The analysis will no longer consider the body of +log_with_timestamp to be a problem, and will instead check that every call to +log_with_timestamp passes a constant format string.

+ +
+ + + +
  • CERT C Coding +Standard: FIO30-C. Exclude user input from format strings.
  • +
  • M. Howard, D. Leblanc, J. Viega, 19 Deadly Sins of Software Security: Programming Flaws and How to Fix Them.
  • + + +
    +
    diff --git a/src/microsoft/Likely Bugs/Format/NonConstantFormat.ql b/src/microsoft/Likely Bugs/Format/NonConstantFormat.ql new file mode 100644 index 00000000..55d8706f --- /dev/null +++ b/src/microsoft/Likely Bugs/Format/NonConstantFormat.ql @@ -0,0 +1,198 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * @name Non-constant format string + * @description Passing a value that is not a string literal 'format' string to a printf-like function can lead + * to a mismatch between the number of arguments defined by the 'format' and the number + * of arguments actually passed to the function. If the format string ultimately stems + * from an untrusted source, this can be used for exploits. + * This query finds format strings coming from non-literal sources. Note that format strings of + * type `const char*` it is still considered non-constant if the value is not coming from a string + * literal. For example, for a parameter with type `const char*` of an exported function that is + * used as a format string, there is no way to ensure the originating value was a string literal. + * @kind path-problem + * @problem.severity recommendation + * @security-severity 9.3 + * @precision high + * @id cpp/microsoft/public/likely-bugs/format/non-constant-format + * @tags maintainability + * correctness + * security + * external/cwe/cwe-134 + */ + +import semmle.code.cpp.ir.dataflow.TaintTracking +import semmle.code.cpp.commons.Printf +import semmle.code.cpp.security.FlowSources +import semmle.code.cpp.ir.dataflow.internal.ModelUtil +import semmle.code.cpp.models.interfaces.DataFlow +import semmle.code.cpp.models.interfaces.Taint +import semmle.code.cpp.ir.IR +import NonConstFlow::PathGraph + +class UncalledFunction extends Function { + UncalledFunction() { + not exists(Call c | c.getTarget() = this) and + // Ignore functions that appear to be function pointers + // function pointers may be seen as uncalled statically + not exists(FunctionAccess fa | fa.getTarget() = this) + } +} + +/** The `unsigned short` type. */ +class UnsignedShort extends ShortType { + UnsignedShort() { this.isUnsigned() } +} + +/** + * Holds if `t` cannot refer to a string. That is, it's a built-in + * or arithmetic type that is not a "`char` like" type. + */ +predicate cannotContainString(Type t) { + exists(Type unspecified | + unspecified = t.getUnspecifiedType() and + not unspecified instanceof UnknownType and + not unspecified instanceof CharType and + not unspecified instanceof WideCharType and + not unspecified instanceof Char8Type and + not unspecified instanceof Char16Type and + not unspecified instanceof Char32Type and + // C often defines `wchar_t` as `unsigned short` + not unspecified instanceof UnsignedShort + | + unspecified instanceof ArithmeticType or + unspecified instanceof BuiltInType + ) +} + +predicate dataFlowOrTaintFlowFunction(Function func, FunctionOutput output) { + func.(DataFlowFunction).hasDataFlow(_, output) or + func.(TaintFunction).hasTaintFlow(_, output) +} + +/** + * Holds if `node` is a non-constant source of data flow for non-const format string detection. + * This is defined as either: + * 1) a `FlowSource` + * 2) a parameter of an 'uncalled' function + * 3) an argument to a function with no definition that is not known to define the output through its input + * 4) an out arg of a function with no definition that is not known to define the output through its input + * + * The latter two cases address identifying standard string manipulation libraries as input sources + * e.g., strcpy. More simply, functions without definitions that are known to manipulate the + * input to produce an output are not sources. Instead the ultimate source of input to these functions + * should be considered as the source. + * + * False Negative Implication: This approach has false negatives (fails to identify non-const sources) + * when the source is a field of a struct or object and the initialization is not observed statically. + * There are 3 general cases where this can occur: + * 1) Parameters of uncalled functions that are structs/objects and a field is accessed for a format string. + * 2) A local variable that is a struct/object and initialization of the field occurs in code that is unseen statically. + * e.g., an object constructor isn't known statically, or a function sets fields + * of a struct, but the function is not known statically. + * 3) A function meeting cases (3) and (4) above returns (through an out argument or return value) + * a struct or object where a field containing a format string has been initialized. + * + * Note, uninitialized variables used as format strings are never detected by design. + * Uninitialized variables are a separate vulnerability concern and should be addressed by a separate query. + */ +predicate isNonConst(DataFlow::Node node) { + node instanceof FlowSource + or + // Parameters of uncalled functions that aren't const + exists(UncalledFunction f, Parameter p | + f.getAParameter() = p and + // We pick the indirection of the parameter since this query is focused + // on strings. + p = node.asParameter(1) and + // Ignore main's argv parameter as it is already considered a `FlowSource` + // not ignoring it will result in path redundancies + (f.getName() = "main" implies p != f.getParameter(1)) + ) + or + // Consider as an input any out arg of a function or a function's return where the function is not: + // 1. a function with a known dataflow or taintflow from input to output and the `node` is the output + // 2. a function where there is a known definition + // i.e., functions that with unknown bodies and are not known to define the output through its input + // are considered as possible non-const sources + // The function's output must also not be const to be considered a non-const source + exists(Function func, CallInstruction call | + not func.hasDefinition() and + func = call.getStaticCallTarget() + | + // Case 1: It's a known dataflow or taintflow function with flow to the return value + call.getUnconvertedResultExpression() = node.asIndirectExpr() and + not exists(FunctionOutput output | + dataFlowOrTaintFlowFunction(func, output) and + output.isReturnValueDeref(_) and + node = callOutput(call, output) + ) + or + // Case 2: It's a known dataflow or taintflow function with flow to an output parameter + exists(int i | + call.getPositionalArgumentOperand(i).getDef().getUnconvertedResultExpression() = + node.asDefiningArgument() and + not exists(FunctionOutput output | + dataFlowOrTaintFlowFunction(func, output) and + output.isParameterDeref(i, _) and + node = callOutput(call, output) + ) + ) + ) +} + +/** + * Holds if `sink` is a sink is a format string of any + * `FormattingFunctionCall`. + */ +predicate isSinkImpl(DataFlow::Node sink, Expr formatString) { + sink.asIndirectExpr() = formatString and + exists(FormattingFunctionCall fc | formatString = fc.getArgument(fc.getFormatParameterIndex())) +} + +module NonConstFlowConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { + exists(Type t | + isNonConst(source) and + t = source.getType() and + not cannotContainString(t) + ) + } + + predicate isSink(DataFlow::Node sink) { isSinkImpl(sink, _) } + + predicate isBarrier(DataFlow::Node node) { + // Ignore tracing non-const through array indices + exists(ArrayExpr a | a.getArrayOffset() = node.asIndirectExpr()) + or + exists(Type t | + t = node.getType() and + cannotContainString(t) + ) + } + + predicate observeDiffInformedIncrementalMode() { any() } + + Location getASelectedSinkLocation(DataFlow::Node sink) { + exists(FormattingFunctionCall call, Expr formatString | + result = [call.getLocation(), sink.getLocation()] + | + isSinkImpl(sink, formatString) and + call.getArgument(call.getFormatParameterIndex()) = formatString + ) + } +} + +module NonConstFlow = TaintTracking::Global; + +from + FormattingFunctionCall call, Expr formatString, NonConstFlow::PathNode sink, + NonConstFlow::PathNode source +where + isSinkImpl(sink.getNode(), formatString) and + call.getArgument(call.getFormatParameterIndex()) = formatString and + NonConstFlow::flowPath(source, sink) +select sink.getNode(), source, sink, + "The format string argument to $@ has a source which cannot be " + + "verified to originate from a string literal.", call, call.getTarget().getName() diff --git a/src/microsoft/Likely Bugs/Memory Management/Buffer.qll b/src/microsoft/Likely Bugs/Memory Management/Buffer.qll new file mode 100644 index 00000000..7213f06e --- /dev/null +++ b/src/microsoft/Likely Bugs/Memory Management/Buffer.qll @@ -0,0 +1,17 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +import cpp + +// an expression of the form sizeof(e) or strlen(e) +class BufferSizeExpr extends Expr { + BufferSizeExpr() { + this instanceof SizeofExprOperator or + this instanceof StrlenCall + } + + Expr getArg() { + result = this.(SizeofExprOperator).getExprOperand() or + result = this.(StrlenCall).getStringExpr() + } +} diff --git a/src/microsoft/Likely Bugs/Memory Management/ImproperNullTermination.md b/src/microsoft/Likely Bugs/Memory Management/ImproperNullTermination.md new file mode 100644 index 00000000..ae206aa0 --- /dev/null +++ b/src/microsoft/Likely Bugs/Memory Management/ImproperNullTermination.md @@ -0,0 +1,42 @@ +# Potential improper null termination +Built-in C string functions such as `strcat` require that their input string arguments are null terminated. If the input string arguments are not null terminated, these functions will read/write beyond the length of the buffer containing the string, resulting in either buffer over-read or buffer overflow, respectively. + + +## Recommendation +Review the code and consider whether the variable that holds the string should have an initializer or whether some path through the program fails to null terminate the string. + + +## Example +The destination variable `dest` used in the call to `strcat` does not (necessarily) contain a null terminator. Consequently, the call to `strcat` may result in a buffer overflow. + + +```cpp +char source[100]; +memset(source, 'A', 100-1); +source[100-1] = '\0'; // null terminate source + +char dest[200]; +memset(dest, 'B', 100-1); + +strcat(dest, source); +``` +In the revised example, `dest` is properly null terminated before the the call to `strcat`. + + +```cpp +char source[100]; +memset(source, 'A', 100-1); +source[100-1] = '\0'; // null terminate source + +char dest[200]; +memset(dest, 'B', 100-1); +dest[100-1] = '\0'; // null terminate destination + +strcat(dest, source); +``` + +## References +* B. Chess and J. West, *Secure Programming with Static Analysis*, 6.2 Maintaining the Null Terminator. Addison-Wesley. 2007. +* Linux Programmer's Manual: [STRCAT(3)](http://man7.org/linux/man-pages/man3/strncat.3.html). +* Common Weakness Enumeration: [CWE-170](https://cwe.mitre.org/data/definitions/170.html). +* Common Weakness Enumeration: [CWE-665](https://cwe.mitre.org/data/definitions/665.html). diff --git a/src/microsoft/Likely Bugs/Memory Management/ImproperNullTermination.qhelp b/src/microsoft/Likely Bugs/Memory Management/ImproperNullTermination.qhelp new file mode 100644 index 00000000..f539bb3b --- /dev/null +++ b/src/microsoft/Likely Bugs/Memory Management/ImproperNullTermination.qhelp @@ -0,0 +1,47 @@ + + + + + +

    Built-in C string functions such as strcat require that their +input string arguments are null terminated. If the input string arguments are +not null terminated, these functions will read/write beyond the length of the +buffer containing the string, resulting in either buffer over-read or buffer +overflow, respectively. +

    + +
    + + +

    +Review the code and consider whether the variable that holds the string should have +an initializer or whether some path through the program fails to null terminate the +string. +

    + +
    + +

    The destination variable dest used in the call to strcat +does not (necessarily) contain a null terminator. Consequently, the call to strcat +may result in a buffer overflow. +

    + + + +

    In the revised example, dest is properly null terminated before the +the call to strcat. +

    + + + + +
    + + +
  • B. Chess and J. West, Secure Programming with Static Analysis, 6.2 Maintaining the Null Terminator. Addison-Wesley. 2007.
  • +
  • Linux Programmer's Manual: STRCAT(3).
  • + +
    +
    diff --git a/src/microsoft/Likely Bugs/Memory Management/ImproperNullTermination.ql b/src/microsoft/Likely Bugs/Memory Management/ImproperNullTermination.ql new file mode 100644 index 00000000..a448849b --- /dev/null +++ b/src/microsoft/Likely Bugs/Memory Management/ImproperNullTermination.ql @@ -0,0 +1,102 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * @name Potential improper null termination + * @description Using a string that may not be null terminated as an argument + * to a string function can result in buffer overflow or buffer over-read. + * @kind problem + * @id cpp/microsoft/public/likely-bugs/memory-management/improper-null-termination + * @problem.severity warning + * @security-severity 7.8 + * @tags security + * external/cwe/cwe-170 + * external/cwe/cwe-665 + */ + +import cpp +import semmle.code.cpp.controlflow.StackVariableReachability +import semmle.code.cpp.commons.NullTermination + +/** + * A declaration of a local variable that leaves the variable uninitialized. + */ +DeclStmt declWithNoInit(LocalVariable v) { + result.getADeclaration() = v and + not exists(v.getInitializer()) +} + +/** + * Control flow reachability from a buffer that is not null terminated to a + * sink that requires null termination. + */ +class ImproperNullTerminationReachability extends StackVariableReachabilityWithReassignment { + ImproperNullTerminationReachability() { this = "ImproperNullTerminationReachability" } + + override predicate isSourceActual(ControlFlowNode node, StackVariable v) { + node = declWithNoInit(v) + or + exists(Call c, int bufferArg, int sizeArg | + c = node and + ( + c.getTarget().hasName("readlink") and bufferArg = 1 and sizeArg = 2 + or + c.getTarget().hasName("readlinkat") and bufferArg = 2 and sizeArg = 3 + ) and + c.getArgument(bufferArg).(VariableAccess).getTarget() = v and + ( + // buffer size parameter likely matches the full buffer size + c.getArgument(sizeArg) instanceof SizeofOperator or + c.getArgument(sizeArg).getValue().toInt() = v.getType().getSize() + ) + ) + } + + override predicate isSinkActual(ControlFlowNode node, StackVariable v) { + node.(VariableAccess).getTarget() = v and + variableMustBeNullTerminated(node) + } + + override predicate isBarrier(ControlFlowNode node, StackVariable v) { + exprDefinition(v, node, _) or + this.isSinkActual(node, v) // only report first use + } +} + +/** + * Flow from a place where null termination is added, to a sink of + * `ImproperNullTerminationReachability`. This was previously implemented as a + * simple barrier in `ImproperNullTerminationReachability`, but there were + * false positive results involving multiple paths from source to sink. We'd + * prefer to report only the results we are sure of. + */ +class NullTerminationReachability extends StackVariableReachabilityWithReassignment { + NullTerminationReachability() { this = "NullTerminationReachability" } + + override predicate isSourceActual(ControlFlowNode node, StackVariable v) { + mayAddNullTerminator(node, v.getAnAccess()) or // null termination + node.(AddressOfExpr).getOperand() = v.getAnAccess() // address taken (possible null termination) + } + + override predicate isSinkActual(ControlFlowNode node, StackVariable v) { + // have the same sinks as `ImproperNullTerminationReachability`. + exists(ImproperNullTerminationReachability r | r.isSinkActual(node, v)) + } + + override predicate isBarrier(ControlFlowNode node, StackVariable v) { + // don't look further back than the source, or further forward than the sink + exists(ImproperNullTerminationReachability r | r.isSourceActual(node, v)) or + exists(ImproperNullTerminationReachability r | r.isSinkActual(node, v)) + } +} + +from + ImproperNullTerminationReachability reaches, NullTerminationReachability nullTermReaches, + ControlFlowNode source, LocalVariable v, VariableAccess sink +where + reaches.reaches(source, v, sink) and + not exists(ControlFlowNode termination | + nullTermReaches.reaches(termination, _, sink) and + termination != source + ) +select sink, "Variable $@ may not be null terminated.", v, v.getName() diff --git a/src/microsoft/Likely Bugs/Memory Management/ImproperNullTerminationBad.cpp b/src/microsoft/Likely Bugs/Memory Management/ImproperNullTerminationBad.cpp new file mode 100644 index 00000000..7e9d220c --- /dev/null +++ b/src/microsoft/Likely Bugs/Memory Management/ImproperNullTerminationBad.cpp @@ -0,0 +1,8 @@ +char source[100]; +memset(source, 'A', 100-1); +source[100-1] = '\0'; // null terminate source + +char dest[200]; +memset(dest, 'B', 100-1); + +strcat(dest, source); \ No newline at end of file diff --git a/src/microsoft/Likely Bugs/Memory Management/ImproperNullTerminationGood.cpp b/src/microsoft/Likely Bugs/Memory Management/ImproperNullTerminationGood.cpp new file mode 100644 index 00000000..3baa3cdc --- /dev/null +++ b/src/microsoft/Likely Bugs/Memory Management/ImproperNullTerminationGood.cpp @@ -0,0 +1,9 @@ +char source[100]; +memset(source, 'A', 100-1); +source[100-1] = '\0'; // null terminate source + +char dest[200]; +memset(dest, 'B', 100-1); +dest[100-1] = '\0'; // null terminate destination + +strcat(dest, source); \ No newline at end of file diff --git a/src/microsoft/Likely Bugs/Memory Management/StrncpyFlippedArgs.md b/src/microsoft/Likely Bugs/Memory Management/StrncpyFlippedArgs.md new file mode 100644 index 00000000..a4defdda --- /dev/null +++ b/src/microsoft/Likely Bugs/Memory Management/StrncpyFlippedArgs.md @@ -0,0 +1,48 @@ +# Possibly wrong buffer size in string copy +The standard library function `strncpy` copies a source string to a destination buffer. The third argument defines the maximum number of characters to copy and should be less than or equal to the size of the destination buffer. Calls of the form `strncpy(dest, src, strlen(src))` or `strncpy(dest, src, sizeof(src))` incorrectly set the third argument to the size of the source buffer. Executing a call of this type may cause a buffer overflow. Buffer overflows can lead to anything from a segmentation fault to a security vulnerability. + + +## Recommendation +Check the highlighted function calls carefully, and ensure that the size parameter is derived from the size of the destination buffer, not the source buffer. + + +## Example +In the following examples, the size of the source buffer is incorrectly used as a parameter to `strncpy`: + + +```cpp +char src[256]; +char dest1[128]; + +... + +strncpy(dest1, src, sizeof(src)); // wrong: size of dest should be used + +char *dest2 = (char *)malloc(sz1 + sz2 + sz3); +strncpy(dest2, src, strlen(src)); // wrong: size of dest should be used + +``` +The corrected version uses the size of the destination buffer, or a variable containing the size of the destination buffer as the size parameter to `strncpy`: + + +```cpp +char src[256]; +char dest1[128]; + +... + +strncpy(dest1, src, sizeof(dest1)); // correct + +size_t destSize = sz1 + sz2 + sz3; +char *dest2 = (char *)malloc(destSize); +strncpy(dest2, src, destSize); // correct + +``` + +## References +* cplusplus.com: [strncpy](https://cplusplus.com/reference/cstring/strncpy/). +* I. Gerg. *An Overview and Example of the Buffer-Overflow Exploit*. IANewsletter vol 7 no 4. 2005. +* M. Donaldson. *Inside the Buffer Overflow Attack: Mechanism, Method & Prevention*. SANS Institute InfoSec Reading Room. 2002. +* Common Weakness Enumeration: [CWE-676](https://cwe.mitre.org/data/definitions/676.html). +* Common Weakness Enumeration: [CWE-119](https://cwe.mitre.org/data/definitions/119.html). +* Common Weakness Enumeration: [CWE-251](https://cwe.mitre.org/data/definitions/251.html). diff --git a/src/microsoft/Likely Bugs/Memory Management/StrncpyFlippedArgs.qhelp b/src/microsoft/Likely Bugs/Memory Management/StrncpyFlippedArgs.qhelp new file mode 100644 index 00000000..201b9057 --- /dev/null +++ b/src/microsoft/Likely Bugs/Memory Management/StrncpyFlippedArgs.qhelp @@ -0,0 +1,38 @@ + + + +

    The standard library function strncpy copies a source string to a destination buffer. The third argument defines the maximum number of characters to copy and should be less than +or equal to the size of the destination buffer. Calls of the form strncpy(dest, src, strlen(src)) or strncpy(dest, src, sizeof(src)) incorrectly set the third argument to the size of the source buffer. Executing a call of this type may cause a buffer overflow. Buffer overflows can lead to anything from a segmentation fault to a security vulnerability.

    + +
    + +

    Check the highlighted function calls carefully, and ensure that the size parameter is derived from the size of the destination buffer, +not the source buffer.

    + +
    + + +

    In the following examples, the size of the source buffer is incorrectly used as a parameter to strncpy:

    + + + +

    The corrected version uses the size of the destination buffer, or a variable containing the size of the destination buffer as the size parameter to strncpy:

    + + +
    + + + +
  • cplusplus.com: strncpy.
  • +
  • + I. Gerg. An Overview and Example of the Buffer-Overflow Exploit. IANewsletter vol 7 no 4. 2005. +
  • +
  • + M. Donaldson. Inside the Buffer Overflow Attack: Mechanism, Method & Prevention. SANS Institute InfoSec Reading Room. 2002. +
  • + + +
    +
    diff --git a/src/microsoft/Likely Bugs/Memory Management/StrncpyFlippedArgs.ql b/src/microsoft/Likely Bugs/Memory Management/StrncpyFlippedArgs.ql new file mode 100644 index 00000000..ea77d915 --- /dev/null +++ b/src/microsoft/Likely Bugs/Memory Management/StrncpyFlippedArgs.ql @@ -0,0 +1,111 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * @name Possibly wrong buffer size in string copy + * @description Calling 'strncpy' with the size of the source buffer + * as the third argument may result in a buffer overflow. + * @kind problem + * @problem.severity warning + * @security-severity 9.3 + * @precision medium + * @id cpp/microsoft/public/likely-bugs/memory-management/bad-strncpy-size + * @tags reliability + * correctness + * security + * external/cwe/cwe-676 + * external/cwe/cwe-119 + * external/cwe/cwe-251 + */ + +import cpp +import Buffer +private import semmle.code.cpp.valuenumbering.GlobalValueNumbering +private import semmle.code.cpp.models.implementations.Strcpy + +predicate isSizePlus(Expr e, BufferSizeExpr baseSize, int plus) { + // baseSize + e = baseSize and plus = 0 + or + exists(AddExpr ae, Expr operand1, Expr operand2, int plusSub | + // baseSize + n or n + baseSize + ae = e and + operand1 = ae.getAnOperand() and + operand2 = ae.getAnOperand() and + operand1 != operand2 and + isSizePlus(operand1, baseSize, plusSub) and + plus = plusSub + operand2.getValue().toInt() + ) + or + exists(SubExpr se, int plusSub | + // baseSize - n + se = e and + isSizePlus(se.getLeftOperand(), baseSize, plusSub) and + plus = plusSub - se.getRightOperand().getValue().toInt() + ) +} + +string nthString(int num) { + num = 0 and + result = "first" + or + num = 1 and + result = "second" + or + num = 2 and + result = "third" +} + +/** + * Gets the size of the expression, if it is initialized + * with a fixed size array. + */ +int arrayExprFixedSize(Expr e) { + result = e.getUnspecifiedType().(ArrayType).getSize() + or + result = e.(NewArrayExpr).getAllocatedType().(ArrayType).getSize() + or + exists(SsaDefinition def, LocalVariable v | + not e.getUnspecifiedType() instanceof ArrayType and + e = def.getAUse(v) and + result = arrayExprFixedSize(def.getDefiningValue(v)) + ) +} + +from + StrcpyFunction f, FunctionCall fc, int argDest, int argSrc, int argLimit, int charSize, + Access copyDest, Access copySource, string name, string nth +where + f = fc.getTarget() and + argDest = f.getParamDest() and + argSrc = f.getParamSrc() and + argLimit = f.getParamSize() and + copyDest = fc.getArgument(argDest) and + copySource = fc.getArgument(argSrc) and + // Some of the functions operate on a larger char type, like `wchar_t`, so we + // need to take this into account in the fixed size case. + charSize = f.getParameter(argDest).getUnspecifiedType().(PointerType).getBaseType().getSize() and + ( + if exists(fc.getArgument(argLimit).getValue().toInt()) + then + // Fixed sized case + exists(int size | + size = arrayExprFixedSize(copyDest) and + size < charSize * fc.getArgument(argLimit).getValue().toInt() and + size != 0 // if the array has zero size, something special is going on + ) + else + exists(Access takenSizeOf, BufferSizeExpr sizeExpr, int plus | + // Variable sized case + sizeExpr = fc.getArgument(argLimit).getAChild*() and + isSizePlus(fc.getArgument(argLimit), sizeExpr, plus) and + plus >= 0 and + takenSizeOf = sizeExpr.getArg() and + globalValueNumber(copySource) = globalValueNumber(takenSizeOf) and // e.g. strncpy(x, y, strlen(y)) + globalValueNumber(copyDest) != globalValueNumber(takenSizeOf) // e.g. strncpy(y, y, strlen(y)) + ) + ) and + name = fc.getTarget().getName() and + nth = nthString(argLimit) +select fc, + "Potentially unsafe call to " + name + "; " + nth + " argument should be size of destination." diff --git a/src/microsoft/Likely Bugs/Memory Management/StrncpyFlippedArgsBad.cpp b/src/microsoft/Likely Bugs/Memory Management/StrncpyFlippedArgsBad.cpp new file mode 100644 index 00000000..952550b2 --- /dev/null +++ b/src/microsoft/Likely Bugs/Memory Management/StrncpyFlippedArgsBad.cpp @@ -0,0 +1,9 @@ +char src[256]; +char dest1[128]; + +... + +strncpy(dest1, src, sizeof(src)); // wrong: size of dest should be used + +char *dest2 = (char *)malloc(sz1 + sz2 + sz3); +strncpy(dest2, src, strlen(src)); // wrong: size of dest should be used diff --git a/src/microsoft/Likely Bugs/Memory Management/StrncpyFlippedArgsGood.cpp b/src/microsoft/Likely Bugs/Memory Management/StrncpyFlippedArgsGood.cpp new file mode 100644 index 00000000..22fc4ebd --- /dev/null +++ b/src/microsoft/Likely Bugs/Memory Management/StrncpyFlippedArgsGood.cpp @@ -0,0 +1,10 @@ +char src[256]; +char dest1[128]; + +... + +strncpy(dest1, src, sizeof(dest1)); // correct + +size_t destSize = sz1 + sz2 + sz3; +char *dest2 = (char *)malloc(destSize); +strncpy(dest2, src, destSize); // correct diff --git a/src/microsoft/Likely Bugs/Memory Management/UnsafeUseOfStrcat.cpp b/src/microsoft/Likely Bugs/Memory Management/UnsafeUseOfStrcat.cpp new file mode 100644 index 00000000..a17e89e7 --- /dev/null +++ b/src/microsoft/Likely Bugs/Memory Management/UnsafeUseOfStrcat.cpp @@ -0,0 +1,12 @@ +void f(char *s) { + char buf[80]; + strcpy(buf, "s: "); + strcat(buf, s); // wrong: buffer not checked before strcat +} + +void g(char *s) { + char buf[80]; + strcpy(buf, "s: "); + if(strlen(s) < 77) + strcat(buf, s); // correct: buffer size checked before strcat +} diff --git a/src/microsoft/Likely Bugs/Memory Management/UnsafeUseOfStrcat.md b/src/microsoft/Likely Bugs/Memory Management/UnsafeUseOfStrcat.md new file mode 100644 index 00000000..eb2b17ad --- /dev/null +++ b/src/microsoft/Likely Bugs/Memory Management/UnsafeUseOfStrcat.md @@ -0,0 +1,32 @@ +# Potentially unsafe use of strcat +The standard library function `strcat` appends a source string to a target string. If you do not check the size of the source string then you cannot guarantee that appending the data to the target string will not cause a buffer overflow. Buffer overflows can lead to anything from a segmentation fault to a security vulnerability. + + +## Recommendation +Check the highlighted function calls carefully to ensure that no buffer overflow is possible. For a more robust solution, consider adding explicit range checks or using the `strncat` function instead. + + +## Example + +```cpp +void f(char *s) { + char buf[80]; + strcpy(buf, "s: "); + strcat(buf, s); // wrong: buffer not checked before strcat +} + +void g(char *s) { + char buf[80]; + strcpy(buf, "s: "); + if(strlen(s) < 77) + strcat(buf, s); // correct: buffer size checked before strcat +} + +``` + +## References +* I. Gerg, *An Overview and Example of the Buffer-Overflow Exploit*. IANewsletter vol 7, no 4, 2005. +* M. Donaldson, *Inside the Buffer Overflow Attack: Mechanism, Method & Prevention*. SANS Institute InfoSec Reading Room. 2002. +* Common Weakness Enumeration: [CWE-676](https://cwe.mitre.org/data/definitions/676.html). +* Common Weakness Enumeration: [CWE-120](https://cwe.mitre.org/data/definitions/120.html). +* Common Weakness Enumeration: [CWE-251](https://cwe.mitre.org/data/definitions/251.html). diff --git a/src/microsoft/Likely Bugs/Memory Management/UnsafeUseOfStrcat.qhelp b/src/microsoft/Likely Bugs/Memory Management/UnsafeUseOfStrcat.qhelp new file mode 100644 index 00000000..6f985f1b --- /dev/null +++ b/src/microsoft/Likely Bugs/Memory Management/UnsafeUseOfStrcat.qhelp @@ -0,0 +1,35 @@ + + + +

    The standard library function strcat appends a source string to a target string. If you do not check the size of the source string then you cannot guarantee that +appending the data to the target string will not cause a buffer overflow. Buffer overflows can lead to anything from a segmentation fault to a security vulnerability.

    + +
    + +

    Check the highlighted function calls carefully to ensure that no buffer overflow is possible. +For a more robust solution, consider adding explicit range checks or using the strncat +function instead.

    + +
    + + + + + + + +
  • + I. Gerg, An Overview and Example of the Buffer-Overflow Exploit. IANewsletter vol 7, no 4, 2005. +
  • +
  • + M. Donaldson, Inside the Buffer Overflow Attack: Mechanism, Method & Prevention. SANS Institute InfoSec Reading Room. 2002. +
  • + + + + +
    +
    diff --git a/src/microsoft/Likely Bugs/Memory Management/UnsafeUseOfStrcat.ql b/src/microsoft/Likely Bugs/Memory Management/UnsafeUseOfStrcat.ql new file mode 100644 index 00000000..e3618a6f --- /dev/null +++ b/src/microsoft/Likely Bugs/Memory Management/UnsafeUseOfStrcat.ql @@ -0,0 +1,52 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * @name Potentially unsafe use of strcat + * @description Using 'strcat' without checking the size of the source string + * may result in a buffer overflow + * @kind problem + * @problem.severity warning + * @security-severity 9.8 + * @precision medium + * @id cpp/microsoft/public/likely-bugs/memory-management/unsafe-strcat + * @tags reliability + * correctness + * security + * external/cwe/cwe-676 + * external/cwe/cwe-120 + * external/cwe/cwe-251 + */ + +import cpp +import Buffer + +/** + * An access to a variable that is initialized by a constant + * expression, and is never used as an lvalue anywhere else. + */ +predicate isEffectivelyConstAccess(VariableAccess a) { + exists(Variable v | + a.getTarget() = v and + v.getInitializer().getExpr().isConstant() and + not v.getAnAccess().isUsedAsLValue() + ) +} + +class StrcatSource extends VariableAccess { + FunctionCall strcat; + + StrcatSource() { + strcat.getTarget().hasName("strcat") and + this = strcat.getArgument(1) + } + + FunctionCall getStrcatCall() { result = strcat } +} + +from StrcatSource src +where + not src.getType() instanceof ArrayType and + not exists(BufferSizeExpr bse | bse.getArg().(VariableAccess).getTarget() = src.getTarget()) and + not isEffectivelyConstAccess(src) +select src.getStrcatCall(), "Always check the size of the source buffer when using strcat." diff --git a/src/microsoft/Security/CWE/CWE-190/ArithmeticUncontrolled.c b/src/microsoft/Security/CWE/CWE-190/ArithmeticUncontrolled.c new file mode 100644 index 00000000..fee75d54 --- /dev/null +++ b/src/microsoft/Security/CWE/CWE-190/ArithmeticUncontrolled.c @@ -0,0 +1,15 @@ +int main(int argc, char** argv) { + int i = rand(); + // BAD: potential overflow + int j = i + 1000; + + // ... + + int n = rand(); + int k; + // GOOD: use a guard to prevent overflow + if (n < INT_MAX-1000) + k = n + 1000; + else + k = INT_MAX; +} diff --git a/src/microsoft/Security/CWE/CWE-190/ArithmeticUncontrolled.md b/src/microsoft/Security/CWE/CWE-190/ArithmeticUncontrolled.md new file mode 100644 index 00000000..332831b3 --- /dev/null +++ b/src/microsoft/Security/CWE/CWE-190/ArithmeticUncontrolled.md @@ -0,0 +1,39 @@ +# Uncontrolled data in arithmetic expression +Performing calculations on uncontrolled data can result in integer overflows unless the input is validated. + +If the data is not under your control, and can take extremely large values, even arithmetic operations that would usually result in a small change in magnitude may result in overflows. + + +## Recommendation +Always guard against overflow in arithmetic operations on uncontrolled data by doing one of the following: + +* Validate the data. +* Define a guard on the arithmetic expression, so that the operation is performed only if the result can be known to be less than, or equal to, the maximum value for the type, for example `INT_MAX`. +* Use a wider type, so that larger input values do not cause overflow. + +## Example +In this example, a random integer is generated. Because the value is not controlled by the programmer, it could be extremely large. Performing arithmetic operations on this value could therefore cause an overflow. To avoid this happening, the example shows how to perform a check before performing an arithmetic operation. + + +```c +int main(int argc, char** argv) { + int i = rand(); + // BAD: potential overflow + int j = i + 1000; + + // ... + + int n = rand(); + int k; + // GOOD: use a guard to prevent overflow + if (n < INT_MAX-1000) + k = n + 1000; + else + k = INT_MAX; +} + +``` + +## References +* Common Weakness Enumeration: [CWE-190](https://cwe.mitre.org/data/definitions/190.html). +* Common Weakness Enumeration: [CWE-191](https://cwe.mitre.org/data/definitions/191.html). diff --git a/src/microsoft/Security/CWE/CWE-190/ArithmeticUncontrolled.qhelp b/src/microsoft/Security/CWE/CWE-190/ArithmeticUncontrolled.qhelp new file mode 100644 index 00000000..265c6172 --- /dev/null +++ b/src/microsoft/Security/CWE/CWE-190/ArithmeticUncontrolled.qhelp @@ -0,0 +1,44 @@ + + + +

    Performing calculations on uncontrolled data can result in integer overflows +unless the input is validated.

    + +

    If the data is not under your control, and can take extremely large values, +even arithmetic operations that would usually result in a small change in magnitude may result in overflows.

    + +
    + + +

    Always guard against overflow in arithmetic operations on uncontrolled data by doing one of the +following:

    + +
      +
    • Validate the data.
    • +
    • Define a guard on the arithmetic expression, so that the operation is performed only if the +result can be known to be less than, or equal to, the maximum value for the type, for example INT_MAX.
    • +
    • Use a wider type, so that larger input values do not cause overflow.
    • +
    + +
    + + +

    In this example, a random integer is generated. Because the value +is not controlled by the programmer, it could be extremely large. Performing arithmetic operations on this +value could therefore cause an overflow. To avoid this happening, the example shows how to perform +a check before performing an arithmetic operation.

    + + + +
    + + + + + + + +
    diff --git a/src/microsoft/Security/CWE/CWE-190/ArithmeticUncontrolled.ql b/src/microsoft/Security/CWE/CWE-190/ArithmeticUncontrolled.ql new file mode 100644 index 00000000..53afa5bd --- /dev/null +++ b/src/microsoft/Security/CWE/CWE-190/ArithmeticUncontrolled.ql @@ -0,0 +1,149 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * @name Uncontrolled data in arithmetic expression + * @description Arithmetic operations on uncontrolled data that is not + * validated can cause overflows. + * @kind path-problem + * @problem.severity warning + * @security-severity 8.6 + * @precision high + * @id cpp/microsoft/public/security/cwe-190/uncontrolled-arithmetic + * @tags security + * external/cwe/cwe-190 + * external/cwe/cwe-191 + */ + +import cpp +import semmle.code.cpp.security.Overflow +import semmle.code.cpp.security.Security +import semmle.code.cpp.security.FlowSources +import semmle.code.cpp.ir.dataflow.TaintTracking +import UncontrolledArith::PathGraph +import Bounded + +/** + * A function that outputs random data such as `std::rand`. + */ +abstract class RandomFunction extends Function { + /** + * Gets the `FunctionOutput` that describes how this function returns the random data. + */ + FunctionOutput getFunctionOutput() { result.isReturnValue() } +} + +/** + * The standard function `std::rand`. + */ +private class StdRand extends RandomFunction { + StdRand() { + this.hasGlobalOrStdOrBslName("rand") and + this.getNumberOfParameters() = 0 + } +} + +/** + * The Unix function `rand_r`. + */ +private class RandR extends RandomFunction { + RandR() { + this.hasGlobalName("rand_r") and + this.getNumberOfParameters() = 1 + } +} + +/** + * The Unix function `random`. + */ +private class Random extends RandomFunction { + Random() { + this.hasGlobalName("random") and + this.getNumberOfParameters() = 1 + } +} + +/** + * The Windows `rand_s` function. + */ +private class RandS extends RandomFunction { + RandS() { + this.hasGlobalName("rand_s") and + this.getNumberOfParameters() = 1 + } + + override FunctionOutput getFunctionOutput() { result.isParameterDeref(0) } +} + +predicate missingGuard(VariableAccess va, string effect) { + exists(Operation op | op.getAnOperand() = va | + // underflow - random numbers are usually non-negative, so underflow is + // only likely if the type is unsigned. Multiplication is also unlikely to + // cause underflow of a non-negative number. + missingGuardAgainstUnderflow(op, va) and + effect = "underflow" and + op.getUnspecifiedType().(IntegralType).isUnsigned() and + not op instanceof MulExpr + or + // overflow - only report signed integer overflow since unsigned overflow + // is well-defined. + op.getUnspecifiedType().(IntegralType).isSigned() and + missingGuardAgainstOverflow(op, va) and + effect = "overflow" + ) +} + +module UncontrolledArithConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { + exists(RandomFunction rand, Call call | call.getTarget() = rand | + rand.getFunctionOutput().isReturnValue() and + source.asExpr() = call + or + exists(int n | + source.asDefiningArgument() = call.getArgument(n) and + rand.getFunctionOutput().isParameterDeref(n) + ) + ) + } + + predicate isSink(DataFlow::Node sink) { missingGuard(sink.asExpr(), _) } + + predicate isBarrier(DataFlow::Node node) { + bounded(node.asExpr()) + or + // If this expression is part of bitwise 'and' or 'or' operation it's likely that the value is + // only used as a bit pattern. + node.asExpr() = + any(Operation op | + op instanceof BitwiseOrExpr or + op instanceof BitwiseAndExpr or + op instanceof ComplementExpr + ).getAnOperand*() + or + // block unintended flow to pointers + node.asExpr().getUnspecifiedType() instanceof PointerType + } + + predicate observeDiffInformedIncrementalMode() { any() } + + Location getASelectedSourceLocation(DataFlow::Node source) { + isSource(source) and + result = [getExpr(source).getLocation(), source.getLocation()] + } +} + +module UncontrolledArith = TaintTracking::Global; + +/** Gets the expression that corresponds to `node`, if any. */ +Expr getExpr(DataFlow::Node node) { result = [node.asExpr(), node.asDefiningArgument()] } + +from + UncontrolledArith::PathNode source, UncontrolledArith::PathNode sink, VariableAccess va, + string effect +where + UncontrolledArith::flowPath(source, sink) and + sink.getNode().asExpr() = va and + missingGuard(va, effect) +select sink.getNode(), source, sink, + "This arithmetic expression depends on an $@, potentially causing an " + effect + ".", + getExpr(source.getNode()), "uncontrolled value" diff --git a/src/microsoft/Security/CWE/CWE-190/ArithmeticWithExtremeValues.c b/src/microsoft/Security/CWE/CWE-190/ArithmeticWithExtremeValues.c new file mode 100644 index 00000000..69e22d2f --- /dev/null +++ b/src/microsoft/Security/CWE/CWE-190/ArithmeticWithExtremeValues.c @@ -0,0 +1,11 @@ +int main(int argc, char** argv) { + int i = INT_MAX; + // BAD: overflow + int j = i + 1; + + // ... + + int l = INT_MAX; + // GOOD: no overflow + long k = (long)l + 1; +} diff --git a/src/microsoft/Security/CWE/CWE-190/ArithmeticWithExtremeValues.md b/src/microsoft/Security/CWE/CWE-190/ArithmeticWithExtremeValues.md new file mode 100644 index 00000000..de3bc8d7 --- /dev/null +++ b/src/microsoft/Security/CWE/CWE-190/ArithmeticWithExtremeValues.md @@ -0,0 +1,30 @@ +# Use of extreme values in arithmetic expression +Assigning the maximum or minimum value for a type to a variable of that type and then using the variable in calculations may cause overflows. + + +## Recommendation +Before using the variable, ensure that it is reassigned a value that does not cause an overflow, or use a wider type to do the arithmetic. + + +## Example +In this example, assigning `INT_MAX` to a variable and adding one causes an overflow. However, casting to a `long` beforehand ensures that the arithmetic is done in the wider type, and so does not overflow. + + +```c +int main(int argc, char** argv) { + int i = INT_MAX; + // BAD: overflow + int j = i + 1; + + // ... + + int l = INT_MAX; + // GOOD: no overflow + long k = (long)l + 1; +} + +``` + +## References +* Common Weakness Enumeration: [CWE-190](https://cwe.mitre.org/data/definitions/190.html). +* Common Weakness Enumeration: [CWE-191](https://cwe.mitre.org/data/definitions/191.html). diff --git a/src/microsoft/Security/CWE/CWE-190/ArithmeticWithExtremeValues.qhelp b/src/microsoft/Security/CWE/CWE-190/ArithmeticWithExtremeValues.qhelp new file mode 100644 index 00000000..b5b49059 --- /dev/null +++ b/src/microsoft/Security/CWE/CWE-190/ArithmeticWithExtremeValues.qhelp @@ -0,0 +1,33 @@ + + + +

    Assigning the maximum or minimum value for a type to a variable of that type and then using the +variable in calculations may cause overflows.

    + +
    + + +

    Before using the variable, ensure that it is reassigned a value that does not cause an overflow, +or use a wider type to do the arithmetic.

    + +
    + + +

    In this example, assigning INT_MAX to a variable and adding one causes +an overflow. However, casting to a long beforehand ensures that the arithmetic +is done in the wider type, and so does not overflow.

    + + + +
    + + + + + + + +
    diff --git a/src/microsoft/Security/CWE/CWE-190/ArithmeticWithExtremeValues.ql b/src/microsoft/Security/CWE/CWE-190/ArithmeticWithExtremeValues.ql new file mode 100644 index 00000000..5203ccec --- /dev/null +++ b/src/microsoft/Security/CWE/CWE-190/ArithmeticWithExtremeValues.ql @@ -0,0 +1,137 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * @name Use of extreme values in arithmetic expression + * @description If a variable is assigned the maximum or minimum value + * for that variable's type and is then used in an + * arithmetic expression, this may result in an overflow. + * @kind problem + * @id cpp/microsoft/public/security/cwe-190/arithmetic-with-extreme-values + * @problem.severity warning + * @security-severity 8.6 + * @precision low + * @tags security + * reliability + * external/cwe/cwe-190 + * external/cwe/cwe-191 + */ + +import cpp +import semmle.code.cpp.security.Overflow +import semmle.code.cpp.dataflow.new.TaintTracking +import semmle.code.cpp.ir.IR +import semmle.code.cpp.controlflow.IRGuards as IRGuards + +predicate isMaxValue(Expr mie) { + exists(MacroInvocation mi | + mi.getExpr() = mie and + mi.getMacroName() = ["CHAR_MAX", "LLONG_MAX", "INT_MAX", "SHRT_MAX", "UINT_MAX"] + ) +} + +predicate isMinValue(Expr mie) { + exists(MacroInvocation mi | + mi.getExpr() = mie and + mi.getMacroName() = ["CHAR_MIN", "LLONG_MIN", "INT_MIN", "SHRT_MIN"] + ) +} + +predicate isSource(DataFlow::Node source, string cause) { + exists(Expr expr | expr = source.asExpr() | + isMaxValue(expr) and cause = "max value" + or + isMinValue(expr) and cause = "min value" + ) +} + +predicate causeEffectCorrespond(string cause, string effect) { + cause = "max value" and + effect = "overflow" + or + cause = "min value" and + effect = "underflow" +} + +predicate isSink(DataFlow::Node sink, VariableAccess va, string effect) { + exists(Operation op | + sink.asExpr() = va and + op.getAnOperand() = va + | + missingGuardAgainstUnderflow(op, va) and effect = "underflow" + or + missingGuardAgainstOverflow(op, va) and effect = "overflow" + ) +} + +predicate hasUpperBoundsCheck(Variable var) { + exists(RelationalOperation oper, VariableAccess access | + oper.getAnOperand() = access and + access.getTarget() = var and + // Comparing to 0 is not an upper bound check + not oper.getAnOperand().getValue() = "0" + ) +} + +predicate constantInstruction(Instruction instr) { + instr instanceof ConstantInstruction or + constantInstruction(instr.(UnaryInstruction).getUnary()) +} + +predicate readsVariable(LoadInstruction load, Variable var) { + load.getSourceAddress().(VariableAddressInstruction).getAstVariable() = var +} + +predicate nodeIsBarrierEqualityCandidate(DataFlow::Node node, Operand access, Variable checkedVar) { + exists(Instruction instr | instr = node.asInstruction() | + readsVariable(instr, checkedVar) and + any(IRGuards::IRGuardCondition guard).ensuresEq(access, _, _, instr.getBlock(), true) + ) +} + +module Config implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { isSource(source, _) } + + predicate isSink(DataFlow::Node sink) { isSink(sink, _, _) } + + predicate isBarrier(DataFlow::Node node) { + // Block flow if there's an upper bound check of the variable anywhere in the program + exists(Variable checkedVar, Instruction instr | instr = node.asInstruction() | + readsVariable(instr, checkedVar) and + hasUpperBoundsCheck(checkedVar) + ) + or + // Block flow if the node is guarded by an equality check + exists(Variable checkedVar, Operand access | + nodeIsBarrierEqualityCandidate(node, access, checkedVar) and + readsVariable(access.getDef(), checkedVar) + ) + or + // Block flow to any binary instruction whose operands are both non-constants. + exists(BinaryInstruction iTo | + iTo = node.asInstruction() and + not constantInstruction(iTo.getLeft()) and + not constantInstruction(iTo.getRight()) and + // propagate taint from either the pointer or the offset, regardless of constantness + not iTo instanceof PointerArithmeticInstruction + ) + } + + predicate observeDiffInformedIncrementalMode() { any() } + + Location getASelectedSinkLocation(DataFlow::Node sink) { + exists(VariableAccess va | result = va.getLocation() | isSink(sink, va, _)) + } +} + +module Flow = TaintTracking::Global; + +from DataFlow::Node source, DataFlow::Node sink, VariableAccess va, string cause, string effect +where + Flow::flow(source, sink) and + isSource(source, cause) and + causeEffectCorrespond(cause, effect) and + isSink(sink, va, effect) +select va, + "$@ flows to an operand of an arithmetic expression, potentially causing an " + effect + ".", + source, "Extreme value" diff --git a/src/microsoft/Security/CWE/CWE-190/Bounded.qll b/src/microsoft/Security/CWE/CWE-190/Bounded.qll new file mode 100644 index 00000000..997f2876 --- /dev/null +++ b/src/microsoft/Security/CWE/CWE-190/Bounded.qll @@ -0,0 +1,60 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This file provides the `bounded` predicate that is used in `cpp/uncontrolled-arithmetic`, + * `cpp/tainted-arithmetic` and `cpp/uncontrolled-allocation-size`. + */ + +private import cpp +private import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis +private import semmle.code.cpp.rangeanalysis.RangeAnalysisUtils + +/** + * An operand `operand` of a bitwise and expression `andExpr` (i.e., `andExpr` is either a + * `BitwiseAndExpr` or an `AssignAndExpr`) is upper bounded by some number that is less than the + * maximum integer allowed by the result type of `andExpr`. + */ +pragma[inline] +private predicate boundedBitwiseAnd(Expr operand, Expr andExpr) { + upperBound(operand.getFullyConverted()) < exprMaxVal(andExpr.getFullyConverted()) +} + +/** + * Holds if `e` is an arithmetic expression that cannot overflow, or if `e` is an operation that + * may greatly reduce the range of possible values. + */ +predicate bounded(Expr e) { + // There can be two separate reasons for `convertedExprMightOverflow` not holding: + // 1. `e` really cannot overflow. + // 2. `e` isn't analyzable. + // If we didn't rule out case 2 we would declare anything that isn't analyzable as bounded. + ( + e instanceof UnaryArithmeticOperation or + e instanceof BinaryArithmeticOperation or + e instanceof AssignArithmeticOperation + ) and + not convertedExprMightOverflow(e) + or + // Optimistically assume that the following operations always yields a much smaller value. + e instanceof RemExpr + or + e instanceof DivExpr + or + e instanceof RShiftExpr + or + exists(BitwiseAndExpr andExpr | + e = andExpr and boundedBitwiseAnd(andExpr.getAnOperand(), andExpr) + ) + or + // For the assignment variant of the operations we place the barrier on the assigned lvalue. + e = any(AssignRemExpr rem).getLValue() + or + e = any(AssignDivExpr div).getLValue() + or + e = any(AssignRShiftExpr div).getLValue() + or + exists(AssignAndExpr andExpr | + e = andExpr.getLValue() and boundedBitwiseAnd(andExpr.getRValue(), andExpr) + ) +} diff --git a/src/microsoft/experimental/Security/CWE-022/TaintedPath.md b/src/microsoft/experimental/Security/CWE-022/TaintedPath.md new file mode 100644 index 00000000..a5d6c681 --- /dev/null +++ b/src/microsoft/experimental/Security/CWE-022/TaintedPath.md @@ -0,0 +1,97 @@ +# Uncontrolled data used in path expression +Accessing paths controlled by users can allow an attacker to access unexpected resources. This can result in sensitive information being revealed or deleted, or an attacker being able to influence behavior by modifying unexpected files. + +Paths that are naively constructed from data controlled by a user may be absolute paths, or may contain unexpected special characters such as "..". Such a path could point anywhere on the file system. + + +## Recommendation +Validate user input before using it to construct a file path. + +Common validation methods include checking that the normalized path is relative and does not contain any ".." components, or checking that the path is contained within a safe folder. The method you should use depends on how the path is used in the application, and whether the path should be a single path component. + +If the path should be a single path component (such as a file name), you can check for the existence of any path separators ("/" or "\\"), or ".." sequences in the input, and reject the input if any are found. + +Note that removing "../" sequences is *not* sufficient, since the input could still contain a path separator followed by "..". For example, the input ".../...//" would still result in the string "../" if only "../" sequences are removed. + +Finally, the simplest (but most restrictive) option is to use an allow list of safe patterns and make sure that the user input matches one of these patterns. + + +## Example +In this example, a file name is read from a user and then used to access a file. However, a malicious user could enter a file name anywhere on the file system, such as "/etc/passwd" or "../../../etc/passwd". + + +```c +int main(int argc, char** argv) { + char *userAndFile = argv[2]; + + { + char fileBuffer[PATH_MAX]; + snprintf(fileBuffer, sizeof(fileBuffer), "/home/%s", userAndFile); + // BAD: a string from the user is used in a filename + fopen(fileBuffer, "wb+"); + } +} + +``` +If the input should only be a file name, you can check that it doesn't contain any path separators or ".." sequences. + + +```c +#include +#include + +int main(int argc, char** argv) { + char *fileName = argv[2]; + // Check for invalid sequences in the user input + if (strstr(fileName , "..") || strchr(fileName , '/') || strchr(fileName , '\\')) { + printf("Invalid filename.\n"); + return 1; + } + + char fileBuffer[PATH_MAX]; + snprintf(fileBuffer, sizeof(fileBuffer), "/home/user/files/%s", fileName); + // GOOD: We know that the filename is safe and stays within the public folder + FILE *file = fopen(fileBuffer, "wb+"); +} +``` +If the input should be within a specific directory, you can check that the resolved path is still contained within that directory. + + +```c +#include +#include + +int main(int argc, char** argv) { + char *userAndFile = argv[2]; + const char *baseDir = "/home/user/public/"; + char fullPath[PATH_MAX]; + + // Attempt to concatenate the base directory and the user-supplied path + snprintf(fullPath, sizeof(fullPath), "%s%s", baseDir, userAndFile); + + // Resolve the absolute path, normalizing any ".." or "." + char *resolvedPath = realpath(fullPath, NULL); + if (resolvedPath == NULL) { + perror("Error resolving path"); + return 1; + } + + // Check if the resolved path starts with the base directory + if (strncmp(baseDir, resolvedPath, strlen(baseDir)) != 0) { + free(resolvedPath); + return 1; + } + + // GOOD: Path is within the intended directory + FILE *file = fopen(resolvedPath, "wb+"); + free(resolvedPath); +} +``` + +## References +* OWASP: [Path Traversal](https://owasp.org/www-community/attacks/Path_Traversal). +* Linux man pages: [realpath(3)](https://man7.org/linux/man-pages/man3/realpath.3.html). +* Common Weakness Enumeration: [CWE-22](https://cwe.mitre.org/data/definitions/22.html). +* Common Weakness Enumeration: [CWE-23](https://cwe.mitre.org/data/definitions/23.html). +* Common Weakness Enumeration: [CWE-36](https://cwe.mitre.org/data/definitions/36.html). +* Common Weakness Enumeration: [CWE-73](https://cwe.mitre.org/data/definitions/73.html). diff --git a/src/microsoft/experimental/Security/CWE-022/TaintedPath.qhelp b/src/microsoft/experimental/Security/CWE-022/TaintedPath.qhelp new file mode 100644 index 00000000..4d6238ac --- /dev/null +++ b/src/microsoft/experimental/Security/CWE-022/TaintedPath.qhelp @@ -0,0 +1,68 @@ + + + +

    Accessing paths controlled by users can allow an attacker to access unexpected resources. This +can result in sensitive information being revealed or deleted, or an attacker being able to influence +behavior by modifying unexpected files.

    + +

    Paths that are naively constructed from data controlled by a user may be absolute paths, or may contain +unexpected special characters such as "..". Such a path could point anywhere on the file system.

    + +
    + + +

    Validate user input before using it to construct a file path.

    + +

    Common validation methods include checking that the normalized path is relative and does not contain +any ".." components, or checking that the path is contained within a safe folder. The method you should use depends +on how the path is used in the application, and whether the path should be a single path component. +

    + +

    If the path should be a single path component (such as a file name), you can check for the existence +of any path separators ("/" or "\"), or ".." sequences in the input, and reject the input if any are found. +

    + +

    +Note that removing "../" sequences is not sufficient, since the input could still contain a path separator +followed by "..". For example, the input ".../...//" would still result in the string "../" if only "../" sequences +are removed. +

    + +

    Finally, the simplest (but most restrictive) option is to use an allow list of safe patterns and make sure that +the user input matches one of these patterns.

    + +
    + + +

    In this example, a file name is read from a user and then used to access a file. +However, a malicious user could enter a file name anywhere on the file system, +such as "/etc/passwd" or "../../../etc/passwd".

    + + + +

    +If the input should only be a file name, you can check that it doesn't contain any path separators or ".." sequences. +

    + + + +

    +If the input should be within a specific directory, you can check that the resolved path +is still contained within that directory. +

    + + + +
    + + +
  • +OWASP: +Path Traversal. +
  • +
  • Linux man pages: realpath(3).
  • + +
    +
    diff --git a/src/microsoft/experimental/Security/CWE-022/TaintedPath.ql b/src/microsoft/experimental/Security/CWE-022/TaintedPath.ql new file mode 100644 index 00000000..a0a88869 --- /dev/null +++ b/src/microsoft/experimental/Security/CWE-022/TaintedPath.ql @@ -0,0 +1,119 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * @name Uncontrolled data used in path expression + * @description Accessing paths influenced by users can allow an + * attacker to access unexpected resources. + * @kind path-problem + * @problem.severity warning + * @security-severity 7.5 + * @precision medium + * @id cpp/microsoft/experimental/security/cwe-022/path-injection + * @tags security + * external/cwe/cwe-022 + * external/cwe/cwe-023 + * external/cwe/cwe-036 + * external/cwe/cwe-073 + */ + +import cpp +import semmle.code.cpp.security.FunctionWithWrappers +import semmle.code.cpp.security.FlowSources +import semmle.code.cpp.ir.IR +import semmle.code.cpp.ir.dataflow.TaintTracking +import TaintedPath::PathGraph + +/** + * A function for opening a file. + */ +class FileFunction extends FunctionWithWrappers { + FileFunction() { + exists(string nme | this.hasGlobalName(nme) | + nme = ["fopen", "_fopen", "_wfopen", "open", "_open", "_wopen"] + or + // create file function on windows + nme.matches("CreateFile%") + ) + or + this.hasQualifiedName("std", "fopen") + or + // on any of the fstream classes, or filebuf + exists(string nme | this.getDeclaringType().hasQualifiedName("std", nme) | + nme = ["basic_fstream", "basic_ifstream", "basic_ofstream", "basic_filebuf"] + ) and + // we look for either the open method or the constructor + (this.getName() = "open" or this instanceof Constructor) + } + + // conveniently, all of these functions take the path as the first parameter! + override predicate interestingArg(int arg) { arg = 0 } +} + +/** + * Holds for a variable that has any kind of upper-bound check anywhere in the program. + * This is biased towards being inclusive and being a coarse overapproximation because + * there are a lot of valid ways of doing an upper bounds checks if we don't consider + * where it occurs, for example: + * ```cpp + * if (x < 10) { sink(x); } + * + * if (10 > y) { sink(y); } + * + * if (z > 10) { z = 10; } + * sink(z); + * ``` + */ +predicate hasUpperBoundsCheck(Variable var) { + exists(RelationalOperation oper, VariableAccess access | + oper.getAnOperand() = access and + access.getTarget() = var and + // Comparing to 0 is not an upper bound check + not oper.getAnOperand().getValue() = "0" + ) +} + +module TaintedPathConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node node) { node instanceof FlowSource } + + predicate isSink(DataFlow::Node node) { + exists(FileFunction fileFunction | + fileFunction.outermostWrapperFunctionCall(node.asIndirectArgument(), _) + ) + } + + predicate isBarrier(DataFlow::Node node) { + node.asExpr().(Call).getTarget().getUnspecifiedType() instanceof ArithmeticType + or + exists(LoadInstruction load, Variable checkedVar | + load = node.asInstruction() and + checkedVar = load.getSourceAddress().(VariableAddressInstruction).getAstVariable() and + hasUpperBoundsCheck(checkedVar) + ) + } + + predicate isBarrierOut(DataFlow::Node node) { + // make sinks barriers so that we only report the closest instance + isSink(node) + } + + predicate observeDiffInformedIncrementalMode() { any() } + + Location getASelectedSinkLocation(DataFlow::Node sink) { + result = sink.asIndirectArgument().getLocation() + } +} + +module TaintedPath = TaintTracking::Global; + +from + FileFunction fileFunction, Expr taintedArg, FlowSource taintSource, + TaintedPath::PathNode sourceNode, TaintedPath::PathNode sinkNode, string callChain +where + taintedArg = sinkNode.getNode().asIndirectArgument() and + fileFunction.outermostWrapperFunctionCall(taintedArg, callChain) and + TaintedPath::flowPath(sourceNode, sinkNode) and + taintSource = sourceNode.getNode() +select taintedArg, sourceNode, sinkNode, + "This argument to a file access function is derived from $@ and then passed to " + callChain + ".", + taintSource, "user input (" + taintSource.getSourceType() + ")" diff --git a/src/microsoft/experimental/Security/CWE-022/examples/TaintedPath.c b/src/microsoft/experimental/Security/CWE-022/examples/TaintedPath.c new file mode 100644 index 00000000..ff309d7d --- /dev/null +++ b/src/microsoft/experimental/Security/CWE-022/examples/TaintedPath.c @@ -0,0 +1,10 @@ +int main(int argc, char** argv) { + char *userAndFile = argv[2]; + + { + char fileBuffer[PATH_MAX]; + snprintf(fileBuffer, sizeof(fileBuffer), "/home/%s", userAndFile); + // BAD: a string from the user is used in a filename + fopen(fileBuffer, "wb+"); + } +} diff --git a/src/microsoft/experimental/Security/CWE-022/examples/TaintedPathFolder.c b/src/microsoft/experimental/Security/CWE-022/examples/TaintedPathFolder.c new file mode 100644 index 00000000..1970e515 --- /dev/null +++ b/src/microsoft/experimental/Security/CWE-022/examples/TaintedPathFolder.c @@ -0,0 +1,28 @@ +#include +#include + +int main(int argc, char** argv) { + char *userAndFile = argv[2]; + const char *baseDir = "/home/user/public/"; + char fullPath[PATH_MAX]; + + // Attempt to concatenate the base directory and the user-supplied path + snprintf(fullPath, sizeof(fullPath), "%s%s", baseDir, userAndFile); + + // Resolve the absolute path, normalizing any ".." or "." + char *resolvedPath = realpath(fullPath, NULL); + if (resolvedPath == NULL) { + perror("Error resolving path"); + return 1; + } + + // Check if the resolved path starts with the base directory + if (strncmp(baseDir, resolvedPath, strlen(baseDir)) != 0) { + free(resolvedPath); + return 1; + } + + // GOOD: Path is within the intended directory + FILE *file = fopen(resolvedPath, "wb+"); + free(resolvedPath); +} \ No newline at end of file diff --git a/src/microsoft/experimental/Security/CWE-022/examples/TaintedPathNormalize.c b/src/microsoft/experimental/Security/CWE-022/examples/TaintedPathNormalize.c new file mode 100644 index 00000000..ab7607cd --- /dev/null +++ b/src/microsoft/experimental/Security/CWE-022/examples/TaintedPathNormalize.c @@ -0,0 +1,16 @@ +#include +#include + +int main(int argc, char** argv) { + char *fileName = argv[2]; + // Check for invalid sequences in the user input + if (strstr(fileName , "..") || strchr(fileName , '/') || strchr(fileName , '\\')) { + printf("Invalid filename.\n"); + return 1; + } + + char fileBuffer[PATH_MAX]; + snprintf(fileBuffer, sizeof(fileBuffer), "/home/user/files/%s", fileName); + // GOOD: We know that the filename is safe and stays within the public folder + FILE *file = fopen(fileBuffer, "wb+"); +} \ No newline at end of file diff --git a/src/microsoft/experimental/Security/CWE-190/ArithmeticTainted.c b/src/microsoft/experimental/Security/CWE-190/ArithmeticTainted.c new file mode 100644 index 00000000..2a108fbc --- /dev/null +++ b/src/microsoft/experimental/Security/CWE-190/ArithmeticTainted.c @@ -0,0 +1,18 @@ +int main(int argc, char** argv) { + char buffer[20]; + fgets(buffer, 20, stdin); + + int num = atoi(buffer); + // BAD: may overflow if input is very large + int scaled = num + 1000; + + // ... + + int num2 = atoi(buffer); + int scaled2; + // GOOD: use a guard to prevent overflow + if (num2 < INT_MAX-1000) + scaled2 = num2 + 1000; + else + scaled2 = INT_MAX; +} diff --git a/src/microsoft/experimental/Security/CWE-190/ArithmeticTainted.md b/src/microsoft/experimental/Security/CWE-190/ArithmeticTainted.md new file mode 100644 index 00000000..dc2c70cf --- /dev/null +++ b/src/microsoft/experimental/Security/CWE-190/ArithmeticTainted.md @@ -0,0 +1,42 @@ +# User-controlled data in arithmetic expression +Performing calculations on user-controlled data can result in integer overflows unless the input is validated. + +If the user is free to enter very large numbers, even arithmetic operations that would usually result in a small change in magnitude may result in overflows. + + +## Recommendation +Always guard against overflow in arithmetic operations on user-controlled data by doing one of the following: + +* Validate the user input. +* Define a guard on the arithmetic expression, so that the operation is performed only if the result can be known to be less than, or equal to, the maximum value for the type, for example `INT_MAX`. +* Use a wider type, so that larger input values do not cause overflow. + +## Example +In this example, a value is read from standard input into an `int`. Because the value is a user-controlled value, it could be extremely large. Performing arithmetic operations on this value could therefore cause an overflow. To avoid this happening, the example shows how to perform a check before performing a multiplication. + + +```c +int main(int argc, char** argv) { + char buffer[20]; + fgets(buffer, 20, stdin); + + int num = atoi(buffer); + // BAD: may overflow if input is very large + int scaled = num + 1000; + + // ... + + int num2 = atoi(buffer); + int scaled2; + // GOOD: use a guard to prevent overflow + if (num2 < INT_MAX-1000) + scaled2 = num2 + 1000; + else + scaled2 = INT_MAX; +} + +``` + +## References +* Common Weakness Enumeration: [CWE-190](https://cwe.mitre.org/data/definitions/190.html). +* Common Weakness Enumeration: [CWE-191](https://cwe.mitre.org/data/definitions/191.html). diff --git a/src/microsoft/experimental/Security/CWE-190/ArithmeticTainted.qhelp b/src/microsoft/experimental/Security/CWE-190/ArithmeticTainted.qhelp new file mode 100644 index 00000000..b09c84f7 --- /dev/null +++ b/src/microsoft/experimental/Security/CWE-190/ArithmeticTainted.qhelp @@ -0,0 +1,44 @@ + + + +

    Performing calculations on user-controlled data can result in integer overflows +unless the input is validated.

    + +

    If the user is free to enter very large numbers, even arithmetic operations that would usually +result in a small change in magnitude may result in overflows.

    + +
    + + +

    Always guard against overflow in arithmetic operations on user-controlled data by doing one of the +following:

    + +
      +
    • Validate the user input.
    • +
    • Define a guard on the arithmetic expression, so that the operation is performed only if the +result can be known to be less than, or equal to, the maximum value for the type, for example INT_MAX.
    • +
    • Use a wider type, so that larger input values do not cause overflow.
    • +
    + +
    + + +

    In this example, a value is read from standard input into an int. Because the value +is a user-controlled value, it could be extremely large. Performing arithmetic operations on this +value could therefore cause an overflow. To avoid this happening, the example shows how to perform +a check before performing a multiplication.

    + + + +
    + + + + + + + +
    diff --git a/src/microsoft/experimental/Security/CWE-190/ArithmeticTainted.ql b/src/microsoft/experimental/Security/CWE-190/ArithmeticTainted.ql new file mode 100644 index 00000000..828005d2 --- /dev/null +++ b/src/microsoft/experimental/Security/CWE-190/ArithmeticTainted.ql @@ -0,0 +1,131 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * @name User-controlled data in arithmetic expression + * @description Arithmetic operations on user-controlled data that is + * not validated can cause overflows. + * @kind path-problem + * @problem.severity warning + * @security-severity 8.6 + * @precision low + * @id cpp/microsoft/experimental/security/cwe-190/tainted-arithmetic + * @tags security + * external/cwe/cwe-190 + * external/cwe/cwe-191 + */ + +import cpp +import semmle.code.cpp.security.Overflow +import semmle.code.cpp.dataflow.new.TaintTracking +import semmle.code.cpp.dataflow.new.DataFlow +import semmle.code.cpp.ir.IR +import semmle.code.cpp.controlflow.IRGuards as IRGuards +import semmle.code.cpp.security.FlowSources as FS +import Bounded +import Flow::PathGraph + +bindingset[op] +predicate missingGuard(Operation op, Expr e, string effect) { + missingGuardAgainstUnderflow(op, e) and effect = "underflow" + or + missingGuardAgainstOverflow(op, e) and effect = "overflow" + or + not e instanceof VariableAccess and effect = "overflow" +} + +predicate isSource(FS::FlowSource source, string sourceType) { sourceType = source.getSourceType() } + +predicate isSink(DataFlow::Node sink, Operation op, Expr e) { + e = sink.asExpr() and + missingGuard(op, e, _) and + op.getAnOperand() = e and + ( + op instanceof UnaryArithmeticOperation or + op instanceof BinaryArithmeticOperation or + op instanceof AssignArithmeticOperation + ) +} + +predicate hasUpperBoundsCheck(Variable var) { + exists(RelationalOperation oper, VariableAccess access | + oper.getAnOperand() = access and + access.getTarget() = var and + // Comparing to 0 is not an upper bound check + not oper.getAnOperand().getValue() = "0" + ) +} + +predicate constantInstruction(Instruction instr) { + instr instanceof ConstantInstruction or + constantInstruction(instr.(UnaryInstruction).getUnary()) +} + +predicate readsVariable(LoadInstruction load, Variable var) { + load.getSourceAddress().(VariableAddressInstruction).getAstVariable() = var +} + +predicate nodeIsBarrierEqualityCandidate(DataFlow::Node node, Operand access, Variable checkedVar) { + exists(Instruction instr | instr = node.asInstruction() | + readsVariable(instr, checkedVar) and + any(IRGuards::IRGuardCondition guard).ensuresEq(access, _, _, instr.getBlock(), true) + ) +} + +module Config implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { isSource(source, _) } + + predicate isSink(DataFlow::Node sink) { isSink(sink, _, _) } + + predicate isBarrier(DataFlow::Node node) { + exists(StoreInstruction store, Expr e | + store = node.asInstruction() and e = node.asCertainDefinition() + | + // Block flow to "likely small expressions" + bounded(e) + or + // Block flow to "small types" + store.getResultType().getUnspecifiedType().(IntegralType).getSize() <= 1 + ) + or + // Block flow if there's an upper bound check of the variable anywhere in the program + exists(Variable checkedVar, Instruction instr | instr = node.asInstruction() | + readsVariable(instr, checkedVar) and + hasUpperBoundsCheck(checkedVar) + ) + or + // Block flow if the node is guarded by an equality check + exists(Variable checkedVar, Operand access | + nodeIsBarrierEqualityCandidate(node, access, checkedVar) and + readsVariable(access.getDef(), checkedVar) + ) + or + // Block flow to any binary instruction whose operands are both non-constants. + exists(BinaryInstruction iTo | + iTo = node.asInstruction() and + not constantInstruction(iTo.getLeft()) and + not constantInstruction(iTo.getRight()) and + // propagate taint from either the pointer or the offset, regardless of constantness + not iTo instanceof PointerArithmeticInstruction + ) + } + + predicate observeDiffInformedIncrementalMode() { any() } + + Location getASelectedSinkLocation(DataFlow::Node sink) { + exists(Expr e | result = e.getLocation() | isSink(sink, _, e)) + } +} + +module Flow = TaintTracking::Global; + +from + Expr e, string effect, Flow::PathNode source, Flow::PathNode sink, Operation op, string sourceType +where + Flow::flowPath(source, sink) and + isSource(source.getNode(), sourceType) and + isSink(sink.getNode(), op, e) and + missingGuard(op, e, effect) +select e, source, sink, + "$@ flows to an operand of an arithmetic expression, potentially causing an " + effect + ".", + source, sourceType diff --git a/src/microsoft/experimental/Security/CWE-190/Bounded.qll b/src/microsoft/experimental/Security/CWE-190/Bounded.qll new file mode 100644 index 00000000..997f2876 --- /dev/null +++ b/src/microsoft/experimental/Security/CWE-190/Bounded.qll @@ -0,0 +1,60 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * This file provides the `bounded` predicate that is used in `cpp/uncontrolled-arithmetic`, + * `cpp/tainted-arithmetic` and `cpp/uncontrolled-allocation-size`. + */ + +private import cpp +private import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis +private import semmle.code.cpp.rangeanalysis.RangeAnalysisUtils + +/** + * An operand `operand` of a bitwise and expression `andExpr` (i.e., `andExpr` is either a + * `BitwiseAndExpr` or an `AssignAndExpr`) is upper bounded by some number that is less than the + * maximum integer allowed by the result type of `andExpr`. + */ +pragma[inline] +private predicate boundedBitwiseAnd(Expr operand, Expr andExpr) { + upperBound(operand.getFullyConverted()) < exprMaxVal(andExpr.getFullyConverted()) +} + +/** + * Holds if `e` is an arithmetic expression that cannot overflow, or if `e` is an operation that + * may greatly reduce the range of possible values. + */ +predicate bounded(Expr e) { + // There can be two separate reasons for `convertedExprMightOverflow` not holding: + // 1. `e` really cannot overflow. + // 2. `e` isn't analyzable. + // If we didn't rule out case 2 we would declare anything that isn't analyzable as bounded. + ( + e instanceof UnaryArithmeticOperation or + e instanceof BinaryArithmeticOperation or + e instanceof AssignArithmeticOperation + ) and + not convertedExprMightOverflow(e) + or + // Optimistically assume that the following operations always yields a much smaller value. + e instanceof RemExpr + or + e instanceof DivExpr + or + e instanceof RShiftExpr + or + exists(BitwiseAndExpr andExpr | + e = andExpr and boundedBitwiseAnd(andExpr.getAnOperand(), andExpr) + ) + or + // For the assignment variant of the operations we place the barrier on the assigned lvalue. + e = any(AssignRemExpr rem).getLValue() + or + e = any(AssignDivExpr div).getLValue() + or + e = any(AssignRShiftExpr div).getLValue() + or + exists(AssignAndExpr andExpr | + e = andExpr.getLValue() and boundedBitwiseAnd(andExpr.getRValue(), andExpr) + ) +} diff --git a/src/microsoft/experimental/Security/CWE-190/IntegerOverflowTainted.md b/src/microsoft/experimental/Security/CWE-190/IntegerOverflowTainted.md new file mode 100644 index 00000000..a455f79d --- /dev/null +++ b/src/microsoft/experimental/Security/CWE-190/IntegerOverflowTainted.md @@ -0,0 +1,45 @@ +# Potential integer arithmetic overflow +Performing calculations on user-controlled data can result in integer overflows unless the input is validated. + +Integer overflow occurs when the result of an arithmetic expression is too large to be represented by the (integer) output type of the expression. For example, if the result of the expression is 200, but the output type is a signed 8-bit integer, then overflow occurs because the largest value that can be represented is 127. The behavior of overflow is implementation defined, but the most common implementation is two's complement arithmetic, in which case the result is -56. Overflow can cause unexpected results, particularly when a large value overflows and the result is negative. It can also pose a security risk if the value of the expression is controllable by user, because it could enable an attacker to deliberately cause an overflow. + +Negative integer overflow is another form of integer overflow, in which a negative result cannot be represented in the output type. + + +## Recommendation +Always guard against overflow in arithmetic operations on user-controlled data by doing one of the following: + +* Validate the user input. +* Define a guard on the arithmetic expression, so that the operation is performed only if the result can be known to be less than, or equal to, the maximum value for the type, for example `INT_MAX`. +* Use a wider type, so that larger input values do not cause overflow. + +## Example +In this example, a value is read from standard input into an `int`. Because the value is a user-controlled value, it could be extremely large. Performing arithmetic operations on this value could therefore cause an overflow. To avoid this happening, the example shows how to perform a check before performing a multiplication. + + +```c +int main(int argc, char** argv) { + char buffer[20]; + fgets(buffer, 20, stdin); + + int num = atoi(buffer); + // BAD: may overflow if input is very large + int scaled = num + 1000; + + // ... + + int num2 = atoi(buffer); + int scaled2; + // GOOD: use a guard to prevent overflow + if (num2 < INT_MAX-1000) + scaled2 = num2 + 1000; + else + scaled2 = INT_MAX; +} + +``` + +## References +* Common Weakness Enumeration: [CWE-190](https://cwe.mitre.org/data/definitions/190.html). +* Common Weakness Enumeration: [CWE-197](https://cwe.mitre.org/data/definitions/197.html). +* Common Weakness Enumeration: [CWE-681](https://cwe.mitre.org/data/definitions/681.html). diff --git a/src/microsoft/experimental/Security/CWE-190/IntegerOverflowTainted.qhelp b/src/microsoft/experimental/Security/CWE-190/IntegerOverflowTainted.qhelp new file mode 100644 index 00000000..01bb78be --- /dev/null +++ b/src/microsoft/experimental/Security/CWE-190/IntegerOverflowTainted.qhelp @@ -0,0 +1,55 @@ + + + +

    Performing calculations on user-controlled data can result in +integer overflows unless the input is validated.

    + +

    Integer overflow occurs when the result of an arithmetic expression +is too large to be represented by the (integer) output type of the +expression. For example, if the result of the expression is 200, but +the output type is a signed 8-bit integer, then overflow occurs +because the largest value that can be represented is 127. The behavior +of overflow is implementation defined, but the most common +implementation is two's complement arithmetic, in which case the +result is -56. Overflow can cause unexpected results, particularly +when a large value overflows and the result is negative. It can also +pose a security risk if the value of the expression is controllable by +user, because it could enable an attacker to deliberately cause an +overflow.

    + +

    Negative integer overflow is another form of integer overflow, +in which a negative result cannot be represented in the output type.

    +
    + + + +

    Always guard against overflow in arithmetic operations on +user-controlled data by doing one of the following:

    + +
      +
    • Validate the user input.
    • +
    • Define a guard on the arithmetic expression, so that the operation +is performed only if the result can be known to be less than, or equal +to, the maximum value for the type, for +example INT_MAX.
    • +
    • Use a wider type, so that larger input values do not cause +overflow.
    • +
    + +
    + + +

    In this example, a value is read from standard input into an int. Because the value +is a user-controlled value, it could be extremely large. Performing arithmetic operations on this +value could therefore cause an overflow. To avoid this happening, the example shows how to perform +a check before performing a multiplication.

    + + + +
    + + + +
    diff --git a/src/microsoft/experimental/Security/CWE-190/IntegerOverflowTainted.ql b/src/microsoft/experimental/Security/CWE-190/IntegerOverflowTainted.ql new file mode 100644 index 00000000..488a4398 --- /dev/null +++ b/src/microsoft/experimental/Security/CWE-190/IntegerOverflowTainted.ql @@ -0,0 +1,111 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * @name Potential integer arithmetic overflow + * @description A user-controlled integer arithmetic expression + * that is not validated can cause overflows. + * @kind problem + * @id cpp/microsoft/experimental/security/cwe-190/integer-overflow-tainted + * @problem.severity warning + * @security-severity 8.1 + * @precision low + * @tags security + * external/cwe/cwe-190 + * external/cwe/cwe-197 + * external/cwe/cwe-681 + */ + +import cpp +import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis +import semmle.code.cpp.dataflow.new.DataFlow +import semmle.code.cpp.security.FlowSources as FS +import semmle.code.cpp.dataflow.new.TaintTracking +import semmle.code.cpp.ir.IR +import semmle.code.cpp.controlflow.IRGuards as IRGuards + +/** Holds if `expr` might overflow. */ +predicate outOfBoundsExpr(Expr expr, string kind) { + if convertedExprMightOverflowPositively(expr) + then kind = "overflow" + else ( + convertedExprMightOverflowNegatively(expr) and + kind = "overflow negatively" + ) +} + +predicate isSource(FS::FlowSource source, string sourceType) { sourceType = source.getSourceType() } + +predicate isSink(DataFlow::Node sink, string kind) { + exists(Expr use | + not use.getUnspecifiedType() instanceof PointerType and + outOfBoundsExpr(use, kind) and + not inSystemMacroExpansion(use) and + use = sink.asExpr() + ) +} + +predicate hasUpperBoundsCheck(Variable var) { + exists(RelationalOperation oper, VariableAccess access | + oper.getAnOperand() = access and + access.getTarget() = var and + // Comparing to 0 is not an upper bound check + not oper.getAnOperand().getValue() = "0" + ) +} + +predicate constantInstruction(Instruction instr) { + instr instanceof ConstantInstruction or + constantInstruction(instr.(UnaryInstruction).getUnary()) +} + +predicate readsVariable(LoadInstruction load, Variable var) { + load.getSourceAddress().(VariableAddressInstruction).getAstVariable() = var +} + +predicate nodeIsBarrierEqualityCandidate(DataFlow::Node node, Operand access, Variable checkedVar) { + exists(Instruction instr | instr = node.asInstruction() | + readsVariable(instr, checkedVar) and + any(IRGuards::IRGuardCondition guard).ensuresEq(access, _, _, instr.getBlock(), true) + ) +} + +module Config implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { isSource(source, _) } + + predicate isSink(DataFlow::Node sink) { isSink(sink, _) } + + predicate isBarrier(DataFlow::Node node) { + // Block flow if there's an upper bound check of the variable anywhere in the program + exists(Variable checkedVar, Instruction instr | instr = node.asInstruction() | + readsVariable(instr, checkedVar) and + hasUpperBoundsCheck(checkedVar) + ) + or + // Block flow if the node is guarded by an equality check + exists(Variable checkedVar, Operand access | + nodeIsBarrierEqualityCandidate(node, access, checkedVar) and + readsVariable(access.getDef(), checkedVar) + ) + or + // Block flow to any binary instruction whose operands are both non-constants. + exists(BinaryInstruction iTo | + iTo = node.asInstruction() and + not constantInstruction(iTo.getLeft()) and + not constantInstruction(iTo.getRight()) and + // propagate taint from either the pointer or the offset, regardless of constantness + not iTo instanceof PointerArithmeticInstruction + ) + } + + predicate observeDiffInformedIncrementalMode() { any() } +} + +module Flow = TaintTracking::Global; + +from DataFlow::Node source, DataFlow::Node sink, string kind, string sourceType +where + Flow::flow(source, sink) and + isSource(source, sourceType) and + isSink(sink, kind) +select sink, "$@ flows an expression which might " + kind + ".", source, sourceType diff --git a/src/microsoft/experimental/Security/CWE-190/TaintedAllocationSize.c b/src/microsoft/experimental/Security/CWE-190/TaintedAllocationSize.c new file mode 100644 index 00000000..60a1db04 --- /dev/null +++ b/src/microsoft/experimental/Security/CWE-190/TaintedAllocationSize.c @@ -0,0 +1,9 @@ +int factor = atoi(getenv("BRANCHING_FACTOR")); + +// BAD: This can allocate too little memory if factor is very large due to overflow. +char **root_node = (char **) malloc(factor * sizeof(char *)); + +// GOOD: Prevent overflow and unbounded allocation size by checking the input. +if (factor > 0 && factor <= 1000) { + char **root_node = (char **) malloc(factor * sizeof(char *)); +} diff --git a/src/microsoft/experimental/Security/CWE-190/TaintedAllocationSize.md b/src/microsoft/experimental/Security/CWE-190/TaintedAllocationSize.md new file mode 100644 index 00000000..45b6484f --- /dev/null +++ b/src/microsoft/experimental/Security/CWE-190/TaintedAllocationSize.md @@ -0,0 +1,31 @@ +# Uncontrolled allocation size +This code allocates memory using a size value based on user input, with no apparent bound on its magnitude being established. This allows for arbitrary amounts of memory to be allocated. + +If the allocation size is calculated by multiplying user input by a `sizeof` expression, the multiplication can overflow. When an integer multiplication overflows in C, the result wraps around and can be much smaller than intended. A later attempt to write data into the allocated memory can then be out of bounds. + + +## Recommendation +Guard all integer parameters that come from an external user. Implement a guard with the expected range for the parameter and make sure that the input value meets both the minimum and maximum requirements for this range. If the input value fails this guard then reject the request before proceeding further. If the input value passes the guard then subsequent calculations should not overflow. + + +## Example + +```c +int factor = atoi(getenv("BRANCHING_FACTOR")); + +// BAD: This can allocate too little memory if factor is very large due to overflow. +char **root_node = (char **) malloc(factor * sizeof(char *)); + +// GOOD: Prevent overflow and unbounded allocation size by checking the input. +if (factor > 0 && factor <= 1000) { + char **root_node = (char **) malloc(factor * sizeof(char *)); +} + +``` +This code shows one way to guard that an input value is within the expected range. If `factor` fails the guard, then an error is returned, and the value is not used as an argument to the subsequent call to `malloc`. Without this guard, the allocated buffer might be too small to hold the data intended for it. + + +## References +* The CERT Oracle Secure Coding Standard for C: [INT04-C. Enforce limits on integer values originating from tainted sources](https://www.securecoding.cert.org/confluence/display/c/INT04-C.+Enforce+limits+on+integer+values+originating+from+tainted+sources). +* Common Weakness Enumeration: [CWE-190](https://cwe.mitre.org/data/definitions/190.html). +* Common Weakness Enumeration: [CWE-789](https://cwe.mitre.org/data/definitions/789.html). diff --git a/src/microsoft/experimental/Security/CWE-190/TaintedAllocationSize.qhelp b/src/microsoft/experimental/Security/CWE-190/TaintedAllocationSize.qhelp new file mode 100644 index 00000000..153a089b --- /dev/null +++ b/src/microsoft/experimental/Security/CWE-190/TaintedAllocationSize.qhelp @@ -0,0 +1,50 @@ + + + + +

    This code allocates memory using a size value based on user input, +with no apparent bound on its magnitude being established. This allows +for arbitrary amounts of memory to be allocated.

    + +

    If the allocation size is calculated by multiplying user input by a +sizeof expression, the multiplication can overflow. When +an integer multiplication overflows in C, the result wraps around and +can be much smaller than intended. A later attempt to write data into +the allocated memory can then be out of bounds.

    + +
    + + +

    Guard all integer parameters that come from an external +user. Implement a guard with the expected range for the parameter and +make sure that the input value meets both the minimum and maximum +requirements for this range. If the input value fails this guard then +reject the request before proceeding further. If the input value +passes the guard then subsequent calculations should not overflow.

    + + +
    + + + +

    This code shows one way to guard that an input value is within the +expected range. If factor fails the guard, then an error +is returned, and the value is not used as an argument to the +subsequent call to malloc. Without this guard, the +allocated buffer might be too small to hold the data intended for it.

    + +
    + + +
  • The CERT Oracle Secure Coding Standard for C: + INT04-C. Enforce + limits on integer values originating from tainted sources.
  • + + + + +
    +
    diff --git a/src/microsoft/experimental/Security/CWE-190/TaintedAllocationSize.ql b/src/microsoft/experimental/Security/CWE-190/TaintedAllocationSize.ql new file mode 100644 index 00000000..df9c1c1e --- /dev/null +++ b/src/microsoft/experimental/Security/CWE-190/TaintedAllocationSize.ql @@ -0,0 +1,116 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * @name Uncontrolled allocation size + * @description Allocating memory with a size controlled by an external user can result in + * arbitrary amounts of memory being allocated. + * @kind path-problem + * @problem.severity error + * @security-severity 8.1 + * @precision medium + * @id cpp/microsoft/experimental/security/cwe-190/uncontrolled-allocation-size + * @tags reliability + * security + * external/cwe/cwe-190 + * external/cwe/cwe-789 + */ + +import cpp +import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis +import semmle.code.cpp.ir.dataflow.TaintTracking +import semmle.code.cpp.ir.IR +import semmle.code.cpp.controlflow.IRGuards +import semmle.code.cpp.security.FlowSources +import TaintedAllocationSize::PathGraph +import Bounded + +/** + * Holds if `alloc` is an allocation, and `tainted` is a child of it that is a + * taint sink. + */ +predicate allocSink(HeuristicAllocationExpr alloc, DataFlow::Node sink) { + exists(Expr e | e = sink.asExpr() | + e = alloc.getAChild() and + e.getUnspecifiedType() instanceof IntegralType + ) +} + +predicate readsVariable(LoadInstruction load, Variable var, IRBlock bb) { + load.getSourceAddress().(VariableAddressInstruction).getAstVariable() = var and + bb = load.getBlock() +} + +predicate hasUpperBoundsCheck(Variable var) { + exists(RelationalOperation oper, VariableAccess access | + oper.getAnOperand() = access and + access.getTarget() = var and + // Comparing to 0 is not an upper bound check + not oper.getAnOperand().getValue() = "0" + ) +} + +predicate variableEqualityCheckedInBlock(Variable checkedVar, IRBlock bb) { + exists(Operand access | + readsVariable(access.getDef(), checkedVar, _) and + any(IRGuardCondition guard).ensuresEq(access, _, _, bb, true) + ) +} + +predicate nodeIsBarrierEquality(DataFlow::Node node) { + exists(Variable checkedVar, Instruction instr, IRBlock bb | + instr = node.asOperand().getDef() and + readsVariable(instr, checkedVar, bb) and + variableEqualityCheckedInBlock(checkedVar, bb) + ) +} + +predicate isFlowSource(FlowSource source, string sourceType) { sourceType = source.getSourceType() } + +module TaintedAllocationSizeConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { isFlowSource(source, _) } + + predicate isSink(DataFlow::Node sink) { allocSink(_, sink) } + + predicate isBarrier(DataFlow::Node node) { + exists(Expr e | e = node.asExpr() | + bounded(e) + or + // Subtracting two pointers is either well-defined (and the result will likely be small), or + // terribly undefined and dangerous. Here, we assume that the programmer has ensured that the + // result is well-defined (i.e., the two pointers point to the same object), and thus the result + // will likely be small. + e = any(PointerDiffExpr diff).getAnOperand() + ) + or + exists(Variable checkedVar, Instruction instr | instr = node.asOperand().getDef() | + readsVariable(instr, checkedVar, _) and + hasUpperBoundsCheck(checkedVar) + ) + or + nodeIsBarrierEquality(node) + or + // block flow to inside of identified allocation functions (this flow leads + // to duplicate results) + any(HeuristicAllocationFunction f).getAParameter() = node.asParameter() + } + + predicate observeDiffInformedIncrementalMode() { any() } + + Location getASelectedSinkLocation(DataFlow::Node sink) { + exists(Expr alloc | result = [alloc.getLocation(), sink.getLocation()] | allocSink(alloc, sink)) + } +} + +module TaintedAllocationSize = TaintTracking::Global; + +from + Expr alloc, TaintedAllocationSize::PathNode source, TaintedAllocationSize::PathNode sink, + string taintCause +where + isFlowSource(source.getNode(), taintCause) and + TaintedAllocationSize::flowPath(source, sink) and + allocSink(alloc, sink.getNode()) +select alloc, source, sink, + "This allocation size is derived from $@ and could allocate arbitrary amounts of memory.", + source.getNode(), "user input (" + taintCause + ")" diff --git a/src/microsoft/experimental/Security/CWE-468/IncorrectPointerScalingChar.cpp b/src/microsoft/experimental/Security/CWE-468/IncorrectPointerScalingChar.cpp new file mode 100644 index 00000000..b7e82e02 --- /dev/null +++ b/src/microsoft/experimental/Security/CWE-468/IncorrectPointerScalingChar.cpp @@ -0,0 +1,14 @@ +char example1(int i) { + int intArray[5] = { 1, 2, 3, 4, 5 }; + char *charPointer = (char *)intArray; + // BAD: the pointer arithmetic uses type char*, so the offset + // is not scaled by sizeof(int). + return *(charPointer + i); +} + +int example2(int i) { + int intArray[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int *intPointer = intArray; + // GOOD: the offset is automatically scaled by sizeof(int). + return *(intPointer + i); +} diff --git a/src/microsoft/experimental/Security/CWE-468/IncorrectPointerScalingChar.md b/src/microsoft/experimental/Security/CWE-468/IncorrectPointerScalingChar.md new file mode 100644 index 00000000..7c3779db --- /dev/null +++ b/src/microsoft/experimental/Security/CWE-468/IncorrectPointerScalingChar.md @@ -0,0 +1,32 @@ +# Suspicious pointer scaling to char +Casting arbitrary pointers into `char*` and then accessing their contents should be done with care. The results may not be portable. + +This query finds pointer arithmetic expressions where a pointer to `char` (or similar) is dereferenced even though the underlying value is of a type larger than `char`. + + +## Recommendation +1. Whenever possible, use the array subscript operator rather than pointer arithmetic. For example, replace `*(p+k)` with `p[k]`. +1. Cast to the correct type before using pointer arithmetic. For example, if the type of `p` is `char*` but it really points to an array of type `double[]` then use the syntax `(double*)p + k` to get a pointer to the `k`'th element of the array. + +## Example + +```cpp +char example1(int i) { + int intArray[5] = { 1, 2, 3, 4, 5 }; + char *charPointer = (char *)intArray; + // BAD: the pointer arithmetic uses type char*, so the offset + // is not scaled by sizeof(int). + return *(charPointer + i); +} + +int example2(int i) { + int intArray[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int *intPointer = intArray; + // GOOD: the offset is automatically scaled by sizeof(int). + return *(intPointer + i); +} + +``` + +## References +* Common Weakness Enumeration: [CWE-468](https://cwe.mitre.org/data/definitions/468.html). diff --git a/src/microsoft/experimental/Security/CWE-468/IncorrectPointerScalingChar.qhelp b/src/microsoft/experimental/Security/CWE-468/IncorrectPointerScalingChar.qhelp new file mode 100644 index 00000000..b81174b8 --- /dev/null +++ b/src/microsoft/experimental/Security/CWE-468/IncorrectPointerScalingChar.qhelp @@ -0,0 +1,38 @@ + + + +

    Casting arbitrary pointers into char* and then accessing their +contents should be done with care. The results may not be portable.

    + +

    +This query finds pointer arithmetic expressions where a pointer to +char (or similar) is dereferenced even though the underlying value is +of a type larger than char. +

    + +
    + + +
      +
    1. Whenever possible, use the array subscript operator rather than +pointer arithmetic. For example, replace *(p+k) +with p[k].
    2. +
    3. Cast to the correct type before using pointer arithmetic. For +example, if the type of p is char* but it really +points to an array of type double[] then use the syntax +(double*)p + k to get a pointer to the k'th element +of the array.
    4. +
    + +
    + + + + + + + + +
    diff --git a/src/microsoft/experimental/Security/CWE-468/IncorrectPointerScalingChar.ql b/src/microsoft/experimental/Security/CWE-468/IncorrectPointerScalingChar.ql new file mode 100644 index 00000000..431b0270 --- /dev/null +++ b/src/microsoft/experimental/Security/CWE-468/IncorrectPointerScalingChar.ql @@ -0,0 +1,58 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * @name Suspicious pointer scaling to char + * @description Implicit scaling of pointer arithmetic expressions + * can cause buffer overflow conditions. + * @kind problem + * @id cpp/microsoft/experimental/security/cwe-468/incorrect-pointer-scaling-char + * @problem.severity warning + * @security-severity 8.8 + * @precision low + * @tags security + * external/cwe/cwe-468 + */ + +import IncorrectPointerScalingCommon + +from Expr dest, Type destType, Type sourceType, Type sourceBase, Type destBase, Location sourceLoc +where + exists(pointerArithmeticParent(dest)) and + exprSourceType(dest, sourceType, sourceLoc) and + sourceBase = baseType(sourceType) and + destType = dest.getFullyConverted().getType() and + destBase = baseType(destType) and + destBase.getSize() != sourceBase.getSize() and + not dest.isInMacroExpansion() and + // If the source type is a `char*` or `void*` then don't + // produce a result, because it is likely to be a false + // positive. + not sourceBase instanceof CharType and + not sourceBase instanceof VoidType and + // Don't produce an alert if the dest type is `char *` but the + // expression contains a `sizeof`, which is probably correct. For + // example: + // ``` + // int x[3] = {1,2,3}; + // char* p = (char*)x; + // return *(int*)(p + (2 * sizeof(int))) + // ``` + not ( + destBase instanceof CharType and + dest.getParent().(Expr).getAChild*() instanceof SizeofOperator + ) and + // Don't produce an alert if the root expression computes + // an offset, rather than a pointer. For example: + // ``` + // (p + 1) - q + // ``` + forall(Expr parent | parent = pointerArithmeticParent+(dest) | + parent.getFullyConverted().getUnspecifiedType() instanceof PointerType + ) and + // Only produce alerts that are not produced by `IncorrectPointerScaling.ql`. + destBase instanceof CharType +select dest, + "This pointer might have type $@ (size " + sourceBase.getSize() + + "), but this pointer arithmetic is done with type " + destType + " (size " + destBase.getSize() + + ").", sourceLoc, sourceBase.toString() diff --git a/src/microsoft/experimental/Security/CWE-468/IncorrectPointerScalingCommon.qll b/src/microsoft/experimental/Security/CWE-468/IncorrectPointerScalingCommon.qll new file mode 100644 index 00000000..c257a612 --- /dev/null +++ b/src/microsoft/experimental/Security/CWE-468/IncorrectPointerScalingCommon.qll @@ -0,0 +1,166 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * Shared utilities for the CWE-468 queries. + */ + +import cpp + +/** + * Gets the type parameter of `sizeof` expression `e`. + */ +private Type sizeofParam(Expr e) { + result = e.(SizeofExprOperator).getExprOperand().getFullyConverted().getType() + or + result = e.(SizeofTypeOperator).getTypeOperand() +} + +/** + * Holds if `e` is `sizeof` expression `sizeofExpr`, possibly multiplied + * by another expression, and `sizeofParam` is `sizeofExpr`'s type + * parameter. + * + * For example, if `e` is `4 * sizeof(T)` then `sizeofExpr` is + * `sizeof(T)` and `sizeofParam` is `T`. + */ +private predicate multiplyWithSizeof(Expr e, Expr sizeofExpr, Type sizeofParam) { + e = sizeofExpr and sizeofParam = sizeofParam(e).getUnspecifiedType() + or + multiplyWithSizeof(e.(MulExpr).getAnOperand(), sizeofExpr, sizeofParam) +} + +/** + * Holds if the pointer `e` is added to the `sizeof` expression + * `sizeofExpr` (which may first be multiplied by another expression), + * and `sizeofParam` is `sizeofExpr`'s type parameter. + * + * For example, if the program contains the expression + * `p - (i * sizeof(T))` then `e` would be `p`, `sizeofExpr` would be + * `sizeof(T)`, and `sizeofParam` would be `T`. + */ +predicate addWithSizeof(Expr e, Expr sizeofExpr, Type sizeofParam) { + exists(PointerAddExpr addExpr | + e = addExpr.getLeftOperand() and + multiplyWithSizeof(addExpr.getRightOperand(), sizeofExpr, sizeofParam) + ) + or + exists(PointerSubExpr subExpr | + e = subExpr.getLeftOperand() and + multiplyWithSizeof(subExpr.getRightOperand(), sizeofExpr, sizeofParam) + ) +} + +/** + * Holds if `t` is a pointer or array type. + */ +predicate isPointerType(Type t) { + t instanceof PointerType or + t instanceof ArrayType +} + +/** + * Gets the base type of a pointer or array type. In the case of an array of + * arrays, the inner base type is returned. + */ +Type baseType(Type t) { + ( + exists(PointerType dt | + dt = t.getUnspecifiedType() and + result = dt.getBaseType().getUnspecifiedType() + ) + or + exists(ArrayType at | + at = t.getUnspecifiedType() and + not at.getBaseType().getUnspecifiedType() instanceof ArrayType and + result = at.getBaseType().getUnspecifiedType() + ) + or + exists(ArrayType at, ArrayType at2 | + at = t.getUnspecifiedType() and + at2 = at.getBaseType().getUnspecifiedType() and + result = baseType(at2) + ) + ) and + // Make sure that the type has a size and that it isn't ambiguous. + strictcount(result.getSize()) = 1 +} + +/** + * Holds if there is a pointer expression with type `sourceType` at + * location `sourceLoc` which might be the source expression for `use`. + * + * For example, with + * ``` + * int intArray[5] = { 1, 2, 3, 4, 5 }; + * char *charPointer = (char *)intArray; + * return *(charPointer + i); + * ``` + * the array initializer on the first line is a source expression + * for the use of `charPointer` on the third line. + * + * The source will either be an `Expr` or a `Parameter`. + */ +predicate exprSourceType(Expr use, Type sourceType, Location sourceLoc) { + // Reaching definitions. + if exists(SsaDefinition def | use = def.getAUse(_)) + then + exists(SsaDefinition def, StackVariable v | use = def.getAUse(v) | + defSourceType(def, v, sourceType, sourceLoc) + ) + else + // Pointer arithmetic + if use instanceof PointerAddExpr + then exprSourceType(use.(PointerAddExpr).getLeftOperand(), sourceType, sourceLoc) + else + if use instanceof PointerSubExpr + then exprSourceType(use.(PointerSubExpr).getLeftOperand(), sourceType, sourceLoc) + else + if use instanceof AddExpr + then exprSourceType(use.(AddExpr).getAnOperand(), sourceType, sourceLoc) + else + if use instanceof SubExpr + then exprSourceType(use.(SubExpr).getAnOperand(), sourceType, sourceLoc) + else + if use instanceof CrementOperation + then exprSourceType(use.(CrementOperation).getOperand(), sourceType, sourceLoc) + else ( + // Conversions are not in the AST, so ignore them. + not use instanceof Conversion and + // Source expressions + sourceType = use.getUnspecifiedType() and + isPointerType(sourceType) and + sourceLoc = use.getLocation() + ) +} + +/** + * Holds if there is a pointer expression with type `sourceType` at + * location `sourceLoc` which might define the value of `v` at `def`. + */ +predicate defSourceType(SsaDefinition def, StackVariable v, Type sourceType, Location sourceLoc) { + exprSourceType(def.getDefiningValue(v), sourceType, sourceLoc) + or + defSourceType(def.getAPhiInput(v), v, sourceType, sourceLoc) + or + exists(Parameter p | + p = v and + def.definedByParameter(p) and + sourceType = p.getUnspecifiedType() and + strictcount(p.getType()) = 1 and + isPointerType(sourceType) and + sourceLoc = p.getLocation() + ) +} + +/** + * Gets the pointer arithmetic expression that `e` is (directly) used + * in, if any. + * + * For example, in `(char*)(p + 1)`, for `p`, ths result is `p + 1`. + */ +Expr pointerArithmeticParent(Expr e) { + e = result.(PointerAddExpr).getLeftOperand() or + e = result.(PointerSubExpr).getLeftOperand() or + e = result.(PointerDiffExpr).getAnOperand() +} diff --git a/src/microsoft/experimental/Security/CWE-807/TaintedCondition.c b/src/microsoft/experimental/Security/CWE-807/TaintedCondition.c new file mode 100644 index 00000000..8664d491 --- /dev/null +++ b/src/microsoft/experimental/Security/CWE-807/TaintedCondition.c @@ -0,0 +1,10 @@ +struct hostent *hp;struct in_addr myaddr; +char* tHost = "trustme.example.com"; +myaddr.s_addr=inet_addr(ip_addr_string); + +hp = gethostbyaddr((char *) &myaddr, sizeof(struct in_addr), AF_INET); +if (hp && !strncmp(hp->h_name, tHost, sizeof(tHost))) { + trusted = true; +} else { + trusted = false; +} diff --git a/src/microsoft/experimental/Security/CWE-807/TaintedCondition.md b/src/microsoft/experimental/Security/CWE-807/TaintedCondition.md new file mode 100644 index 00000000..a95d88fb --- /dev/null +++ b/src/microsoft/experimental/Security/CWE-807/TaintedCondition.md @@ -0,0 +1,34 @@ +# Untrusted input for a condition +This rule finds code where untrusted inputs are used in an `if` statement, and the body of that statement makes a security decision. This is an example of CWE-807 and makes the program vulnerable to attack. An attacker might be able to gain unauthorized access to the system by manipulating external inputs to the system. + + +## Recommendation +In most cases, you need to add or strengthen the checks made on the user-supplied data to ensure its integrity. The user-supplied data can then be used as a trusted input to the security decision. For example, instead of checking an HTTP cookie against a predictable fixed string, check a cookie against a randomly generated session key. + +This rule may highlight a few conditions where user-supplied data has been checked and can be trusted. It is not always possible to determine if the checks applied to data are enough to ensure security. + + +## Example +The following example is included in CWE 807. + + +```c +struct hostent *hp;struct in_addr myaddr; +char* tHost = "trustme.example.com"; +myaddr.s_addr=inet_addr(ip_addr_string); + +hp = gethostbyaddr((char *) &myaddr, sizeof(struct in_addr), AF_INET); +if (hp && !strncmp(hp->h_name, tHost, sizeof(tHost))) { + trusted = true; +} else { + trusted = false; +} + +``` +In this example, the result of a reverse DNS query is compared against a fixed string. An attacker can return an incorrect reverse DNS entry for the requesting IP and thus gain the same access as a legitimate user from `trustme.example.com`. + +To fix the problem in this example, you need to add an additional mechanism to test the user-supplied data. For example, numeric IP addresses could be used. + + +## References +* Common Weakness Enumeration: [CWE-807](https://cwe.mitre.org/data/definitions/807.html). diff --git a/src/microsoft/experimental/Security/CWE-807/TaintedCondition.qhelp b/src/microsoft/experimental/Security/CWE-807/TaintedCondition.qhelp new file mode 100644 index 00000000..80e51ec0 --- /dev/null +++ b/src/microsoft/experimental/Security/CWE-807/TaintedCondition.qhelp @@ -0,0 +1,47 @@ + + + +

    This rule finds code where untrusted inputs are used in +an if statement, and the body of that statement makes a +security decision. This is an example of CWE-807 and makes the program +vulnerable to attack. An attacker might be able to gain unauthorized +access to the system by manipulating external inputs to the system.

    + +
    + +

    In most cases, you need to add or strengthen the checks made on the +user-supplied data to ensure its integrity. The user-supplied data can +then be used as a trusted input to the security decision. For example, +instead of checking an HTTP cookie against a predictable fixed string, +check a cookie against a randomly generated session key.

    + +

    This rule may highlight a few conditions where user-supplied +data has been checked and can be trusted. It is not always possible +to determine if the checks applied to data are enough to ensure security.

    + +
    + +

    The following example is included in CWE 807.

    + + +

    In this example, the result of a reverse DNS query is compared +against a fixed string. An attacker can return an incorrect reverse +DNS entry for the requesting IP and thus gain the same access as a +legitimate user from trustme.example.com.

    + +

    To fix the problem in this example, you need to add an additional +mechanism to test the user-supplied data. For example, +numeric IP addresses could be used.

    + + +
    + + + + + + +
    diff --git a/src/microsoft/experimental/Security/CWE-807/TaintedCondition.ql b/src/microsoft/experimental/Security/CWE-807/TaintedCondition.ql new file mode 100644 index 00000000..4184c970 --- /dev/null +++ b/src/microsoft/experimental/Security/CWE-807/TaintedCondition.ql @@ -0,0 +1,101 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +/** + * @name Untrusted input for a condition + * @description Using untrusted inputs in a statement that makes a + * security decision makes code vulnerable to + * attack. + * @kind path-problem + * @problem.severity warning + * @security-severity 7.5 + * @precision medium + * @id cpp/microsoft/experimental/security/cwe-807/tainted-permissions-check + * @tags security + * external/cwe/cwe-807 + */ + +import cpp +import semmle.code.cpp.security.Security +import semmle.code.cpp.security.FlowSources +import semmle.code.cpp.ir.dataflow.TaintTracking +import semmle.code.cpp.ir.IR +import Flow::PathGraph + +predicate sensitiveCondition(Expr condition, Expr raise) { + raisesPrivilege(raise) and + exists(IfStmt ifstmt | + ifstmt.getCondition() = condition and + raise.getEnclosingStmt().getParentStmt*() = ifstmt + ) +} + +private predicate constantInstruction(Instruction instr) { + instr instanceof ConstantInstruction + or + instr instanceof StringConstantInstruction + or + constantInstruction(instr.(UnaryInstruction).getUnary()) +} + +predicate isSource(FlowSource source, string sourceType) { sourceType = source.getSourceType() } + +module Config implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node node) { isSource(node, _) } + + predicate isSink(DataFlow::Node node) { + sensitiveCondition([node.asExpr(), node.asIndirectExpr()], _) + } + + predicate isBarrier(DataFlow::Node node) { + // Block flow into binary instructions if both operands are non-constant + exists(BinaryInstruction iTo | + iTo = node.asInstruction() and + not constantInstruction(iTo.getLeft()) and + not constantInstruction(iTo.getRight()) and + // propagate taint from either the pointer or the offset, regardless of constant-ness + not iTo instanceof PointerArithmeticInstruction + ) + or + // Block flow through calls to pure functions if two or more operands are non-constant + exists(Instruction iFrom1, Instruction iFrom2, CallInstruction iTo | + iTo = node.asInstruction() and + isPureFunction(iTo.getStaticCallTarget().getName()) and + iFrom1 = iTo.getAnArgument() and + iFrom2 = iTo.getAnArgument() and + not constantInstruction(iFrom1) and + not constantInstruction(iFrom2) and + iFrom1 != iFrom2 + ) + } + + predicate observeDiffInformedIncrementalMode() { any() } + + Location getASelectedSinkLocation(DataFlow::Node sink) { + result = sink.getLocation() + or + exists(Expr raise | result = raise.getLocation() | + sensitiveCondition([sink.asExpr(), sink.asIndirectExpr()], raise) + ) + } +} + +module Flow = TaintTracking::Global; + +/* + * Produce an alert if there is an 'if' statement whose condition `condition` + * is influenced by tainted data `source`, and the body contains + * `raise` which escalates privilege. + */ + +from + Expr raise, string sourceType, DataFlow::Node source, DataFlow::Node sink, + Flow::PathNode sourceNode, Flow::PathNode sinkNode +where + source = sourceNode.getNode() and + sink = sinkNode.getNode() and + isSource(source, sourceType) and + sensitiveCondition([sink.asExpr(), sink.asIndirectExpr()], raise) and + Flow::flowPath(sourceNode, sinkNode) +select sink, sourceNode, sinkNode, "Reliance on $@ to raise privilege at $@.", source, sourceType, + raise, raise.toString() diff --git a/src/windows-driver-suites/recommended.qls b/src/windows-driver-suites/recommended.qls index ecadd949..1142a2fc 100644 --- a/src/windows-driver-suites/recommended.qls +++ b/src/windows-driver-suites/recommended.qls @@ -54,14 +54,20 @@ - microsoft/Likely Bugs/Boundary Violations/PaddingByteInformationDisclosure.ql - microsoft/Likely Bugs/Conversion/BadOverflowGuard.ql - microsoft/Likely Bugs/Conversion/InfiniteLoop.ql + - microsoft/Likely Bugs/Format/NonConstantFormat.ql - microsoft/Likely Bugs/Memory Management/ConditionallyUninitializedVariable.ql + - microsoft/Likely Bugs/Memory Management/ImproperNullTermination.ql + - microsoft/Likely Bugs/Memory Management/StrncpyFlippedArgs.ql - microsoft/Likely Bugs/Memory Management/UnprobedDereference.ql + - microsoft/Likely Bugs/Memory Management/UnsafeUseOfStrcat.ql - microsoft/Likely Bugs/Memory Management/UserModeMemoryOutsideTry.ql - microsoft/Likely Bugs/Memory Management/UserModeMemoryReadMultipleTimes.ql - microsoft/Likely Bugs/Memory Management/UseAfterFree/UseAfterFree.ql - microsoft/Likely Bugs/UnguardedNullReturnDereference.ql - microsoft/Likely Bugs/UninitializedPtrField.ql - microsoft/Security/Crytpography/HardcodedIVCNG.ql + - microsoft/Security/CWE/CWE-190/ArithmeticUncontrolled.ql + - microsoft/Security/CWE/CWE-190/ArithmeticWithExtremeValues.ql - queries: . from: microsoft/cpp-queries version: 0.0.5