From 09694c448d65dfd0f3064f3597ff31b1abdadcb5 Mon Sep 17 00:00:00 2001 From: Joe Farebrother Date: Mon, 24 Feb 2025 05:19:32 +0000 Subject: [PATCH 01/13] Rewrite file not closed simple case using dataflow --- .../ql/src/Resources/FileNotAlwaysClosed.ql | 115 +++++++++--------- .../Resources/FileNotAlwaysClosedQuery.qll | 39 ++++++ 2 files changed, 97 insertions(+), 57 deletions(-) create mode 100644 python/ql/src/Resources/FileNotAlwaysClosedQuery.qll diff --git a/python/ql/src/Resources/FileNotAlwaysClosed.ql b/python/ql/src/Resources/FileNotAlwaysClosed.ql index 5b5a869e62a8..f04764995752 100644 --- a/python/ql/src/Resources/FileNotAlwaysClosed.ql +++ b/python/ql/src/Resources/FileNotAlwaysClosed.ql @@ -13,62 +13,63 @@ */ import python -import FileOpen - -/** - * Whether resource is opened and closed in in a matched pair of methods, - * either `__enter__` and `__exit__` or `__init__` and `__del__` - */ -predicate opened_in_enter_closed_in_exit(ControlFlowNode open) { - file_not_closed_at_scope_exit(open) and - exists(FunctionValue entry, FunctionValue exit | - open.getScope() = entry.getScope() and - exists(ClassValue cls | - cls.declaredAttribute("__enter__") = entry and cls.declaredAttribute("__exit__") = exit - or - cls.declaredAttribute("__init__") = entry and cls.declaredAttribute("__del__") = exit - ) and - exists(AttrNode attr_open, AttrNode attrclose | - attr_open.getScope() = entry.getScope() and - attrclose.getScope() = exit.getScope() and - expr_is_open(attr_open.(DefinitionNode).getValue(), open) and - attr_open.getName() = attrclose.getName() and - close_method_call(_, attrclose) - ) - ) -} - -predicate file_not_closed_at_scope_exit(ControlFlowNode open) { - exists(EssaVariable v | - BaseFlow::reaches_exit(v) and - var_is_open(v, open) and - not file_is_returned(v, open) - ) - or - call_to_open(open) and - not exists(AssignmentDefinition def | def.getValue() = open) and - not exists(Return r | r.getValue() = open.getNode()) -} - -predicate file_not_closed_at_exception_exit(ControlFlowNode open, ControlFlowNode exit) { - exists(EssaVariable v | - exit.(RaisingNode).viableExceptionalExit(_, _) and - not closes_arg(exit, v.getSourceVariable()) and - not close_method_call(exit, v.getAUse().(NameNode)) and - var_is_open(v, open) and - v.getAUse() = exit.getAChild*() - ) -} +// import FileOpen +import FileNotAlwaysClosedQuery +// /** +// * Whether resource is opened and closed in in a matched pair of methods, +// * either `__enter__` and `__exit__` or `__init__` and `__del__` +// */ +// predicate opened_in_enter_closed_in_exit(ControlFlowNode open) { +// file_not_closed_at_scope_exit(open) and +// exists(FunctionValue entry, FunctionValue exit | +// open.getScope() = entry.getScope() and +// exists(ClassValue cls | +// cls.declaredAttribute("__enter__") = entry and cls.declaredAttribute("__exit__") = exit +// or +// cls.declaredAttribute("__init__") = entry and cls.declaredAttribute("__del__") = exit +// ) and +// exists(AttrNode attr_open, AttrNode attrclose | +// attr_open.getScope() = entry.getScope() and +// attrclose.getScope() = exit.getScope() and +// expr_is_open(attr_open.(DefinitionNode).getValue(), open) and +// attr_open.getName() = attrclose.getName() and +// close_method_call(_, attrclose) +// ) +// ) +// } +// predicate file_not_closed_at_scope_exit(ControlFlowNode open) { +// exists(EssaVariable v | +// BaseFlow::reaches_exit(v) and +// var_is_open(v, open) and +// not file_is_returned(v, open) +// ) +// or +// call_to_open(open) and +// not exists(AssignmentDefinition def | def.getValue() = open) and +// not exists(Return r | r.getValue() = open.getNode()) +// } +// predicate file_not_closed_at_exception_exit(ControlFlowNode open, ControlFlowNode exit) { +// exists(EssaVariable v | +// exit.(RaisingNode).viableExceptionalExit(_, _) and +// not closes_arg(exit, v.getSourceVariable()) and +// not close_method_call(exit, v.getAUse().(NameNode)) and +// var_is_open(v, open) and +// v.getAUse() = exit.getAChild*() +// ) +// } /* Check to see if a file is opened but not closed or returned */ -from ControlFlowNode defn, string message -where - not opened_in_enter_closed_in_exit(defn) and - ( - file_not_closed_at_scope_exit(defn) and message = "File is opened but is not closed." - or - not file_not_closed_at_scope_exit(defn) and - file_not_closed_at_exception_exit(defn, _) and - message = "File may not be closed if an exception is raised." - ) -select defn.getNode(), message +// from ControlFlowNode defn, string message +// where +// not opened_in_enter_closed_in_exit(defn) and +// ( +// file_not_closed_at_scope_exit(defn) and message = "File is opened but is not closed." +// or +// not file_not_closed_at_scope_exit(defn) and +// file_not_closed_at_exception_exit(defn, _) and +// message = "File may not be closed if an exception is raised." +// ) +// select defn.getNode(), message +from FileOpen fo +where fileNotAlwaysClosed(fo) +select fo, "File is opened but is not closed." diff --git a/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll b/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll new file mode 100644 index 000000000000..f75f61feea33 --- /dev/null +++ b/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll @@ -0,0 +1,39 @@ +/** Definitions for reasoning about whether files are closed. */ + +import python +//import semmle.python.dataflow.DataFlow +import semmle.python.ApiGraphs + +abstract class FileOpen extends DataFlow::CfgNode { } + +class FileOpenCall extends FileOpen { + FileOpenCall() { this = API::builtin("open").getACall() } +} + +// todo: type tracking to find wrapping funcs +abstract class FileClose extends DataFlow::CfgNode { } + +class FileCloseCall extends FileClose { + FileCloseCall() { exists(DataFlow::MethodCallNode mc | mc.calls(this, "close")) } +} + +class WithStatement extends FileClose { + WithStatement() { exists(With w | this.asExpr() = w.getContextExpr()) } +} + +predicate fileIsClosed(FileOpen fo) { exists(FileClose fc | DataFlow::localFlow(fo, fc)) } + +predicate fileIsReturned(FileOpen fo) { + exists(Return ret | DataFlow::localFlow(fo, DataFlow::exprNode(ret.getValue()))) +} + +predicate fileIsStoredInField(FileOpen fo) { + exists(DataFlow::AttrWrite aw | DataFlow::localFlow(fo, aw.getValue())) +} + +predicate fileNotAlwaysClosed(FileOpen fo) { + not fileIsClosed(fo) and + not fileIsReturned(fo) and + not fileIsStoredInField(fo) + // TODO: exception cases +} From ecb3050780341ddecc99d8ab4cab6a50bece2843 Mon Sep 17 00:00:00 2001 From: Joe Farebrother Date: Mon, 24 Feb 2025 05:28:52 +0000 Subject: [PATCH 02/13] Update tests --- .../query-tests/Resources/Dataflow.expected | 119 ------------------ .../ql/test/query-tests/Resources/Dataflow.ql | 14 --- .../Resources/FileNotAlwaysClosed.expected | 9 -- .../Resources/FileNotAlwaysClosed.qlref | 1 - .../resources_test.py | 24 ++-- .../FileNotAlwaysClosed/test.expected | 0 .../Resources/FileNotAlwaysClosed/test.ql | 21 ++++ 7 files changed, 33 insertions(+), 155 deletions(-) delete mode 100644 python/ql/test/query-tests/Resources/Dataflow.expected delete mode 100644 python/ql/test/query-tests/Resources/Dataflow.ql delete mode 100644 python/ql/test/query-tests/Resources/FileNotAlwaysClosed.expected delete mode 100644 python/ql/test/query-tests/Resources/FileNotAlwaysClosed.qlref rename python/ql/test/query-tests/Resources/{ => FileNotAlwaysClosed}/resources_test.py (89%) create mode 100644 python/ql/test/query-tests/Resources/FileNotAlwaysClosed/test.expected create mode 100644 python/ql/test/query-tests/Resources/FileNotAlwaysClosed/test.ql diff --git a/python/ql/test/query-tests/Resources/Dataflow.expected b/python/ql/test/query-tests/Resources/Dataflow.expected deleted file mode 100644 index 18ac21574588..000000000000 --- a/python/ql/test/query-tests/Resources/Dataflow.expected +++ /dev/null @@ -1,119 +0,0 @@ -| f1_0 = open() | open | | -| f1_1 = MethodCallsiteRefinement(f1_0) | open | | -| f1_2 = MethodCallsiteRefinement(f1_1) | closed | exit | -| f2_0 = open() | open | exit | -| f3_0 = open() | open | | -| f3_1 = MethodCallsiteRefinement(f3_0) | closed | exit | -| f4_0 = with | closed | | -| f4_1 = MethodCallsiteRefinement(f4_0) | closed | exit | -| f5_0 = open() | open | | -| f5_1 = MethodCallsiteRefinement(f5_0) | open | | -| f5_2 = MethodCallsiteRefinement(f5_1) | closed | exit | -| f5_3 = phi(f5_0, f5_1) | open | | -| f6_0 = None | closed | | -| f6_1 = open() | open | | -| f6_2 = MethodCallsiteRefinement(f6_1) | open | | -| f6_3 = phi(f6_0, f6_1, f6_2) | open | | -| f6_4 = Pi(f6_2) [true] | open | | -| f6_5 = MethodCallsiteRefinement(f6_4) | closed | | -| f6_6 = Pi(f6_3) [true] | open | | -| f6_7 = Pi(f6_2) [false] | closed | | -| f6_8 = phi(f6_5, f6_7) | closed | exit | -| f7_0 = None | closed | | -| f7_1 = open() | open | | -| f7_2 = MethodCallsiteRefinement(f7_1) | open | | -| f7_3 = phi(f7_0, f7_1, f7_2) | open | | -| f7_4 = Pi(f7_2) [true] | open | | -| f7_5 = MethodCallsiteRefinement(f7_4) | closed | | -| f7_6 = Pi(f7_3) [true] | open | | -| f7_7 = Pi(f7_2) [false] | closed | | -| f7_8 = phi(f7_5, f7_7) | closed | exit | -| f8_0 = None | closed | | -| f8_1 = open() | open | | -| f8_2 = MethodCallsiteRefinement(f8_1) | open | | -| f8_3 = phi(f8_0, f8_1, f8_2) | open | | -| f8_4 = Pi(f8_2) [true] | closed | | -| f8_5 = MethodCallsiteRefinement(f8_4) | closed | | -| f8_6 = Pi(f8_3) [true] | closed | | -| f8_7 = Pi(f8_2) [false] | open | | -| f8_8 = phi(f8_5, f8_7) | open | exit | -| f9_0 = None | closed | | -| f9_1 = open() | open | | -| f9_2 = MethodCallsiteRefinement(f9_1) | open | | -| f9_3 = phi(f9_0, f9_1, f9_2) | open | | -| f9_4 = Pi(f9_2) [true] | closed | | -| f9_5 = MethodCallsiteRefinement(f9_4) | closed | | -| f9_6 = Pi(f9_3) [true] | closed | | -| f9_7 = Pi(f9_2) [false] | open | | -| f9_8 = phi(f9_5, f9_7) | open | exit | -| f10_0 = open() | open | | -| f10_1 = MethodCallsiteRefinement(f10_0) | open | | -| f10_2 = MethodCallsiteRefinement(f10_1) | open | | -| f10_3 = MethodCallsiteRefinement(f10_2) | closed | | -| f10_4 = phi(f10_0, f10_1, f10_2, f10_3) | open | | -| f10_5 = MethodCallsiteRefinement(f10_4) | closed | | -| f10_6 = phi(f10_3, f10_5) | closed | exit | -| f11_0 = open() | open | | -| f11_1 = MethodCallsiteRefinement(f11_0) | open | | -| f11_2 = MethodCallsiteRefinement(f11_1) | open | | -| f11_3 = MethodCallsiteRefinement(f11_2) | closed | | -| f11_4 = phi(f11_0, f11_1, f11_2, f11_3) | open | | -| f11_5 = MethodCallsiteRefinement(f11_4) | closed | | -| f11_6 = phi(f11_3, f11_5) | closed | exit | -| f12_0 = open() | open | | -| f12_1 = MethodCallsiteRefinement(f12_0) | open | | -| f12_2 = MethodCallsiteRefinement(f12_1) | open | | -| f12_3 = MethodCallsiteRefinement(f12_2) | closed | | -| f12_4 = phi(f12_0, f12_1, f12_2, f12_3) | open | | -| f12_5 = MethodCallsiteRefinement(f12_4) | closed | | -| f12_6 = phi(f12_3, f12_5) | closed | exit | -| f13_0 = open() | open | | -| f13_1 = MethodCallsiteRefinement(f13_0) | open | exit | -| f14_0 = opener_func2() | open | | -| f14_1 = MethodCallsiteRefinement(f14_0) | open | | -| f14_2 = MethodCallsiteRefinement(f14_1) | closed | exit | -| f15_0 = opener_func2() | open | | -| f15_1 = ArgumentRefinement(f15_0) | closed | exit | -| f16_0 = ScopeEntryDefinition | closed | | -| f16_1 = open() | open | | -| f16_2 = MethodCallsiteRefinement(f16_1) | open | | -| f16_3 = MethodCallsiteRefinement(f16_2) | closed | | -| f16_4 = phi(f16_0, f16_1, f16_2, f16_3) | open | | -| f16_5 = phi(f16_3, f16_4) | open | exit | -| f17_0 = open() | open | | -| f17_1 = MethodCallsiteRefinement(f17_0) | open | | -| f17_2 = MethodCallsiteRefinement(f17_1) | open | | -| f17_3 = MethodCallsiteRefinement(f17_2) | closed | | -| f17_4 = phi(f17_0, f17_1, f17_2, f17_3) | open | | -| f17_5 = MethodCallsiteRefinement(f17_4) | closed | | -| f17_6 = phi(f17_3, f17_5) | closed | exit | -| f18_0 = open() | closed | | -| f18_1 = MethodCallsiteRefinement(f18_0) | closed | exit | -| f20_0 = open() | open | | -| f20_1 = ArgumentRefinement(f20_0) | closed | exit | -| f21_0 = open() | open | | -| f21_1 = MethodCallsiteRefinement(f21_0) | open | | -| f21_2 = MethodCallsiteRefinement(f21_1) | closed | | -| f21_3 = phi(f21_1, f21_2) | open | | -| f21_4 = phi(f21_0, f21_1, f21_2) | open | | -| f21_5 = Pi(f21_3) [true] | open | | -| f21_6 = MethodCallsiteRefinement(f21_5) | closed | | -| f21_7 = Pi(f21_4) [true] | open | | -| f21_8 = Pi(f21_3) [false] | closed | | -| f21_9 = phi(f21_6, f21_8) | closed | exit | -| f22_0 = open() | open | | -| f22_1 = MethodCallsiteRefinement(f22_0) | open | | -| f22_2 = MethodCallsiteRefinement(f22_1) | closed | | -| f22_3 = phi(f22_1, f22_2) | open | | -| f22_4 = phi(f22_0, f22_1, f22_2) | open | | -| f22_5 = Pi(f22_3) [true] | closed | | -| f22_6 = MethodCallsiteRefinement(f22_5) | closed | | -| f22_7 = Pi(f22_4) [true] | closed | | -| f22_8 = Pi(f22_3) [false] | open | | -| f22_9 = phi(f22_6, f22_8) | open | exit | -| f_0 = FunctionExpr | closed | exit | -| file_0 = open() | open | | -| file_1 = open() | open | | -| file_2 = None | closed | | -| file_3 = phi(file_0, file_1, file_2) | open | exit | -| fp_0 = ParameterDefinition | closed | exit | diff --git a/python/ql/test/query-tests/Resources/Dataflow.ql b/python/ql/test/query-tests/Resources/Dataflow.ql deleted file mode 100644 index fad31d80ec13..000000000000 --- a/python/ql/test/query-tests/Resources/Dataflow.ql +++ /dev/null @@ -1,14 +0,0 @@ -import python -import Resources.FileOpen - -from EssaVariable v, EssaDefinition def, string open, string exit -where - def = v.getDefinition() and - v.getSourceVariable().getName().charAt(0) = "f" and - ( - var_is_open(v, _) and open = "open" - or - not var_is_open(v, _) and open = "closed" - ) and - if BaseFlow::reaches_exit(v) then exit = "exit" else exit = "" -select v.getRepresentation() + " = " + v.getDefinition().getRepresentation(), open, exit diff --git a/python/ql/test/query-tests/Resources/FileNotAlwaysClosed.expected b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed.expected deleted file mode 100644 index c0a6c413333f..000000000000 --- a/python/ql/test/query-tests/Resources/FileNotAlwaysClosed.expected +++ /dev/null @@ -1,9 +0,0 @@ -| resources_test.py:4:10:4:25 | open() | File may not be closed if an exception is raised. | -| resources_test.py:9:10:9:25 | open() | File is opened but is not closed. | -| resources_test.py:49:14:49:29 | open() | File is opened but is not closed. | -| resources_test.py:58:14:58:29 | open() | File is opened but is not closed. | -| resources_test.py:79:11:79:26 | open() | File may not be closed if an exception is raised. | -| resources_test.py:108:11:108:20 | open() | File is opened but is not closed. | -| resources_test.py:112:11:112:28 | opener_func2() | File may not be closed if an exception is raised. | -| resources_test.py:129:15:129:24 | open() | File is opened but is not closed. | -| resources_test.py:237:11:237:26 | open() | File is opened but is not closed. | diff --git a/python/ql/test/query-tests/Resources/FileNotAlwaysClosed.qlref b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed.qlref deleted file mode 100644 index 37e9a0766805..000000000000 --- a/python/ql/test/query-tests/Resources/FileNotAlwaysClosed.qlref +++ /dev/null @@ -1 +0,0 @@ -Resources/FileNotAlwaysClosed.ql \ No newline at end of file diff --git a/python/ql/test/query-tests/Resources/resources_test.py b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/resources_test.py similarity index 89% rename from python/ql/test/query-tests/Resources/resources_test.py rename to python/ql/test/query-tests/Resources/FileNotAlwaysClosed/resources_test.py index 73fa88b44016..177c557d8b00 100644 --- a/python/ql/test/query-tests/Resources/resources_test.py +++ b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/resources_test.py @@ -3,10 +3,10 @@ def not_close1(): f1 = open("filename") f1.write("Error could occur") - f1.close() + f1.close() # $ notAlwaysClosed def not_close2(): - f2 = open("filename") + f2 = open("filename") # $ notAlwaysClosed def closed3(): f3 = open("filename") @@ -46,7 +46,7 @@ def closed7(): def not_closed8(): f8 = None try: - f8 = open("filename") + f8 = open("filename") # $ notAlwaysClosed f8.write("Error could occur") finally: if f8 is None: @@ -55,7 +55,7 @@ def not_closed8(): def not_closed9(): f9 = None try: - f9 = open("filename") + f9 = open("filename") # $ notAlwaysClosed f9.write("Error could occur") finally: if not f9: @@ -76,7 +76,7 @@ def closed10(): #Not closed by handling the wrong exception def not_closed11(): - f11 = open("filename") + f11 = open("filename") # $ notAlwaysClosed try: f11.write("IOError could occur") f11.write("IOError could occur") @@ -84,11 +84,11 @@ def not_closed11(): except AttributeError: f11.close() -def doesnt_raise(): +def doesnt_raise(*args): pass def mostly_closed12(): - f12 = open("filename") + f12 = open("filename") # $ SPURIOUS:notAlwaysClosed try: f12.write("IOError could occur") f12.write("IOError could occur") @@ -105,11 +105,11 @@ def opener_func2(name): return t1 def not_closed13(name): - f13 = open(name) + f13 = open(name) # $ notAlwaysClosed f13.write("Hello") def may_not_be_closed14(name): - f14 = opener_func2(name) + f14 = opener_func2(name) # $ notAlwaysClosed f14.write("Hello") f14.close() @@ -126,7 +126,7 @@ def closed15(): def may_not_be_closed16(name): try: - f16 = open(name) + f16 = open(name) # $ notAlwaysClosed f16.write("Hello") f16.close() except IOError: @@ -138,7 +138,7 @@ def may_raise(): #Not handling all exceptions, but we'll tolerate the false negative def not_closed17(): - f17 = open("filename") + f17 = open("filename") # $ notAlwaysClosed try: f17.write("IOError could occur") f17.write("IOError could occur") @@ -234,7 +234,7 @@ def closed21(path): def not_closed22(path): - f22 = open(path, "wb") + f22 = open(path, "wb") # $ notAlwaysClosed try: f22.write(b"foo") may_raise() diff --git a/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/test.expected b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/test.expected new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/test.ql b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/test.ql new file mode 100644 index 000000000000..501c4a0f309a --- /dev/null +++ b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/test.ql @@ -0,0 +1,21 @@ +import python +import Resources.FileNotAlwaysClosedQuery +import utils.test.InlineExpectationsTest + +module MethodArgTest implements TestSig { + string getARelevantTag() { result = "notAlwaysClosed" } + + predicate hasActualResult(Location location, string element, string tag, string value) { + exists(DataFlow::CfgNode f | + element = f.toString() and + location = f.getLocation() and + value = "" and + ( + fileNotAlwaysClosed(f) and + tag = "notAlwaysClosed" + ) + ) + } +} + +import MakeTest \ No newline at end of file From c8fc56560d94874c249958cdcd54c5607698f83c Mon Sep 17 00:00:00 2001 From: Joe Farebrother Date: Thu, 6 Mar 2025 16:21:37 +0000 Subject: [PATCH 03/13] Check for wrapper classes --- .../Resources/FileNotAlwaysClosedQuery.qll | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll b/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll index f75f61feea33..c6a9e0841ef5 100644 --- a/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll +++ b/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll @@ -1,22 +1,36 @@ /** Definitions for reasoning about whether files are closed. */ import python -//import semmle.python.dataflow.DataFlow +import semmle.python.dataflow.new.internal.DataFlowDispatch import semmle.python.ApiGraphs abstract class FileOpen extends DataFlow::CfgNode { } class FileOpenCall extends FileOpen { - FileOpenCall() { this = API::builtin("open").getACall() } + FileOpenCall() { this = [API::builtin("open").getACall()] } +} + +class FileWrapperClassCall extends FileOpen, DataFlow::CallCfgNode { + FileOpen wrapped; + + FileWrapperClassCall() { + wrapped = this.getArg(_).getALocalSource() and + this.getFunction() = classTracker(_) + } + + FileOpen getWrapped() { result = wrapped } } -// todo: type tracking to find wrapping funcs abstract class FileClose extends DataFlow::CfgNode { } class FileCloseCall extends FileClose { FileCloseCall() { exists(DataFlow::MethodCallNode mc | mc.calls(this, "close")) } } +class OsCloseCall extends FileClose { + OsCloseCall() { this = API::moduleImport("os").getMember("close").getACall().getArg(0) } +} + class WithStatement extends FileClose { WithStatement() { exists(With w | this.asExpr() = w.getContextExpr()) } } @@ -34,6 +48,6 @@ predicate fileIsStoredInField(FileOpen fo) { predicate fileNotAlwaysClosed(FileOpen fo) { not fileIsClosed(fo) and not fileIsReturned(fo) and - not fileIsStoredInField(fo) - // TODO: exception cases + not fileIsStoredInField(fo) and + not exists(FileWrapperClassCall fwc | fo = fwc.getWrapped()) } From f750e22d91aee02b9ed740204e4b56961308cb83 Mon Sep 17 00:00:00 2001 From: Joe Farebrother Date: Mon, 10 Mar 2025 11:12:04 +0000 Subject: [PATCH 04/13] Add case for exception flow --- .../ql/src/Resources/FileNotAlwaysClosed.ql | 66 ++--------- .../Resources/FileNotAlwaysClosedQuery.qll | 103 ++++++++++++++++-- .../FileNotAlwaysClosed/resources_test.py | 29 ++--- .../Resources/FileNotAlwaysClosed/test.ql | 20 ++-- 4 files changed, 129 insertions(+), 89 deletions(-) diff --git a/python/ql/src/Resources/FileNotAlwaysClosed.ql b/python/ql/src/Resources/FileNotAlwaysClosed.ql index f04764995752..7a96c7affc42 100644 --- a/python/ql/src/Resources/FileNotAlwaysClosed.ql +++ b/python/ql/src/Resources/FileNotAlwaysClosed.ql @@ -13,63 +13,13 @@ */ import python -// import FileOpen import FileNotAlwaysClosedQuery -// /** -// * Whether resource is opened and closed in in a matched pair of methods, -// * either `__enter__` and `__exit__` or `__init__` and `__del__` -// */ -// predicate opened_in_enter_closed_in_exit(ControlFlowNode open) { -// file_not_closed_at_scope_exit(open) and -// exists(FunctionValue entry, FunctionValue exit | -// open.getScope() = entry.getScope() and -// exists(ClassValue cls | -// cls.declaredAttribute("__enter__") = entry and cls.declaredAttribute("__exit__") = exit -// or -// cls.declaredAttribute("__init__") = entry and cls.declaredAttribute("__del__") = exit -// ) and -// exists(AttrNode attr_open, AttrNode attrclose | -// attr_open.getScope() = entry.getScope() and -// attrclose.getScope() = exit.getScope() and -// expr_is_open(attr_open.(DefinitionNode).getValue(), open) and -// attr_open.getName() = attrclose.getName() and -// close_method_call(_, attrclose) -// ) -// ) -// } -// predicate file_not_closed_at_scope_exit(ControlFlowNode open) { -// exists(EssaVariable v | -// BaseFlow::reaches_exit(v) and -// var_is_open(v, open) and -// not file_is_returned(v, open) -// ) -// or -// call_to_open(open) and -// not exists(AssignmentDefinition def | def.getValue() = open) and -// not exists(Return r | r.getValue() = open.getNode()) -// } -// predicate file_not_closed_at_exception_exit(ControlFlowNode open, ControlFlowNode exit) { -// exists(EssaVariable v | -// exit.(RaisingNode).viableExceptionalExit(_, _) and -// not closes_arg(exit, v.getSourceVariable()) and -// not close_method_call(exit, v.getAUse().(NameNode)) and -// var_is_open(v, open) and -// v.getAUse() = exit.getAChild*() -// ) -// } -/* Check to see if a file is opened but not closed or returned */ -// from ControlFlowNode defn, string message -// where -// not opened_in_enter_closed_in_exit(defn) and -// ( -// file_not_closed_at_scope_exit(defn) and message = "File is opened but is not closed." -// or -// not file_not_closed_at_scope_exit(defn) and -// file_not_closed_at_exception_exit(defn, _) and -// message = "File may not be closed if an exception is raised." -// ) -// select defn.getNode(), message -from FileOpen fo -where fileNotAlwaysClosed(fo) -select fo, "File is opened but is not closed." +from FileOpen fo, string msg +where + fileNotClosed(fo) and + msg = "File is opened but is not closed." + or + fileMayNotBeClosedOnException(fo, _) and + msg = "File may not be closed if an exception is raised" +select fo.getLocalSource(), msg diff --git a/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll b/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll index c6a9e0841ef5..1bfc40407d6f 100644 --- a/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll +++ b/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll @@ -4,50 +4,133 @@ import python import semmle.python.dataflow.new.internal.DataFlowDispatch import semmle.python.ApiGraphs -abstract class FileOpen extends DataFlow::CfgNode { } +/** A CFG node where a file is opened. */ +abstract class FileOpenSource extends DataFlow::CfgNode { } -class FileOpenCall extends FileOpen { - FileOpenCall() { this = [API::builtin("open").getACall()] } +/** A call to the builtin `open` or `os.open`. */ +class FileOpenCall extends FileOpenSource { + FileOpenCall() { + this = [API::builtin("open").getACall(), API::moduleImport("os").getMember("open").getACall()] + } +} + +private DataFlow::TypeTrackingNode fileOpenInstance(DataFlow::TypeTracker t) { + t.start() and + result instanceof FileOpenSource + or + exists(DataFlow::TypeTracker t2 | result = fileOpenInstance(t2).track(t2, t)) +} + +/** A call that returns an instance of an open file object. */ +class FileOpen extends DataFlow::CallCfgNode { + FileOpen() { fileOpenInstance(DataFlow::TypeTracker::end()).flowsTo(this) } + + /** Gets the local source of this file object, through any wrapper calls. */ + FileOpen getLocalSource() { + if this instanceof FileWrapperCall + then result = this.(FileWrapperCall).getWrapped().getLocalSource() + else result = this + } } -class FileWrapperClassCall extends FileOpen, DataFlow::CallCfgNode { +/** A call that may wrap a file object in a wrapper class or `os.fdopen`. */ +class FileWrapperCall extends FileOpenSource, DataFlow::CallCfgNode { FileOpen wrapped; - FileWrapperClassCall() { + FileWrapperCall() { wrapped = this.getArg(_).getALocalSource() and this.getFunction() = classTracker(_) + or + wrapped = this.getArg(0) and + this = API::moduleImport("os").getMember("fdopen").getACall() } + /** Gets the file that this call wraps. */ FileOpen getWrapped() { result = wrapped } } -abstract class FileClose extends DataFlow::CfgNode { } +/** A node where a file is closed. */ +abstract class FileClose extends DataFlow::CfgNode { + /** Holds if this file close will occur if an exception is thrown at `e`. */ + predicate guardsExceptions(Expr e) { + exists(Try try | + e = try.getAStmt().getAChildNode*() and + ( + this.asExpr() = try.getAHandler().getAChildNode*() + or + this.asExpr() = try.getAFinalstmt().getAChildNode*() + ) + ) + } +} +/** A call to the `.close()` method of a file object. */ class FileCloseCall extends FileClose { FileCloseCall() { exists(DataFlow::MethodCallNode mc | mc.calls(this, "close")) } } +/** A call to `os.close`. */ class OsCloseCall extends FileClose { OsCloseCall() { this = API::moduleImport("os").getMember("close").getACall().getArg(0) } } +/** A `with` statement. */ class WithStatement extends FileClose { - WithStatement() { exists(With w | this.asExpr() = w.getContextExpr()) } + With w; + + WithStatement() { this.asExpr() = w.getContextExpr() } + + override predicate guardsExceptions(Expr e) { + super.guardsExceptions(e) + or + e = w.getAStmt().getAChildNode*() + } } +/** Holds if an exception may be raised at `node` if it is a file object. */ +private predicate mayRaiseWithFile(DataFlow::CfgNode node) { + // Currently just consider any method called on `node`; e.g. `file.write()`; as potentially raising an exception + exists(DataFlow::MethodCallNode mc | node = mc.getObject()) and + not node instanceof FileOpen and + not node instanceof FileClose +} + +/** Holds if the file opened at `fo` is closed. */ predicate fileIsClosed(FileOpen fo) { exists(FileClose fc | DataFlow::localFlow(fo, fc)) } +/** Holds if the file opened at `fo` is returned to the caller. This makes the caller responsible for closing the file. */ predicate fileIsReturned(FileOpen fo) { - exists(Return ret | DataFlow::localFlow(fo, DataFlow::exprNode(ret.getValue()))) + exists(Return ret, Expr retVal | + ( + retVal = ret.getValue() + or + retVal = ret.getValue().(List).getAnElt() + or + retVal = ret.getValue().(Tuple).getAnElt() + ) and + DataFlow::localFlow(fo, DataFlow::exprNode(retVal)) + ) } +/** Holds if the file opened at `fo` is stored in a field. We assume that another method is then responsible for closing the file. */ predicate fileIsStoredInField(FileOpen fo) { exists(DataFlow::AttrWrite aw | DataFlow::localFlow(fo, aw.getValue())) } -predicate fileNotAlwaysClosed(FileOpen fo) { +/** Holds if the file opened at `fo` is not closed, and is expected to be closed. */ +predicate fileNotClosed(FileOpen fo) { not fileIsClosed(fo) and not fileIsReturned(fo) and not fileIsStoredInField(fo) and - not exists(FileWrapperClassCall fwc | fo = fwc.getWrapped()) + not exists(FileWrapperCall fwc | fo = fwc.getWrapped()) +} + +predicate fileMayNotBeClosedOnException(FileOpen fo, DataFlow::Node raises) { + fileIsClosed(fo) and + mayRaiseWithFile(raises) and + DataFlow::localFlow(fo, raises) and + not exists(FileClose fc | + DataFlow::localFlow(fo, fc) and + fc.guardsExceptions(raises.asExpr()) + ) } diff --git a/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/resources_test.py b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/resources_test.py index 177c557d8b00..ddc89a8fb7b2 100644 --- a/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/resources_test.py +++ b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/resources_test.py @@ -1,12 +1,12 @@ #File not always closed def not_close1(): - f1 = open("filename") + f1 = open("filename") # $ notClosedOnException f1.write("Error could occur") - f1.close() # $ notAlwaysClosed + f1.close() def not_close2(): - f2 = open("filename") # $ notAlwaysClosed + f2 = open("filename") # $ notClosed def closed3(): f3 = open("filename") @@ -46,7 +46,7 @@ def closed7(): def not_closed8(): f8 = None try: - f8 = open("filename") # $ notAlwaysClosed + f8 = open("filename") # $ MISSING:notClosedOnException f8.write("Error could occur") finally: if f8 is None: @@ -55,7 +55,7 @@ def not_closed8(): def not_closed9(): f9 = None try: - f9 = open("filename") # $ notAlwaysClosed + f9 = open("filename") # $ MISSING:notAlwaysClosed f9.write("Error could occur") finally: if not f9: @@ -76,7 +76,7 @@ def closed10(): #Not closed by handling the wrong exception def not_closed11(): - f11 = open("filename") # $ notAlwaysClosed + f11 = open("filename") # $ MISSING:notAlwaysClosed try: f11.write("IOError could occur") f11.write("IOError could occur") @@ -88,7 +88,7 @@ def doesnt_raise(*args): pass def mostly_closed12(): - f12 = open("filename") # $ SPURIOUS:notAlwaysClosed + f12 = open("filename") try: f12.write("IOError could occur") f12.write("IOError could occur") @@ -105,11 +105,11 @@ def opener_func2(name): return t1 def not_closed13(name): - f13 = open(name) # $ notAlwaysClosed + f13 = open(name) # $ notClosed f13.write("Hello") def may_not_be_closed14(name): - f14 = opener_func2(name) # $ notAlwaysClosed + f14 = opener_func2(name) # $ notClosedOnException f14.write("Hello") f14.close() @@ -120,13 +120,13 @@ def closer2(t3): closer1(t3) def closed15(): - f15 = opener_func2() + f15 = opener_func2() # $ SPURIOUS:notClosed closer2(f15) def may_not_be_closed16(name): try: - f16 = open(name) # $ notAlwaysClosed + f16 = open(name) # $ notClosedOnException f16.write("Hello") f16.close() except IOError: @@ -138,7 +138,7 @@ def may_raise(): #Not handling all exceptions, but we'll tolerate the false negative def not_closed17(): - f17 = open("filename") # $ notAlwaysClosed + f17 = open("filename") # $ MISSING:notClosedOnException try: f17.write("IOError could occur") f17.write("IOError could occur") @@ -234,7 +234,7 @@ def closed21(path): def not_closed22(path): - f22 = open(path, "wb") # $ notAlwaysClosed + f22 = open(path, "wb") # $ MISSING:notClosedOnException try: f22.write(b"foo") may_raise() @@ -244,3 +244,6 @@ def not_closed22(path): if f22.closed: # Wrong sense f22.close() +def not_closed23(path): + f23 = open(path, "w") # $ notClosed + wr = FileWrapper(f23) \ No newline at end of file diff --git a/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/test.ql b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/test.ql index 501c4a0f309a..3abba197373a 100644 --- a/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/test.ql +++ b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/test.ql @@ -1,21 +1,25 @@ -import python +import python import Resources.FileNotAlwaysClosedQuery import utils.test.InlineExpectationsTest module MethodArgTest implements TestSig { - string getARelevantTag() { result = "notAlwaysClosed" } + string getARelevantTag() { result = ["notClosed", "notClosedOnException"] } predicate hasActualResult(Location location, string element, string tag, string value) { - exists(DataFlow::CfgNode f | - element = f.toString() and - location = f.getLocation() and + exists(DataFlow::CfgNode el, FileOpen fo | + el = fo.getLocalSource() and + element = el.toString() and + location = el.getLocation() and value = "" and ( - fileNotAlwaysClosed(f) and - tag = "notAlwaysClosed" + fileNotClosed(fo) and + tag = "notClosed" + or + fileMayNotBeClosedOnException(fo, _) and + tag = "notClosedOnException" ) ) } } -import MakeTest \ No newline at end of file +import MakeTest From f8a0b1c5f94bfc78011f71b595dc56dfda674f0d Mon Sep 17 00:00:00 2001 From: Joe Farebrother Date: Mon, 10 Mar 2025 13:14:44 +0000 Subject: [PATCH 05/13] Update docs, precision, and deprecate old library --- .../ql/src/Resources/FileNotAlwaysClosed.py | 15 ------------ .../src/Resources/FileNotAlwaysClosed.qhelp | 23 ++++++++----------- .../ql/src/Resources/FileNotAlwaysClosed.ql | 6 ++--- python/ql/src/Resources/FileOpen.qll | 6 ++++- .../Resources/examples/FileNotAlwaysClosed.py | 17 ++++++++++++++ 5 files changed, 34 insertions(+), 33 deletions(-) delete mode 100644 python/ql/src/Resources/FileNotAlwaysClosed.py create mode 100644 python/ql/src/Resources/examples/FileNotAlwaysClosed.py diff --git a/python/ql/src/Resources/FileNotAlwaysClosed.py b/python/ql/src/Resources/FileNotAlwaysClosed.py deleted file mode 100644 index 5f5f10345c79..000000000000 --- a/python/ql/src/Resources/FileNotAlwaysClosed.py +++ /dev/null @@ -1,15 +0,0 @@ -f = open("filename") - ... # Actions to perform on file -f.close() -# File only closed if actions are completed successfully - -with open("filename") as f: - ...# Actions to perform on file -# File always closed - -f = open("filename") -try: - ... # Actions to perform on file -finally: - f.close() -# File always closed diff --git a/python/ql/src/Resources/FileNotAlwaysClosed.qhelp b/python/ql/src/Resources/FileNotAlwaysClosed.qhelp index 71073caa47b5..5c3e0f98a0e2 100644 --- a/python/ql/src/Resources/FileNotAlwaysClosed.qhelp +++ b/python/ql/src/Resources/FileNotAlwaysClosed.qhelp @@ -4,32 +4,27 @@ -

If a file is opened then it should always be closed again, even if an -exception is raised. -Failing to ensure that all files are closed may result in failure due to too -many open files.

+

When a file is opened, it should always be closed. Failure to close files could result in loss of data or resource leaks.

-

Ensure that if you open a file it is always closed on exiting the method. -Wrap the code between the open() and close() -functions in a with statement or use a try...finally -statement. Using a with statement is preferred as it is shorter -and more readable.

+

Ensure that opened files are always closed, including when an exception could be raised. +The best practice is to use a with statement to automatically clean up resources. +Otherwise, ensure that .close() is called in a try...except or try...finally +block to handle any possible exceptions. +

-

The following code shows examples of different ways of closing a file. In the first example, the -file is closed only if the method is exited successfully. In the other examples, the file is always -closed on exiting the method.

+

In the following examples, in the case marked BAD, the file may not be closed if an exception is raised. In the cases marked GOOD, the file is always closed.

- +
- +
  • Python Documentation: Reading and writing files.
  • Python Language Reference: The with statement, The try statement.
  • Python PEP 343: The "with" Statement.
  • diff --git a/python/ql/src/Resources/FileNotAlwaysClosed.ql b/python/ql/src/Resources/FileNotAlwaysClosed.ql index 7a96c7affc42..2f40ec3eb5f0 100644 --- a/python/ql/src/Resources/FileNotAlwaysClosed.ql +++ b/python/ql/src/Resources/FileNotAlwaysClosed.ql @@ -1,6 +1,6 @@ /** * @name File is not always closed - * @description Opening a file without ensuring that it is always closed may cause resource leaks. + * @description Opening a file without ensuring that it is always closed may cause data loss or resource leaks. * @kind problem * @tags efficiency * correctness @@ -8,7 +8,7 @@ * external/cwe/cwe-772 * @problem.severity warning * @sub-severity high - * @precision medium + * @precision high * @id py/file-not-closed */ @@ -21,5 +21,5 @@ where msg = "File is opened but is not closed." or fileMayNotBeClosedOnException(fo, _) and - msg = "File may not be closed if an exception is raised" + msg = "File may not be closed if an exception is raised." select fo.getLocalSource(), msg diff --git a/python/ql/src/Resources/FileOpen.qll b/python/ql/src/Resources/FileOpen.qll index 8fc453063536..dd952e732d42 100644 --- a/python/ql/src/Resources/FileOpen.qll +++ b/python/ql/src/Resources/FileOpen.qll @@ -1,4 +1,8 @@ -/** Contains predicates concerning when and where files are opened and closed. */ +/** + * DEPRECATED: Use FileNotAlwaysClosedQuery instead. + * Contains predicates concerning when and where files are opened and closed. + */ +deprecated module; import python import semmle.python.pointsto.Filters diff --git a/python/ql/src/Resources/examples/FileNotAlwaysClosed.py b/python/ql/src/Resources/examples/FileNotAlwaysClosed.py new file mode 100644 index 000000000000..cd5bdb2118a2 --- /dev/null +++ b/python/ql/src/Resources/examples/FileNotAlwaysClosed.py @@ -0,0 +1,17 @@ +def bad(): + f = open("filename", "w") + f.write("could raise exception") # BAD: This call could raise an exception, leading to the file not being closed. + f.close() + + +def good1(): + with open("filename", "w") as f: + f.write("always closed") # GOOD: The `with` statement ensures the file is always closed. + +def good2(): + f = open("filename", "w") + try: + f.write("always closed") + finally: + f.close() # GOOD: The `finally` block always ensures the file is closed. + From b2acfbcf8722fddbf4fa447ae665dceb2e67e324 Mon Sep 17 00:00:00 2001 From: Joe Farebrother Date: Thu, 20 Mar 2025 09:52:56 +0000 Subject: [PATCH 06/13] Simplify handling of wrapper classes and exception flow + improve qldoc and annotate tests. --- .../src/Resources/FileNotAlwaysClosed.qhelp | 9 ++-- .../ql/src/Resources/FileNotAlwaysClosed.ql | 2 +- .../Resources/FileNotAlwaysClosedQuery.qll | 45 ++++++++++--------- .../FileNotAlwaysClosed/resources_test.py | 14 +++--- .../Resources/FileNotAlwaysClosed/test.ql | 2 +- 5 files changed, 38 insertions(+), 34 deletions(-) diff --git a/python/ql/src/Resources/FileNotAlwaysClosed.qhelp b/python/ql/src/Resources/FileNotAlwaysClosed.qhelp index 5c3e0f98a0e2..f37c5a4cbc47 100644 --- a/python/ql/src/Resources/FileNotAlwaysClosed.qhelp +++ b/python/ql/src/Resources/FileNotAlwaysClosed.qhelp @@ -4,13 +4,16 @@ -

    When a file is opened, it should always be closed. Failure to close files could result in loss of data or resource leaks.

    - +

    When a file is opened, it should always be closed. +

    +

    A file opened for writing that is not closed when the application exits may result in data loss, where not all of the data written may be saved to the file. +A file opened for reading or writing that is not closed may also use up file descriptors, which is a resource leak that in long running applications could lead to a failure to open additional files. +

    Ensure that opened files are always closed, including when an exception could be raised. -The best practice is to use a with statement to automatically clean up resources. +The best practice is often to use a with statement to automatically clean up resources. Otherwise, ensure that .close() is called in a try...except or try...finally block to handle any possible exceptions.

    diff --git a/python/ql/src/Resources/FileNotAlwaysClosed.ql b/python/ql/src/Resources/FileNotAlwaysClosed.ql index 2f40ec3eb5f0..4bfba62b2138 100644 --- a/python/ql/src/Resources/FileNotAlwaysClosed.ql +++ b/python/ql/src/Resources/FileNotAlwaysClosed.ql @@ -22,4 +22,4 @@ where or fileMayNotBeClosedOnException(fo, _) and msg = "File may not be closed if an exception is raised." -select fo.getLocalSource(), msg +select fo, msg diff --git a/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll b/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll index 1bfc40407d6f..11473edde047 100644 --- a/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll +++ b/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll @@ -24,17 +24,10 @@ private DataFlow::TypeTrackingNode fileOpenInstance(DataFlow::TypeTracker t) { /** A call that returns an instance of an open file object. */ class FileOpen extends DataFlow::CallCfgNode { FileOpen() { fileOpenInstance(DataFlow::TypeTracker::end()).flowsTo(this) } - - /** Gets the local source of this file object, through any wrapper calls. */ - FileOpen getLocalSource() { - if this instanceof FileWrapperCall - then result = this.(FileWrapperCall).getWrapped().getLocalSource() - else result = this - } } /** A call that may wrap a file object in a wrapper class or `os.fdopen`. */ -class FileWrapperCall extends FileOpenSource, DataFlow::CallCfgNode { +class FileWrapperCall extends DataFlow::CallCfgNode { FileOpen wrapped; FileWrapperCall() { @@ -53,14 +46,11 @@ class FileWrapperCall extends FileOpenSource, DataFlow::CallCfgNode { abstract class FileClose extends DataFlow::CfgNode { /** Holds if this file close will occur if an exception is thrown at `e`. */ predicate guardsExceptions(Expr e) { - exists(Try try | - e = try.getAStmt().getAChildNode*() and - ( - this.asExpr() = try.getAHandler().getAChildNode*() - or - this.asExpr() = try.getAFinalstmt().getAChildNode*() - ) - ) + this.asCfgNode() = + DataFlow::exprNode(e).asCfgNode().getAnExceptionalSuccessor().getASuccessor*() + or + // the expression is after the close call + DataFlow::exprNode(e).asCfgNode() = this.asCfgNode().getASuccessor*() } } @@ -95,8 +85,20 @@ private predicate mayRaiseWithFile(DataFlow::CfgNode node) { not node instanceof FileClose } +/** Holds if data flows from `nodeFrom` to `nodeTo` in one step that also includes file wrapper classes. */ +private predicate fileLocalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + DataFlow::localFlowStep(nodeFrom, nodeTo) + or + exists(FileWrapperCall fw | nodeFrom = fw.getWrapped() and nodeTo = fw) +} + +/** Holds if data flows from `source` to `sink`, including file wrapper classes. */ +private predicate fileLocalFlow(DataFlow::Node source, DataFlow::Node sink) { + fileLocalFlowStep*(source, sink) +} + /** Holds if the file opened at `fo` is closed. */ -predicate fileIsClosed(FileOpen fo) { exists(FileClose fc | DataFlow::localFlow(fo, fc)) } +predicate fileIsClosed(FileOpen fo) { exists(FileClose fc | fileLocalFlow(fo, fc)) } /** Holds if the file opened at `fo` is returned to the caller. This makes the caller responsible for closing the file. */ predicate fileIsReturned(FileOpen fo) { @@ -108,27 +110,26 @@ predicate fileIsReturned(FileOpen fo) { or retVal = ret.getValue().(Tuple).getAnElt() ) and - DataFlow::localFlow(fo, DataFlow::exprNode(retVal)) + fileLocalFlow(fo, DataFlow::exprNode(retVal)) ) } /** Holds if the file opened at `fo` is stored in a field. We assume that another method is then responsible for closing the file. */ predicate fileIsStoredInField(FileOpen fo) { - exists(DataFlow::AttrWrite aw | DataFlow::localFlow(fo, aw.getValue())) + exists(DataFlow::AttrWrite aw | fileLocalFlow(fo, aw.getValue())) } /** Holds if the file opened at `fo` is not closed, and is expected to be closed. */ predicate fileNotClosed(FileOpen fo) { not fileIsClosed(fo) and not fileIsReturned(fo) and - not fileIsStoredInField(fo) and - not exists(FileWrapperCall fwc | fo = fwc.getWrapped()) + not fileIsStoredInField(fo) } predicate fileMayNotBeClosedOnException(FileOpen fo, DataFlow::Node raises) { fileIsClosed(fo) and mayRaiseWithFile(raises) and - DataFlow::localFlow(fo, raises) and + fileLocalFlow(fo, raises) and not exists(FileClose fc | DataFlow::localFlow(fo, fc) and fc.guardsExceptions(raises.asExpr()) diff --git a/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/resources_test.py b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/resources_test.py index ddc89a8fb7b2..1f30d309d6f1 100644 --- a/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/resources_test.py +++ b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/resources_test.py @@ -46,10 +46,10 @@ def closed7(): def not_closed8(): f8 = None try: - f8 = open("filename") # $ MISSING:notClosedOnException + f8 = open("filename") # $ MISSING:notClosedOnException f8.write("Error could occur") finally: - if f8 is None: + if f8 is None: # We don't precisely consider this condition, so this result is MISSING. However, this seems uncommon. f8.close() def not_closed9(): @@ -58,7 +58,7 @@ def not_closed9(): f9 = open("filename") # $ MISSING:notAlwaysClosed f9.write("Error could occur") finally: - if not f9: + if not f9: # We don't precisely consider this condition, so this result is MISSING.However, this seems uncommon. f9.close() def not_closed_but_cant_tell_locally(): @@ -81,7 +81,7 @@ def not_closed11(): f11.write("IOError could occur") f11.write("IOError could occur") f11.close() - except AttributeError: + except AttributeError: # We don't consider the type of exception handled here, so this result is MISSING. f11.close() def doesnt_raise(*args): @@ -121,7 +121,7 @@ def closer2(t3): def closed15(): f15 = opener_func2() # $ SPURIOUS:notClosed - closer2(f15) + closer2(f15) # We don't detect that this call closes the file, so this result is SPURIOUS. def may_not_be_closed16(name): @@ -144,7 +144,7 @@ def not_closed17(): f17.write("IOError could occur") may_raise("ValueError could occur") # FN here. f17.close() - except IOError: + except IOError: # We don't detect that a ValueErrror could be raised that isn't handled here, so this result is MISSING. f17.close() #ODASA-3779 @@ -241,7 +241,7 @@ def not_closed22(path): if foo: f22.close() finally: - if f22.closed: # Wrong sense + if f22.closed: # We don't precisely consider this condition, so this result is MISSING. However, this seems uncommon. f22.close() def not_closed23(path): diff --git a/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/test.ql b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/test.ql index 3abba197373a..f176172d0782 100644 --- a/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/test.ql +++ b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/test.ql @@ -7,7 +7,7 @@ module MethodArgTest implements TestSig { predicate hasActualResult(Location location, string element, string tag, string value) { exists(DataFlow::CfgNode el, FileOpen fo | - el = fo.getLocalSource() and + el = fo and element = el.toString() and location = el.getLocation() and value = "" and From 2c74ddb8539701bd8bd94b1da539ea364cf02df8 Mon Sep 17 00:00:00 2001 From: Joe Farebrother Date: Thu, 20 Mar 2025 10:37:58 +0000 Subject: [PATCH 07/13] Add django FileRsponse as a wrapper --- python/ql/src/Resources/FileNotAlwaysClosedQuery.qll | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll b/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll index 11473edde047..2baaf34ab4fe 100644 --- a/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll +++ b/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll @@ -36,6 +36,9 @@ class FileWrapperCall extends DataFlow::CallCfgNode { or wrapped = this.getArg(0) and this = API::moduleImport("os").getMember("fdopen").getACall() + or + wrapped = this.getArg(0) and + this = API::moduleImport("django").getMember("http").getMember("FileResponse").getACall() } /** Gets the file that this call wraps. */ From 3707f107bf3b196fb03dd226072e17bb2f11eae3 Mon Sep 17 00:00:00 2001 From: Joe Farebrother Date: Thu, 20 Mar 2025 11:26:17 +0000 Subject: [PATCH 08/13] Fix tests + add more tests --- .../Resources/FileNotAlwaysClosedQuery.qll | 47 +++++++++---------- .../FileNotAlwaysClosed/resources_test.py | 33 ++++++++++++- 2 files changed, 55 insertions(+), 25 deletions(-) diff --git a/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll b/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll index 2baaf34ab4fe..48b1763ec84d 100644 --- a/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll +++ b/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll @@ -21,14 +21,17 @@ private DataFlow::TypeTrackingNode fileOpenInstance(DataFlow::TypeTracker t) { exists(DataFlow::TypeTracker t2 | result = fileOpenInstance(t2).track(t2, t)) } -/** A call that returns an instance of an open file object. */ +/** + * A call that returns an instance of an open file object. + * This includes calls to methods that transitively call `open` or similar. + */ class FileOpen extends DataFlow::CallCfgNode { FileOpen() { fileOpenInstance(DataFlow::TypeTracker::end()).flowsTo(this) } } /** A call that may wrap a file object in a wrapper class or `os.fdopen`. */ class FileWrapperCall extends DataFlow::CallCfgNode { - FileOpen wrapped; + DataFlow::Node wrapped; FileWrapperCall() { wrapped = this.getArg(_).getALocalSource() and @@ -42,18 +45,18 @@ class FileWrapperCall extends DataFlow::CallCfgNode { } /** Gets the file that this call wraps. */ - FileOpen getWrapped() { result = wrapped } + DataFlow::Node getWrapped() { result = wrapped } } /** A node where a file is closed. */ abstract class FileClose extends DataFlow::CfgNode { /** Holds if this file close will occur if an exception is thrown at `e`. */ - predicate guardsExceptions(Expr e) { - this.asCfgNode() = - DataFlow::exprNode(e).asCfgNode().getAnExceptionalSuccessor().getASuccessor*() + predicate guardsExceptions(DataFlow::CfgNode raises) { + this.asCfgNode() = raises.asCfgNode().getAnExceptionalSuccessor().getASuccessor*() or - // the expression is after the close call - DataFlow::exprNode(e).asCfgNode() = this.asCfgNode().getASuccessor*() + // The expression is after the close call. + // This also covers the body of a `with` statement. + raises.asCfgNode() = this.asCfgNode().getASuccessor*() } } @@ -72,20 +75,14 @@ class WithStatement extends FileClose { With w; WithStatement() { this.asExpr() = w.getContextExpr() } - - override predicate guardsExceptions(Expr e) { - super.guardsExceptions(e) - or - e = w.getAStmt().getAChildNode*() - } } -/** Holds if an exception may be raised at `node` if it is a file object. */ -private predicate mayRaiseWithFile(DataFlow::CfgNode node) { +/** Holds if an exception may be raised at `raises` if `file` is a file object. */ +private predicate mayRaiseWithFile(DataFlow::CfgNode file, DataFlow::CfgNode raises) { // Currently just consider any method called on `node`; e.g. `file.write()`; as potentially raising an exception - exists(DataFlow::MethodCallNode mc | node = mc.getObject()) and - not node instanceof FileOpen and - not node instanceof FileClose + raises.(DataFlow::MethodCallNode).getObject() = file and + not file instanceof FileOpen and + not file instanceof FileClose } /** Holds if data flows from `nodeFrom` to `nodeTo` in one step that also includes file wrapper classes. */ @@ -131,10 +128,12 @@ predicate fileNotClosed(FileOpen fo) { predicate fileMayNotBeClosedOnException(FileOpen fo, DataFlow::Node raises) { fileIsClosed(fo) and - mayRaiseWithFile(raises) and - fileLocalFlow(fo, raises) and - not exists(FileClose fc | - DataFlow::localFlow(fo, fc) and - fc.guardsExceptions(raises.asExpr()) + exists(DataFlow::CfgNode fileRaised | + mayRaiseWithFile(fileRaised, raises) and + fileLocalFlow(fo, fileRaised) and + not exists(FileClose fc | + fileLocalFlow(fo, fc) and + fc.guardsExceptions(raises) + ) ) } diff --git a/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/resources_test.py b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/resources_test.py index 1f30d309d6f1..598d54c892c3 100644 --- a/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/resources_test.py +++ b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/resources_test.py @@ -246,4 +246,35 @@ def not_closed22(path): def not_closed23(path): f23 = open(path, "w") # $ notClosed - wr = FileWrapper(f23) \ No newline at end of file + wr = FileWrapper(f23) + +def closed24(path): + f24 = open(path, "w") + try: + f24.write("hi") + except: + pass + f24.close() + +def closed25(path): + from django.http import FileResponse + return FileResponse(open(path)) + +import os +def closed26(path): + fd = os.open(path) + os.close(fd) + +def not_closed27(path): + fd = os.open(path, "w") # $notClosedOnException + f27 = os.fdopen(fd, "w") + f27.write("hi") + f27.close() + +def closed28(path): + fd = os.open(path, os.O_WRONLY) + f28 = os.fdopen(fd, "w") + try: + f28.write("hi") + finally: + f28.close() \ No newline at end of file From bdbdcf8bd870060f4ead4581a8e55583ad259f57 Mon Sep 17 00:00:00 2001 From: Joe Farebrother Date: Thu, 20 Mar 2025 14:18:45 +0000 Subject: [PATCH 09/13] Clean up charpred of WithStatement + fix a comment --- python/ql/src/Resources/FileNotAlwaysClosedQuery.qll | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll b/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll index 48b1763ec84d..2d5bb5c3abc2 100644 --- a/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll +++ b/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll @@ -72,14 +72,12 @@ class OsCloseCall extends FileClose { /** A `with` statement. */ class WithStatement extends FileClose { - With w; - - WithStatement() { this.asExpr() = w.getContextExpr() } + WithStatement() { this.asExpr() = any(With w).getContextExpr() } } /** Holds if an exception may be raised at `raises` if `file` is a file object. */ private predicate mayRaiseWithFile(DataFlow::CfgNode file, DataFlow::CfgNode raises) { - // Currently just consider any method called on `node`; e.g. `file.write()`; as potentially raising an exception + // Currently just consider any method called on `file`; e.g. `file.write()`; as potentially raising an exception raises.(DataFlow::MethodCallNode).getObject() = file and not file instanceof FileOpen and not file instanceof FileClose From a46c157e46071d1d10fb76536ea118744cdd55a4 Mon Sep 17 00:00:00 2001 From: Joe Farebrother Date: Fri, 21 Mar 2025 09:24:54 +0000 Subject: [PATCH 10/13] Add quality tag + tweak description --- python/ql/src/Resources/FileNotAlwaysClosed.ql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/ql/src/Resources/FileNotAlwaysClosed.ql b/python/ql/src/Resources/FileNotAlwaysClosed.ql index 4bfba62b2138..c3950eda805d 100644 --- a/python/ql/src/Resources/FileNotAlwaysClosed.ql +++ b/python/ql/src/Resources/FileNotAlwaysClosed.ql @@ -1,10 +1,11 @@ /** * @name File is not always closed - * @description Opening a file without ensuring that it is always closed may cause data loss or resource leaks. + * @description Opening a file without ensuring that it is always closed may lead to data loss or resource leaks. * @kind problem * @tags efficiency * correctness * resources + * quality * external/cwe/cwe-772 * @problem.severity warning * @sub-severity high From 0fa70db4c2bc43e7513b4a663f9de86b179d6aba Mon Sep 17 00:00:00 2001 From: Joe Farebrother Date: Tue, 25 Mar 2025 08:55:55 +0000 Subject: [PATCH 11/13] Review suggestions - update comment and introduce manual magic to filelocalflow --- python/ql/src/Resources/FileNotAlwaysClosedQuery.qll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll b/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll index 2d5bb5c3abc2..ef5a9f201b6a 100644 --- a/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll +++ b/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll @@ -50,7 +50,7 @@ class FileWrapperCall extends DataFlow::CallCfgNode { /** A node where a file is closed. */ abstract class FileClose extends DataFlow::CfgNode { - /** Holds if this file close will occur if an exception is thrown at `e`. */ + /** Holds if this file close will occur if an exception is thrown at `raises`. */ predicate guardsExceptions(DataFlow::CfgNode raises) { this.asCfgNode() = raises.asCfgNode().getAnExceptionalSuccessor().getASuccessor*() or @@ -91,7 +91,7 @@ private predicate fileLocalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node node } /** Holds if data flows from `source` to `sink`, including file wrapper classes. */ -private predicate fileLocalFlow(DataFlow::Node source, DataFlow::Node sink) { +private predicate fileLocalFlow(FileOpen source, DataFlow::Node sink) { fileLocalFlowStep*(source, sink) } From d23c3b8a74c24a410e4deb3999420f55a6c8ad67 Mon Sep 17 00:00:00 2001 From: Joe Farebrother Date: Wed, 26 Mar 2025 09:23:49 +0000 Subject: [PATCH 12/13] Revert manual magic This appeared to cause timeouts on DCA. --- python/ql/src/Resources/FileNotAlwaysClosedQuery.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll b/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll index ef5a9f201b6a..5df0d093a145 100644 --- a/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll +++ b/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll @@ -91,7 +91,7 @@ private predicate fileLocalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node node } /** Holds if data flows from `source` to `sink`, including file wrapper classes. */ -private predicate fileLocalFlow(FileOpen source, DataFlow::Node sink) { +private predicate fileLocalFlow(DataFlow::Node source, DataFlow::Node sink) { fileLocalFlowStep*(source, sink) } From 2fd9b1673677059e78d51c2e9c9c2cb85dbc0489 Mon Sep 17 00:00:00 2001 From: Joe Farebrother Date: Thu, 27 Mar 2025 15:45:38 +0000 Subject: [PATCH 13/13] Attempt performance improvement for fileLocalFlow --- .../Resources/FileNotAlwaysClosedQuery.qll | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll b/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll index 5df0d093a145..fe1d6578e11e 100644 --- a/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll +++ b/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll @@ -84,15 +84,28 @@ private predicate mayRaiseWithFile(DataFlow::CfgNode file, DataFlow::CfgNode rai } /** Holds if data flows from `nodeFrom` to `nodeTo` in one step that also includes file wrapper classes. */ -private predicate fileLocalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - DataFlow::localFlowStep(nodeFrom, nodeTo) - or +private predicate fileAdditionalLocalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { exists(FileWrapperCall fw | nodeFrom = fw.getWrapped() and nodeTo = fw) } +private predicate fileLocalFlowHelper0( + DataFlow::LocalSourceNode nodeFrom, DataFlow::LocalSourceNode nodeTo +) { + exists(DataFlow::Node nodeMid | + nodeFrom.flowsTo(nodeMid) and fileAdditionalLocalFlowStep(nodeMid, nodeTo) + ) +} + +private predicate fileLocalFlowHelper1( + DataFlow::LocalSourceNode nodeFrom, DataFlow::LocalSourceNode nodeTo +) { + fileLocalFlowHelper0*(nodeFrom, nodeTo) +} + /** Holds if data flows from `source` to `sink`, including file wrapper classes. */ -private predicate fileLocalFlow(DataFlow::Node source, DataFlow::Node sink) { - fileLocalFlowStep*(source, sink) +pragma[inline] +private predicate fileLocalFlow(FileOpen source, DataFlow::Node sink) { + exists(DataFlow::LocalSourceNode mid | fileLocalFlowHelper1(source, mid) and mid.flowsTo(sink)) } /** Holds if the file opened at `fo` is closed. */