Skip to content

Commit c0f34de

Browse files
authored
More fixes for OSS data flow (#1083)
1 parent b47d4ce commit c0f34de

11 files changed

Lines changed: 104 additions & 50 deletions

File tree

dataflowengineoss/src/main/scala/io/shiftleft/dataflowengineoss/language/nodemethods/ExpressionMethods.scala

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,12 @@ class ExpressionMethods[NodeType <: nodes.Expression](val node: NodeType) extend
2222
* */
2323
def isDefined(implicit semantics: Semantics): Boolean = {
2424
val s = semanticsForCallByArg
25-
s.isEmpty || s.exists(_.mappings.exists { case (_, dstIndex) => dstIndex == node.order })
25+
s.isEmpty || s.exists { semantic =>
26+
semantic.mappings.exists {
27+
case (_, dstIndex) =>
28+
dstIndex == node.order
29+
}
30+
}
2631
}
2732

2833
/**

dataflowengineoss/src/main/scala/io/shiftleft/dataflowengineoss/passes/reachingdef/ReachingDefPass.scala

Lines changed: 17 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@ import io.shiftleft.codepropertygraph.Cpg
44
import io.shiftleft.codepropertygraph.generated.{nodes, _}
55
import io.shiftleft.passes.{DiffGraph, ParallelCpgPass}
66
import io.shiftleft.semanticcpg.language._
7-
8-
import scala.collection.mutable
7+
import overflowdb.traversal.Traversal
98

109
/**
1110
* A pass that calculates reaching definitions ("data dependencies").
@@ -29,14 +28,12 @@ class ReachingDefPass(cpg: Cpg) extends ParallelCpgPass[nodes.Method](cpg) {
2928
private def addReachingDefEdges(method: nodes.Method, solution: Solution[Set[Definition]]): DiffGraph.Builder = {
3029

3130
val dstGraph = DiffGraph.newBuilder
32-
val nodesWithOutgoingEdges: mutable.Set[nodes.StoredNode] = mutable.Set()
3331

3432
def addEdge(fromNode: nodes.StoredNode, toNode: nodes.StoredNode, variable: String = ""): Unit = {
3533
val properties = List((EdgeKeyNames.VARIABLE, variable))
3634
if (fromNode.isInstanceOf[nodes.Unknown] || toNode
3735
.isInstanceOf[nodes.Unknown])
3836
return
39-
nodesWithOutgoingEdges += fromNode
4037
dstGraph.addEdgeInOriginal(fromNode, toNode, EdgeTypes.REACHING_DEF, properties)
4138
}
4239

@@ -61,9 +58,9 @@ class ReachingDefPass(cpg: Cpg) extends ParallelCpgPass[nodes.Method](cpg) {
6158
// For all calls, assume that input arguments
6259
// taint corresponding output arguments
6360
// and the return value
64-
usageAnalyzer.uses(call).foreach { use =>
65-
gen(call).foreach { g =>
66-
if (use != g.node) {
61+
usageAnalyzer.uses(node).foreach { use =>
62+
gen(node).foreach { g =>
63+
if (use != g.node && nodeMayBeSource(use)) {
6764
addEdge(use, g.node, nodeToEdgeLabel(use))
6865
}
6966
}
@@ -82,39 +79,33 @@ class ReachingDefPass(cpg: Cpg) extends ParallelCpgPass[nodes.Method](cpg) {
8279
}
8380
addEdge(ret, method.methodReturn, "<RET>")
8481

82+
case exitNode: nodes.MethodReturn =>
83+
in(exitNode).foreach { i =>
84+
addEdge(i.node, exitNode, nodeToEdgeLabel(i.node))
85+
}
8586
case _ =>
8687
}
8788
}
8889

8990
// Add edges from the entry node
9091
allNodes
91-
.filterNot(
92-
x =>
93-
x.isInstanceOf[nodes.Method] || x
94-
.isInstanceOf[nodes.ControlStructure] || x.isInstanceOf[nodes.FieldIdentifier] || x
95-
.isInstanceOf[nodes.JumpTarget] || x.isInstanceOf[nodes.MethodReturn] || x.isInstanceOf[nodes.Block])
92+
.filter(nodeMayBeSource)
9693
.foreach { node =>
9794
if (usageAnalyzer.usedIncomingDefs(node).isEmpty) {
9895
addEdge(method, node)
9996
}
10097
}
101-
102-
// Add edges to exit node
103-
val exitNode = method.methodReturn
104-
(allNodes.toSet -- nodesWithOutgoingEdges).toList
105-
.filter(_ != exitNode)
106-
.filter(_ != method)
107-
.filterNot(
108-
x =>
109-
x.isInstanceOf[nodes.Method] || x
110-
.isInstanceOf[nodes.ControlStructure] || x.isInstanceOf[nodes.FieldIdentifier] || x
111-
.isInstanceOf[nodes.JumpTarget] || x.isInstanceOf[nodes.MethodReturn])
112-
.foreach { from =>
113-
addEdge(from, exitNode)
114-
}
11598
dstGraph
11699
}
117100

101+
private def nodeMayBeSource(x: nodes.StoredNode): Boolean = {
102+
!(
103+
x.isInstanceOf[nodes.Method] || x
104+
.isInstanceOf[nodes.ControlStructure] || x.isInstanceOf[nodes.FieldIdentifier] || x
105+
.isInstanceOf[nodes.JumpTarget] || x.isInstanceOf[nodes.MethodReturn] || x.isInstanceOf[nodes.Block]
106+
)
107+
}
108+
118109
private def nodeToEdgeLabel(node: nodes.StoredNode): String = {
119110
node match {
120111
case n: nodes.CfgNode => n.code

dataflowengineoss/src/main/scala/io/shiftleft/dataflowengineoss/passes/reachingdef/ReachingDefProblem.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ object ReachingDefProblem {
2727
val init = new ReachingDefInit(transfer.gen)
2828
def meet: (Set[Definition], Set[Definition]) => Set[Definition] =
2929
(x: Set[Definition], y: Set[Definition]) => { x.union(y) }
30+
3031
new DataFlowProblem[Set[Definition]](flowGraph, transfer, meet, init, true, Set[Definition]())
3132
}
3233

dataflowengineoss/src/main/scala/io/shiftleft/dataflowengineoss/passes/reachingdef/UsageAnalyzer.scala

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ class UsageAnalyzer(in: Map[nodes.StoredNode, Set[Definition]]) {
4040

4141
def uses(node: nodes.StoredNode): Set[nodes.StoredNode] = {
4242
val n = node match {
43-
// case methodRet: nodes.MethodReturn => methodRet.start.method.cfgNode.toSet
4443
case ret: nodes.Return =>
4544
ret.astChildren.toSet
4645
case call: nodes.Call =>

dataflowengineoss/src/main/scala/io/shiftleft/dataflowengineoss/queryengine/Engine.scala

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
package io.shiftleft.dataflowengineoss.queryengine
22

3-
import java.util.concurrent.{Callable, ExecutorCompletionService, ExecutorService, Executors}
3+
import io.shiftleft.codepropertygraph.generated.nodes.Call
44

5+
import java.util.concurrent.{Callable, ExecutorCompletionService, ExecutorService, Executors}
56
import io.shiftleft.codepropertygraph.generated.{EdgeKeys, EdgeTypes, nodes}
67
import io.shiftleft.dataflowengineoss.semanticsloader.{FlowSemantic, Semantics}
78
import io.shiftleft.semanticcpg.language._
@@ -132,14 +133,22 @@ object Engine {
132133
implicit semantics: Semantics): Vector[PathElement] = {
133134
curNode match {
134135
case argument: nodes.Expression =>
135-
val (arguments, nonArguments) = ddgInE(curNode, path).partition(_.outNode().isInstanceOf[nodes.Expression])
136+
val (arguments, nonArguments) = ddgInE(curNode, path)
137+
.filter { edge =>
138+
!isCallRetvalThatShouldNotPropagate(edge.outNode().asInstanceOf[nodes.StoredNode])
139+
}
140+
.partition(_.outNode().isInstanceOf[nodes.Expression])
136141
val elemsForArguments = arguments.flatMap { e =>
137142
elemForArgument(e, argument)
138143
}
139144
val elems = elemsForArguments ++ nonArguments.map(edgeToPathElement)
140145
elems
141146
case _ =>
142-
ddgInE(curNode, path).map(edgeToPathElement)
147+
ddgInE(curNode, path)
148+
.filter { edge =>
149+
!isCallRetvalThatShouldNotPropagate(edge.outNode().asInstanceOf[nodes.StoredNode])
150+
}
151+
.map(edgeToPathElement)
143152
}
144153
}
145154

@@ -158,6 +167,16 @@ object Engine {
158167
.toVector
159168
}
160169

170+
def isCallRetvalThatShouldNotPropagate(parentNode: nodes.StoredNode)(implicit semantics: Semantics): Boolean = {
171+
parentNode match {
172+
case call: Call =>
173+
val sem = semantics.forMethod(call.methodFullName)
174+
(sem.isDefined && !(sem.get.mappings.map(_._2).contains(-1)))
175+
case _ =>
176+
false
177+
}
178+
}
179+
161180
/**
162181
* For a given `(parentNode, curNode)` pair, determine whether to expand into
163182
* `parentNode`. If so, return a corresponding path element or None if

dataflowengineoss/src/test/resources/default.semantics

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,4 @@
3030
"<operator>.addition" 1->-1 2->-1
3131
"<operator>.conditional" 2->-1 3->-1
3232
"woo" 1->-1
33+
"free" 1->1

fuzzyc2cpg-tests/src/test/scala/io/shiftleft/fuzzyc2cpg/dotgenerator/DotDdgGeneratorTests.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@ class DotDdgGeneratorTests extends DataFlowCodeToCpgSuite {
1919
|""".stripMargin
2020

2121
"A DdgDotGenerator" should {
22-
"create a dot graph with 30 edges" in {
22+
"create a dot graph with 31 edges" in {
2323
implicit val s = semantics
2424
val lines = cpg.method.name("foo").dotDdg.l.head.split("\n")
2525
lines.head.startsWith("digraph foo") shouldBe true
26-
lines.count(x => x.contains("->")) shouldBe 30
26+
lines.count(x => x.contains("->")) shouldBe 31
2727
lines.last.startsWith("}") shouldBe true
2828
}
2929
}
@@ -42,7 +42,7 @@ class DotDdgGeneratorTests2 extends DataFlowCodeToCpgSuite {
4242
"create correct dot graph" in {
4343
implicit val s = semantics
4444
val lines = cpg.method.name("foo").dotDdg.l.head.split("\n")
45-
lines.count(x => x.contains("->") && x.contains("\"x\"")) shouldBe 2
45+
lines.count(x => x.contains("->") && x.contains("\"x\"")) shouldBe 3
4646
}
4747

4848
}

fuzzyc2cpg-tests/src/test/scala/io/shiftleft/fuzzyc2cpg/querying/CDataFlowTests.scala

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,10 @@ class CDataFlowTests1 extends DataFlowCodeToCpgSuite {
2626

2727
"Test 1: flow from function call read to multiple versions of the same variable" in {
2828

29-
def source = cpg.identifier.name("sz")
30-
31-
def sink = cpg.call.name("read")
32-
29+
val source = cpg.identifier.name("sz").l
30+
val sink = cpg.call.name("read").l
3331
def flows = sink.reachableByFlows(source)
3432

35-
flows.map(flowToResultPairs).toSet.size shouldBe 6
36-
3733
flows.map(flowToResultPairs).toSet shouldBe
3834
Set(
3935
List[(String, Option[Integer])](
@@ -872,7 +868,7 @@ class CDataFlowTests27 extends DataFlowCodeToCpgSuite {
872868
|""".stripMargin
873869

874870
"Test 27: find flows of last statements to METHOD_RETURN" in {
875-
val source = cpg.call("free")
871+
val source = cpg.call("free").argument(1)
876872
val sink = cpg.method("foo").methodReturn
877873
implicit val s: Semantics = semantics
878874
val flows = sink.reachableByFlows(source).l
@@ -883,3 +879,44 @@ class CDataFlowTests27 extends DataFlowCodeToCpgSuite {
883879
)
884880
}
885881
}
882+
883+
class CDataFlowTests28 extends DataFlowCodeToCpgSuite {
884+
override val code =
885+
"""
886+
|int foo() {
887+
| int y = 1;
888+
| y = something_else;
889+
| y = 10;
890+
|}
891+
|
892+
|""".stripMargin
893+
894+
"Test 28: find that there is no flow from `y = 1` to exit node" in {
895+
val source = cpg.literal("1").l
896+
val sink = cpg.method("foo").methodReturn
897+
898+
val flows = sink.reachableByFlows(source).l
899+
flows.size shouldBe 0
900+
}
901+
}
902+
903+
class CDataFlowTests29 extends DataFlowCodeToCpgSuite {
904+
override val code =
905+
"""
906+
|int foo() {
907+
| char * y = malloc(10);
908+
| free(y);
909+
| y = 10;
910+
|}
911+
|
912+
|""".stripMargin
913+
914+
"Test 29: find that there is no flow from free(y) to exit node" in {
915+
val source = cpg.call("free").argument(1).l
916+
val sink = cpg.method("foo").methodReturn.l
917+
918+
implicit val s: Semantics = semantics
919+
val flows = sink.reachableByFlows(source).l
920+
flows.size shouldBe 0
921+
}
922+
}

fuzzyc2cpg-tests/src/test/scala/io/shiftleft/fuzzyc2cpg/querying/DataFlowTests.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ class DataFlowTests extends DataFlowCodeToCpgSuite {
5050
"should find flows from identifiers to return values of `flow`" in {
5151
val source = cpg.identifier
5252
val sink = cpg.method.name("flow").methodReturn
53-
sink.reachableByFlows(source).l.map(flowToResultPairs).distinct.size shouldBe 7
53+
sink.reachableByFlows(source).l.map(flowToResultPairs).distinct.size shouldBe 8
5454
}
5555

5656
"find flows from z to method returns of flow" in {

fuzzyc2cpg-tests/src/test/scala/io/shiftleft/fuzzyc2cpg/querying/FreeListDataFlowTests.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ class FreeListDataFlowTests extends DataFlowCodeToCpgSuite {
5050
"should find flows from identifiers to return values of `flow`" in {
5151
val source = cpg.identifier
5252
val sink = cpg.method.name("flow").methodReturn
53-
sink.reachableByFlows(source).l.map(flowToResultPairs).distinct.toSet.size shouldBe 7
53+
sink.reachableByFlows(source).l.map(flowToResultPairs).distinct.toSet.size shouldBe 8
5454
}
5555

5656
"find flows from z to method returns of flow" in {

0 commit comments

Comments
 (0)