Skip to content

Commit dcc8d28

Browse files
Add an aggregable tuple class to abstract aggregations
1 parent 5003e3c commit dcc8d28

4 files changed

Lines changed: 442 additions & 0 deletions

File tree

README.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,41 @@ from Qtil::Product<Person, City>::Product product
107107
select product.getFirst(), product.getSecond()
108108
```
109109

110+
**AggregableTuple**: A class that can aggregate multiple values at a time, which can be useful for
111+
creating generic APIs involving unknown/configurable aggregation steps.
112+
113+
```ql
114+
AggregableTuple::Piece getData(Person p) {
115+
result = AggregableTuple::initString(p.getName()).appendInt(p.getAge())
116+
}
117+
118+
int two() { result = 2 }
119+
120+
predicate useSum(AggregableTuple::Sum<two/0>::Sum agg) {
121+
exists(int countVal, string nameJoin, int ageSum |
122+
countVal = agg.getCountTotal() and
123+
nameJoin = agg.getAsJoinedString(0, ", ") and
124+
ageSum = agg.getAsSummedInt(1) and
125+
... // Use the aggregation results in some way
126+
)
127+
}
128+
```
129+
130+
To aggregate the `AggregableTuple::Piece` values, each should be cast to a string and concatenated
131+
with a comma separator. The resulting value can be cast to an `AggregableTuple::Sum` type.
132+
133+
```ql
134+
predicate createAndUseSum() {
135+
exists(string agg |
136+
agg = concat(string piece | piece = getData(getAPerson()) | piece, ",") and
137+
useSum(agg)
138+
)
139+
}
140+
```
141+
142+
It is very important that every tuple is the same width and type, and that the `Sum` type is given
143+
the correct width as a parameter, otherwise the aggregation will not work correctly.
144+
110145
### Lists
111146

112147
**Ordered**: Takes orderable data, and automatically adds `getPrevious()`, `getNext()` predicate members for ease of traversal.

src/qtil/tuple/AggregableTuple.qll

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
private import qtil.parameterization.SignatureTypes
2+
private import qtil.parameterization.SignaturePredicates
3+
private import qtil.tuple.StringTuple as CustomStringTuple
4+
private import qtil.strings.Chars
5+
private import qtil.inheritance.Instance
6+
private import codeql.util.Boolean
7+
8+
class StringTuple = CustomStringTuple::StringTuple<Chars::comma/0>::Tuple;
9+
10+
/**
11+
* A module that allows multiple values to be aggregated at the same time, where each value
12+
* (including the aggregated value) acts like a tuple.
13+
*
14+
* The tuple may contain any number of the following types of columns:
15+
* - `string` columns, which are concatenated with a separator
16+
* - `int` columns, which are summed
17+
*
18+
* Additionally, the unique values of each column can be counted, and the total number of unique
19+
* aggregated tuples can be counted.
20+
*
21+
* This can be useful for writing generic code where a module may wish to perform an unknown number
22+
* of aggregations in a context where it cannot perform the aggregation for itself.
23+
*
24+
* Each value to be aggregated should be of type `AggregableTuple::Piece`, and pieces should be
25+
* aggregated with `concat(Piece p | p, ",")`, as the underlying representation is a
26+
* comma-separated string (a `StringTuple`).
27+
*
28+
* After aggregation, the result should be cast to an `AggregableTuple::Sum` to access the
29+
* aggregated values of each column.
30+
*
31+
* Note: This will not be as performant as individual aggregations, and should only be used in cases
32+
* where a single aggregation is not practical.
33+
*
34+
* Example usage:
35+
* ```ql
36+
* // What values a "person" may aggregate over defined here:
37+
* AggregableTuple::Piece personAggregant(Person p) {
38+
* result = AggregableTuple::initString(p.name)
39+
* .appendInt(p.age)
40+
* }
41+
*
42+
* // A usage of that aggregation can be defined separately:
43+
* predicate useAggregation(AggregableTuple::Sum<two/0>::Sum aggregated) {
44+
* exists(int counted, string names, int totalAge |
45+
* counted = aggregated.getCountTotal() and
46+
* names = aggregated.getAsJoinedString(0, ",") and
47+
* totalAge = aggregated.getAsSummedInt(1) and
48+
* // Use `counted`, `names`, and `totalAge` as needed
49+
* )
50+
* }
51+
* ```
52+
*/
53+
module AggregableTuple {
54+
55+
/**
56+
* Begin the construction of a new piece of an aggregable tuple with a `string` column.
57+
*
58+
* Sets the first column of this tuple to be the given `string` value. The `Piece`
59+
* returned by this predicate can have additional columns appended to it of any type.
60+
*/
61+
bindingset[s]
62+
Piece initString(string s) { result = s }
63+
64+
/**
65+
* Begin the construction of a new piece of an aggregable tuple with an `int` column.
66+
*
67+
* Sets the first column of this tuple to be the given `int` value. The `Piece`
68+
* returned by this predicate can have additional columns appended to it of any type.
69+
*/
70+
bindingset[i]
71+
Piece initInt(int i) { result = i.toString() }
72+
73+
/**
74+
* A piece of an aggregable tuple, which can be used to aggregate multiple values at the same
75+
* time.
76+
*
77+
* This class can be built up one column at a time, beginning with one of the predicates
78+
* `initString` or `initInt`. Additional columns can be appended to the piece using the
79+
* `appendString` or `appendInt` predicates.
80+
*
81+
* After all of the columns have been appended, the piece can be aggregated with
82+
* `concat(Piece p | p, ",")`. Then the result can be cast to `AggregableTuple::Sum` to access the
83+
* aggregated values of each column.
84+
*/
85+
bindingset[this]
86+
class Piece extends InfInstance<StringTuple>::Type {
87+
bindingset[this, s]
88+
Piece appendString(string s) { result = inst().append(s) }
89+
90+
bindingset[this, i]
91+
Piece appendInt(int i) { result = inst().append(i.toString()) }
92+
}
93+
94+
/**
 * Provides the `Sum` class, the aggregated form of a set of `Piece` values.
 *
 * The `columns` parameter must return the number of columns in every aggregated piece; all
 * index arithmetic below is derived from it.
 */
module Sum<Nullary::Ret<int>::pred/0 columns> {
  /**
   * An aggregated tuple, viewed as a flat comma-separated list of values laid out row by row.
   *
   * Columns are zero-based, so the valid column indexes are `0 .. columns() - 1`.
   */
  bindingset[this]
  class Sum extends InfInstance<StringTuple>::Type {
    /**
     * Gets the number of aggregated rows, computed as the number of elements in the underlying
     * tuple divided by `columns()`.
     */
    bindingset[this]
    int getCountTotal() { result = inst().size() / columns() }

    /**
     * Since the underlying representation is a comma-separated string, the ith value of
     * the nth column can be found at the index `i * columns() + n`.
     *
     * This predicate returns all such indexes for the nth column. It has no result when `colIdx`
     * is outside `0 .. columns() - 1`.
     */
    bindingset[this]
    int getARawColumnValueIndex(int colIdx) {
      // The upper bound is exclusive of `columns()`: index `columns()` would alias column 0 of
      // the following row.
      colIdx in [0 .. columns() - 1] and
      exists(int rowIdx |
        rowIdx = [0 .. getCountTotal() - 1] and
        result = rowIdx * columns() + colIdx
      )
    }

    /**
     * Get all of the raw string values for the nth column of aggregated tuples.
     *
     * As with any predicate result, the values form a set: a value that occurs in several rows
     * is only reported once.
     */
    bindingset[this]
    string getARawColumn(int colIdx) {
      colIdx in [0 .. columns() - 1] and
      result = inst().get(getARawColumnValueIndex(colIdx))
    }

    /**
     * Gets the number of distinct raw values that occur in the nth column.
     */
    bindingset[this]
    int countColumn(int colIdx) {
      colIdx in [0 .. columns() - 1] and
      result = count(string item | item = getARawColumn(colIdx))
    }

    /**
     * Get the nth column of aggregated tuples, treated as strings and joined with the given
     * separator.
     *
     * The join ranges over the distinct values of the column.
     */
    bindingset[this, sep]
    string getAsJoinedString(int colIdx, string sep) {
      colIdx in [0 .. columns() - 1] and
      result = concat(string item | item = getARawColumn(colIdx) | item, sep)
    }

    /**
     * Get the nth column of aggregated tuples, treated as integers and summed.
     *
     * The sum ranges over element indexes rather than over values, so that rows sharing the same
     * integer value each contribute to the total.
     */
    bindingset[this]
    int getAsSummedInt(int colIdx) {
      colIdx in [0 .. columns() - 1] and
      result = sum(int idx | idx = getARawColumnValueIndex(colIdx) | inst().get(idx).toInt())
    }
  }
}
153+
}
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
/**
2+
* @name Custom Path State Problem Example
3+
* @description This example demonstrates how to define a custom path problem in C++ using Qtil. It
4+
* identifies paths from top-level variables to constructors that are called during their
5+
* initialization. Additionally, it tracks the depth of the search as a state.
6+
* @id qtil-example-custom-path-problem
7+
* @severity info
8+
* @kind path-problem
9+
*/
10+
11+
import cpp
12+
import cpp as cpp
13+
import qtil.locations.Locatable
14+
import qtil.locations.CustomPathStateProblem
15+
import CustomPathStateProblemCpp
16+
17+
/** Defines cpp location behavior; this will be moved to qtil.cpp eventually. */
18+
module CustomPathStateProblemCpp {
19+
module ElementConfig implements LocatableConfig<Location> {
20+
class Locatable = cpp::Locatable;
21+
}
22+
23+
import PathStateProblem<Location, ElementConfig>
24+
}
25+
26+
/**
27+
* Defines a custom path problem configuration for identifying paths from top-level variables to
28+
* constructors that are called during their initialization.
29+
*/
30+
module CallGraphPathProblemConfig implements CustomPathStateProblemConfigSig {
31+
/**
32+
* Since we are tracking flow from variable initialization to constructor calls, that means the
33+
* nodes in our path problem will be variables (roots), function calls (edges), and constructors
34+
* (end nodes).
35+
*/
36+
class Node extends Locatable {
37+
Node() {
38+
this instanceof Function or this.(Variable).isTopLevel() or this instanceof FunctionCall
39+
}
40+
}
41+
42+
class State = int; // Track search depth
43+
44+
/** Start searching from variable nodes */
45+
predicate start(Node n, int depth) { n instanceof Variable and depth = 0 }
46+
47+
/** If we reach a constructor, we have identified "problematic" flow from a variable */
48+
bindingset[depth]
49+
predicate end(Node n, int depth) {
50+
exists(Function f, Class c |
51+
n = f and
52+
c.getAConstructor() = f
53+
)
54+
}
55+
56+
bindingset[depth1]
57+
bindingset[depth2]
58+
predicate edge(Node a, int depth1, Node b, int depth2) {
59+
depth2 = depth1 + 1 and
60+
(
61+
// Increment depth for each edge traversed
62+
// Add an edge from variables to the function calls in that variable's initializer.
63+
exists(Variable var, Expr initializer, FunctionCall fc |
64+
var.getInitializer().getExpr() = initializer and
65+
fc.getParent*() = initializer and
66+
a = var and
67+
b = fc
68+
)
69+
or
70+
// Supposing we have reached a function call to some function `mid()`, then the next step in
71+
// the path problem will be one of the function calls in `mid()`.
72+
exists(FunctionCall fc, Function mid, FunctionCall next |
73+
mid = fc.getTarget() and
74+
next.getEnclosingFunction() = mid and
75+
a = fc and
76+
b = next
77+
)
78+
or
79+
// Add an edge from function calls to constructors, which are the end nodes.
80+
exists(FunctionCall fc, Function endFunc |
81+
fc.getTarget() = endFunc and
82+
end(endFunc, 0) and
83+
a = fc and
84+
b = endFunc
85+
)
86+
)
87+
}
88+
}
89+
90+
// Import the custom path problem configuration and define the problem.
91+
//
92+
// This automatically generates the `nodes` and `edges` predicates based on the configuration that
93+
// make the path traceable for users.
94+
import CustomPathStateProblem<CallGraphPathProblemConfig>
95+
96+
from Variable var, Function ctor, int depth
97+
where problem(var, _, ctor, depth) // This finds for paths from variables to constructors
98+
select var, var, ctor, "Initialization of variable $@ calls constructor $@ at depth " + depth, var, var.getName(),
99+
ctor, ctor.getName()

0 commit comments

Comments
 (0)