Add an aggregable tuple class to abstract aggregations

MichaelRFairhurst · MichaelRFairhurst · commit dcc8d2820acb · 2025-06-01T12:40:25.000-07:00
diff --git a/README.md b/README.md
@@ -107,6 +107,41 @@ from Qtil::Product<Person, City>::Product product
 select product.getFirst(), product.getSecond()
 ```
 
+**AggregableTuple**: A class that can aggregate multiple values at a time, which can be useful for
+creating generic APIs involving unknown/configurable aggregation steps.
+
+```ql
+AggregableTuple::Piece getData(Person p) {
+  result = AggregableTuple::initString(p.getName()).appendInt(p.getAge())
+}
+
+int two() { result = 2 }
+
+predicate useSum(AggregableTuple::Sum<two/0>::Sum agg) {
+  exists(int countVal, string nameJoin, int ageSum |
+    countVal = agg.getCountTotal() and
+    nameJoin = agg.getAsJoinedString(0, ", ") and
+    ageSum = agg.getAsSummedInt(1) and
+    ... // Use the aggregation results in some way
+  )
+}
+```
+
+To aggregate the `AggregableTuple::Piece` values, each should be cast to a string and concatenated
+with a comma separator. The resulting value can be cast to an `AggregableTuple::Sum` type.
+
+```ql
+predicate createAndUseSum() {
+  exists(string agg |
+    agg = concat(string piece | piece = getData(getAPerson()) | piece, ",") and
+    useSum(agg)
+  )
+}
+```
+
+It is very important that every tuple is the same width and type, and that the `Sum` type is given
+the correct width as a parameter, otherwise the aggregation will not work correctly.
+
 ### Lists
 
 **Ordered**: Takes orderable data, and automatically adds `getPrevious()`, `getNext()` predicate members for ease of traversal.
diff --git a/src/qtil/tuple/AggregableTuple.qll b/src/qtil/tuple/AggregableTuple.qll
@@ -0,0 +1,153 @@
+private import qtil.parameterization.SignatureTypes
+private import qtil.parameterization.SignaturePredicates
+private import qtil.tuple.StringTuple as CustomStringTuple
+private import qtil.strings.Chars
+private import qtil.inheritance.Instance
+private import codeql.util.Boolean
+
+/** A tuple of strings whose underlying representation is a single comma-separated string. */
+class StringTuple = CustomStringTuple::StringTuple<Chars::comma/0>::Tuple;
+
+/**
+ * A module that allows multiple values to be aggregated at the same time, where each value
+ * (including the aggregated value) acts like a tuple.
+ *
+ * The tuple may contain any number of the following types of columns:
+ * - `string` columns, which are concatenated with a separator
+ * - `int` columns, which are summed
+ *
+ * Additionally, the unique values of each column can be counted, and the total number of unique
+ * aggregated tuples can be counted.
+ *
+ * This can be useful for writing generic code where a module may wish to perform an unknown number
+ * of aggregations in a context where it cannot perform the aggregation for itself.
+ *
+ * Each value to be aggregated should be of type `AggregableTuple::Piece`, and pieces should be
+ * aggregated with `concat(Piece p | p, ",")`, as the underlying representation is a
+ * comma-separated string (a `StringTuple`).
+ *
+ * After aggregation, the result should be cast to an `AggregableTuple::Sum` to access the
+ * aggregated values of each column. Every piece must have the same number of columns, and that
+ * number must be the one given to `Sum` as its `columns` parameter.
+ *
+ * Note: This will not be as performant as individual aggregations, and should only be used in cases
+ * where a single aggregation is not practical.
+ *
+ * Example usage:
+ * ```ql
+ * // What values a "person" may aggregate over defined here:
+ * AggregableTuple::Piece personAggregant(Person p) {
+ *   result = AggregableTuple::initString(p.name)
+ *            .appendInt(p.age)
+ * }
+ *
+ * // The number of columns in each piece:
+ * int two() { result = 2 }
+ *
+ * // A usage of that aggregation can be defined separately:
+ * predicate useAggregation(AggregableTuple::Sum<two/0>::Sum aggregated) {
+ *   exists(int counted, string names, int totalAge |
+ *     counted = aggregated.getCountTotal() and
+ *     names = aggregated.getAsJoinedString(0, ",") and
+ *     totalAge = aggregated.getAsSummedInt(1)
+ *     // Use `counted`, `names`, and `totalAge` as needed
+ *   )
+ * }
+ * ```
+ */
+module AggregableTuple {
+  /**
+   * Begin the construction of a new piece of an aggregable tuple with a `string` column.
+   *
+   * Sets the first column of this tuple to be the given `string` value. The `Piece`
+   * returned by this predicate can have additional columns appended to it of any type.
+   *
+   * NOTE(review): the value is stored verbatim; a value that itself contains a comma presumably
+   * shifts the column layout of the aggregated result -- confirm against `StringTuple`.
+   */
+  bindingset[s]
+  Piece initString(string s) { result = s }
+
+  /**
+   * Begin the construction of a new piece of an aggregable tuple with an `int` column.
+   *
+   * Sets the first column of this tuple to be the given `int` value. The `Piece`
+   * returned by this predicate can have additional columns appended to it of any type.
+   */
+  bindingset[i]
+  Piece initInt(int i) { result = i.toString() }
+
+  /**
+   * A piece of an aggregable tuple, which can be used to aggregate multiple values at the same
+   * time.
+   *
+   * This class can be built up one column at a time, beginning with one of the predicates
+   * `initString` or `initInt`. Additional columns can be appended to the piece using the
+   * `appendString` or `appendInt` member predicates.
+   *
+   * After all of the columns have been appended, the piece can be aggregated with
+   * `concat(Piece p | p, ",")`. Then the result can be cast to `AggregableTuple::Sum` to access the
+   * aggregated values of each column.
+   */
+  bindingset[this]
+  class Piece extends InfInstance<StringTuple>::Type {
+    /** Gets a copy of this piece with the `string` value `s` appended as a new last column. */
+    bindingset[this, s]
+    Piece appendString(string s) { result = inst().append(s) }
+
+    /** Gets a copy of this piece with the `int` value `i` appended as a new last column. */
+    bindingset[this, i]
+    Piece appendInt(int i) { result = inst().append(i.toString()) }
+  }
+
+  /**
+   * A module providing the `Sum` class, the aggregated form of a set of `Piece`s that each have
+   * exactly `columns()` columns.
+   */
+  module Sum<Nullary::Ret<int>::pred/0 columns> {
+    /**
+     * The result of concatenating a set of `Piece`s with a comma separator. Column indexes are
+     * zero-based and range from `0` to `columns() - 1`.
+     */
+    bindingset[this]
+    class Sum extends InfInstance<StringTuple>::Type {
+      /**
+       * Gets the number of tuples that were aggregated, computed as the total number of values in
+       * the underlying tuple divided by the number of columns.
+       */
+      bindingset[this]
+      int getCountTotal() { result = inst().size() / columns() }
+
+      /**
+       * Since the underlying representation is a comma-separated string, the ith value of
+       * the nth column can be found at the index `i * columns() + n`.
+       *
+       * This predicate returns all such indexes for the nth column.
+       */
+      bindingset[this]
+      int getARawColumnValueIndex(int colIdx) {
+        // Ranges are inclusive, so the last valid column is `columns() - 1`.
+        colIdx in [0 .. columns() - 1] and
+        exists(int rowIdx |
+          rowIdx = [0 .. getCountTotal() - 1] and
+          result = rowIdx * columns() + colIdx
+        )
+      }
+
+      /**
+       * Get all of the raw string values for the nth column of aggregated tuples.
+       */
+      bindingset[this]
+      string getARawColumn(int colIdx) {
+        colIdx in [0 .. columns() - 1] and
+        result = inst().get(getARawColumnValueIndex(colIdx))
+      }
+
+      /**
+       * Get the number of unique values in the nth column of aggregated tuples.
+       */
+      bindingset[this]
+      int countColumn(int colIdx) {
+        colIdx in [0 .. columns() - 1] and
+        result = count(string item | item = getARawColumn(colIdx))
+      }
+
+      /**
+       * Get the nth column of aggregated tuples, treated as strings and joined with the given
+       * separator.
+       *
+       * The aggregation ranges over the distinct values of the column, so a value that occurs in
+       * several tuples appears once in the result.
+       */
+      bindingset[this, sep]
+      string getAsJoinedString(int colIdx, string sep) {
+        colIdx in [0 .. columns() - 1] and
+        result = concat(string item | item = getARawColumn(colIdx) | item, sep)
+      }
+
+      /**
+       * Get the nth column of aggregated tuples, treated as integers and summed.
+       *
+       * Values that cannot be parsed as an `int` do not contribute to the sum.
+       */
+      bindingset[this]
+      int getAsSummedInt(int colIdx) {
+        colIdx in [0 .. columns() - 1] and
+        // Aggregate over the cell indexes rather than the cell values: ranging over the values
+        // would collapse equal integers from different tuples into a single summand.
+        result =
+          sum(int idx | idx = getARawColumnValueIndex(colIdx) | inst().get(idx).toInt())
+      }
+    }
+  }
+}
diff --git a/test/qtil/locations/CustomPathProblem/CustomPathStateProblemTest.ql b/test/qtil/locations/CustomPathProblem/CustomPathStateProblemTest.ql
@@ -0,0 +1,99 @@
+/**
+ * @name Custom Path State Problem Example
+ * @description This example demonstrates how to define a custom path problem in C++ using Qtil. It
+ *   identifies paths from top-level variables to constructors that are called during their
+ *   initialization. Additionally, it tracks the depth of the search as a state.
+ * @id qtil-example-custom-path-problem
+ * @severity info
+ * @kind path-problem
+ */
+
+import cpp
+import cpp as cpp
+import qtil.locations.Locatable
+import qtil.locations.CustomPathStateProblem
+import CustomPathStateProblemCpp
+
+/** Defines cpp location behavior; this will be moved to qtil.cpp eventually. */
+module CustomPathStateProblemCpp {
+  /** Supplies `cpp::Locatable` as the `Locatable` class for the `Location`-based config. */
+  module ElementConfig implements LocatableConfig<Location> {
+    class Locatable = cpp::Locatable;
+  }
+
+  // Instantiate the path state problem library with the cpp `Location` and the config above.
+  import PathStateProblem<Location, ElementConfig>
+}
+
+/**
+ * A custom path problem configuration that walks the call graph: paths begin at top-level
+ * variables, step through the function calls made during their initialization, and finish at
+ * constructors.
+ */
+module CallGraphPathProblemConfig implements CustomPathStateProblemConfigSig {
+  /**
+   * The nodes of the path problem. Flow is tracked from variable initialization to constructor
+   * calls, so the nodes are top-level variables (roots), function calls (edges), and functions
+   * (constructors being the end nodes).
+   */
+  class Node extends Locatable {
+    Node() {
+      this.(Variable).isTopLevel()
+      or
+      this instanceof FunctionCall
+      or
+      this instanceof Function
+    }
+  }
+
+  /** The state carried along a path: the search depth. */
+  class State = int;
+
+  /** Holds if `n` is a variable node; the search starts there with a depth of zero. */
+  predicate start(Node n, int depth) { depth = 0 and n instanceof Variable }
+
+  /**
+   * Holds if `n` is a constructor of some class. Reaching one means "problematic" flow from a
+   * variable has been identified. The `depth` does not restrict the result.
+   */
+  bindingset[depth]
+  predicate end(Node n, int depth) {
+    exists(Class cls, Function ctor |
+      ctor = cls.getAConstructor() and
+      n = ctor
+    )
+  }
+
+  /** Holds if there is a step from `a` at `depth1` to `b` at `depth2`. */
+  bindingset[depth1]
+  bindingset[depth2]
+  predicate edge(Node a, int depth1, Node b, int depth2) {
+    // Increment depth for each edge traversed
+    depth2 = depth1 + 1 and
+    (
+      // A function call to some function steps to each of the function calls inside that
+      // function's body.
+      exists(FunctionCall call, Function callee, FunctionCall nested |
+        a = call and
+        b = nested and
+        callee = call.getTarget() and
+        nested.getEnclosingFunction() = callee
+      )
+      or
+      // A function call steps to its target when that target is an end node (a constructor).
+      exists(FunctionCall call, Function target |
+        a = call and
+        b = target and
+        target = call.getTarget() and
+        end(target, 0)
+      )
+      or
+      // A variable steps to every function call nested in its initializer expression.
+      exists(Variable v, Expr init, FunctionCall call |
+        a = v and
+        b = call and
+        init = v.getInitializer().getExpr() and
+        call.getParent*() = init
+      )
+    )
+  }
+}
+
+// Instantiate the custom path problem with the configuration above and define the problem.
+//
+// This automatically generates the `nodes` and `edges` predicates based on the configuration,
+// which make the path traceable for users.
+import CustomPathStateProblem<CallGraphPathProblemConfig>
+
+from Variable topLevelVar, Function constructor, int depth
+where problem(topLevelVar, _, constructor, depth) // Finds paths from variables to constructors
+select topLevelVar, topLevelVar, constructor,
+  "Initialization of variable $@ calls constructor $@ at depth " + depth, topLevelVar,
+  topLevelVar.getName(), constructor, constructor.getName()
diff --git a/test/qtil/tuple/AggregableTupleTest.ql b/test/qtil/tuple/AggregableTupleTest.ql