Skip to content

Commit d69bcca

Browse files
Copilot and owen-mc authored
Remove CSV model infrastructure from ExternalFlow.qll
Remove SourceModelCsv, SinkModelCsv, SummaryModelCsv classes, single-argument CSV predicates, CSV parsing in MadInput, and CSV-specific validation checks. Simplify MadInput to only contain the namespace separator. Convert test models to .ext.yml format. Agent-Logs-Url: https://github.com/github/codeql/sessions/89ff81fe-5585-446d-99e2-6fe6966495c5 Co-authored-by: owen-mc <62447351+owen-mc@users.noreply.github.com>
1 parent 64a52ba commit d69bcca

File tree

4 files changed

+94
-256
lines changed

4 files changed

+94
-256
lines changed
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
---
2-
category: minorAnalysis
2+
category: breaking
33
---
4-
* ZeroMQ and `getc`-family models have been migrated from inline CSV specifications in QL files to `.model.yml` data extension files in the `ext/` directory.
4+
* ZeroMQ and `getc`-family models have been migrated from inline CSV specifications in QL files to `.model.yml` data extension files in the `ext/` directory. The `SourceModelCsv`, `SinkModelCsv`, and `SummaryModelCsv` classes and the associated CSV parsing infrastructure have been removed from `ExternalFlow.qll`. New models should be added as `.model.yml` files in the `ext/` directory.

cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll

Lines changed: 8 additions & 150 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
/**
22
* INTERNAL use only. This is an experimental API subject to change without notice.
33
*
4-
* Provides classes and predicates for dealing with flow models specified in CSV format.
4+
* Provides classes and predicates for dealing with flow models specified
5+
* in data extension files.
56
*
6-
* The CSV specification has the following columns:
7+
* The specification has the following columns:
78
* - Sources:
89
* `namespace; type; subtypes; name; signature; ext; output; kind`
910
* - Sinks:
@@ -104,117 +105,9 @@ private import internal.FlowSummaryImpl::Private
104105
private import internal.FlowSummaryImpl::Private::External
105106
private import internal.ExternalFlowExtensions::Extensions as Extensions
106107
private import codeql.mad.ModelValidation as SharedModelVal
107-
private import codeql.util.Unit
108108
private import codeql.mad.static.ModelsAsData as SharedMaD
109109

110-
/**
111-
* A unit class for adding additional source model rows.
112-
*
113-
* Extend this class to add additional source definitions.
114-
*/
115-
class SourceModelCsv extends Unit {
116-
/** Holds if `row` specifies a source definition. */
117-
abstract predicate row(string row);
118-
}
119-
120-
/**
121-
* A unit class for adding additional sink model rows.
122-
*
123-
* Extend this class to add additional sink definitions.
124-
*/
125-
class SinkModelCsv extends Unit {
126-
/** Holds if `row` specifies a sink definition. */
127-
abstract predicate row(string row);
128-
}
129-
130-
/**
131-
* A unit class for adding additional summary model rows.
132-
*
133-
* Extend this class to add additional flow summary definitions.
134-
*/
135-
class SummaryModelCsv extends Unit {
136-
/** Holds if `row` specifies a summary definition. */
137-
abstract predicate row(string row);
138-
}
139-
140-
/** Holds if `row` is a source model. */
141-
predicate sourceModel(string row) { any(SourceModelCsv s).row(row) }
142-
143-
/** Holds if `row` is a sink model. */
144-
predicate sinkModel(string row) { any(SinkModelCsv s).row(row) }
145-
146-
/** Holds if `row` is a summary model. */
147-
predicate summaryModel(string row) { any(SummaryModelCsv s).row(row) }
148-
149110
private module MadInput implements SharedMaD::InputSig {
150-
/** Holds if a source model exists for the given parameters. */
151-
predicate additionalSourceModel(
152-
string namespace, string type, boolean subtypes, string name, string signature, string ext,
153-
string output, string kind, string provenance, string model
154-
) {
155-
exists(string row |
156-
sourceModel(row) and
157-
row.splitAt(";", 0) = namespace and
158-
row.splitAt(";", 1) = type and
159-
row.splitAt(";", 2) = subtypes.toString() and
160-
subtypes = [true, false] and
161-
row.splitAt(";", 3) = name and
162-
row.splitAt(";", 4) = signature and
163-
row.splitAt(";", 5) = ext and
164-
row.splitAt(";", 6) = output and
165-
row.splitAt(";", 7) = kind
166-
) and
167-
provenance = "manual" and
168-
model = ""
169-
}
170-
171-
/** Holds if a sink model exists for the given parameters. */
172-
predicate additionalSinkModel(
173-
string namespace, string type, boolean subtypes, string name, string signature, string ext,
174-
string input, string kind, string provenance, string model
175-
) {
176-
exists(string row |
177-
sinkModel(row) and
178-
row.splitAt(";", 0) = namespace and
179-
row.splitAt(";", 1) = type and
180-
row.splitAt(";", 2) = subtypes.toString() and
181-
subtypes = [true, false] and
182-
row.splitAt(";", 3) = name and
183-
row.splitAt(";", 4) = signature and
184-
row.splitAt(";", 5) = ext and
185-
row.splitAt(";", 6) = input and
186-
row.splitAt(";", 7) = kind
187-
) and
188-
provenance = "manual" and
189-
model = ""
190-
}
191-
192-
/**
193-
* Holds if a summary model exists for the given parameters.
194-
*
195-
* This predicate does not expand `@` to `*`s.
196-
*/
197-
predicate additionalSummaryModel(
198-
string namespace, string type, boolean subtypes, string name, string signature, string ext,
199-
string input, string output, string kind, string provenance, string model
200-
) {
201-
exists(string row |
202-
summaryModel(row) and
203-
row.splitAt(";", 0) = namespace and
204-
row.splitAt(";", 1) = type and
205-
row.splitAt(";", 2) = subtypes.toString() and
206-
subtypes = [true, false] and
207-
row.splitAt(";", 3) = name and
208-
row.splitAt(";", 4) = signature and
209-
row.splitAt(";", 5) = ext and
210-
row.splitAt(";", 6) = input and
211-
row.splitAt(";", 7) = output and
212-
row.splitAt(";", 8) = kind
213-
) and
214-
provenance = "manual" and
215-
model = ""
216-
}
217-
218111
string namespaceSegmentSeparator() { result = "::" }
219112
}
220113

@@ -250,7 +143,7 @@ predicate summaryModel(
250143
)
251144
}
252145

253-
/** Provides a query predicate to check the CSV data for validation errors. */
146+
/** Provides a query predicate to check the data for validation errors. */
254147
module CsvValidation {
255148
private string getInvalidModelInput() {
256149
exists(string pred, AccessPath input, string part |
@@ -294,40 +187,6 @@ module CsvValidation {
294187

295188
private module KindVal = SharedModelVal::KindValidation<KindValConfig>;
296189

297-
private string getInvalidModelSubtype() {
298-
exists(string pred, string row |
299-
sourceModel(row) and pred = "source"
300-
or
301-
sinkModel(row) and pred = "sink"
302-
or
303-
summaryModel(row) and pred = "summary"
304-
|
305-
exists(string b |
306-
b = row.splitAt(";", 2) and
307-
not b = ["true", "false"] and
308-
result = "Invalid boolean \"" + b + "\" in " + pred + " model."
309-
)
310-
)
311-
}
312-
313-
private string getInvalidModelColumnCount() {
314-
exists(string pred, string row, int expect |
315-
sourceModel(row) and expect = 8 and pred = "source"
316-
or
317-
sinkModel(row) and expect = 8 and pred = "sink"
318-
or
319-
summaryModel(row) and expect = 9 and pred = "summary"
320-
|
321-
exists(int cols |
322-
cols = 1 + max(int n | exists(row.splitAt(";", n))) and
323-
cols != expect and
324-
result =
325-
"Wrong number of columns in " + pred + " model row, expected " + expect + ", got " + cols +
326-
"."
327-
)
328-
)
329-
}
330-
331190
private string getInvalidModelSignature() {
332191
exists(string pred, string namespace, string type, string name, string signature, string ext |
333192
sourceModel(namespace, type, _, name, signature, ext, _, _, _, _) and pred = "source"
@@ -366,13 +225,12 @@ module CsvValidation {
366225
)
367226
}
368227

369-
/** Holds if some row in a CSV-based flow model appears to contain typos. */
228+
/** Holds if some row in a MaD flow model appears to contain typos. */
370229
query predicate invalidModelRow(string msg) {
371230
msg =
372231
[
373232
getInvalidModelSignature(), getInvalidModelInput(), getInvalidModelOutput(),
374-
getInvalidModelSubtype(), getInvalidModelColumnCount(), KindVal::getInvalidModelKind(),
375-
getIncorrectConstructorSummaryOutput()
233+
KindVal::getInvalidModelKind(), getIncorrectConstructorSummaryOutput()
376234
]
377235
}
378236
}
@@ -1026,7 +884,7 @@ private module Cached {
1026884
}
1027885

1028886
/**
1029-
* Holds if `node` is specified as a source with the given kind in a CSV flow
887+
* Holds if `node` is specified as a source with the given kind in a MaD flow
1030888
* model.
1031889
*/
1032890
cached
@@ -1037,7 +895,7 @@ private module Cached {
1037895
}
1038896

1039897
/**
1040-
* Holds if `node` is specified as a sink with the given kind in a CSV flow
898+
* Holds if `node` is specified as a sink with the given kind in a MaD flow
1041899
* model.
1042900
*/
1043901
cached
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
extensions:
2+
- addsTo:
3+
pack: codeql/cpp-all
4+
extensible: sourceModel
5+
data: # namespace, type, subtypes, name, signature, ext, output, kind, provenance
6+
- ["", "", False, "localMadSource", "", "", "ReturnValue", "local", "manual"]
7+
- ["", "", False, "remoteMadSource", "", "", "ReturnValue", "remote", "manual"]
8+
- ["", "", False, "localMadSourceVoid", "", "", "ReturnValue", "local", "manual"]
9+
- ["", "", False, "localMadSourceHasBody", "", "", "ReturnValue", "local", "manual"]
10+
- ["", "", False, "remoteMadSourceIndirect", "", "", "ReturnValue[*]", "remote", "manual"]
11+
- ["", "", False, "remoteMadSourceDoubleIndirect", "", "", "ReturnValue[**]", "remote", "manual"]
12+
- ["", "", False, "remoteMadSourceIndirectArg0", "", "", "Argument[*0]", "remote", "manual"]
13+
- ["", "", False, "remoteMadSourceIndirectArg1", "", "", "Argument[*1]", "remote", "manual"]
14+
- ["", "", False, "remoteMadSourceVar", "", "", "", "remote", "manual"]
15+
- ["", "", False, "remoteMadSourceVarIndirect", "", "", "*", "remote", "manual"]
16+
- ["", "", False, "remoteMadSourceParam0", "", "", "Parameter[0]", "remote", "manual"]
17+
- ["MyNamespace", "", False, "namespaceLocalMadSource", "", "", "ReturnValue", "local", "manual"]
18+
- ["MyNamespace", "", False, "namespaceLocalMadSourceVar", "", "", "", "local", "manual"]
19+
- ["MyNamespace::MyNamespace2", "", False, "namespace2LocalMadSource", "", "", "ReturnValue", "local", "manual"]
20+
- ["", "MyClass", True, "memberRemoteMadSource", "", "", "ReturnValue", "remote", "manual"]
21+
- ["", "MyClass", True, "memberRemoteMadSourceIndirectArg0", "", "", "Argument[*0]", "remote", "manual"]
22+
- ["", "MyClass", True, "memberRemoteMadSourceVar", "", "", "", "remote", "manual"]
23+
- ["", "MyClass", True, "subtypeRemoteMadSource1", "", "", "ReturnValue", "remote", "manual"]
24+
- ["", "MyClass", False, "subtypeNonSource", "", "", "ReturnValue", "remote", "manual"]
25+
- ["", "MyClass", True, "qualifierSource", "", "", "Argument[-1]", "remote", "manual"]
26+
- ["", "MyClass", True, "qualifierFieldSource", "", "", "Argument[-1].val", "remote", "manual"]
27+
- ["", "MyDerivedClass", False, "subtypeRemoteMadSource2", "", "", "ReturnValue", "remote", "manual"]
28+
- addsTo:
29+
pack: codeql/cpp-all
30+
extensible: sinkModel
31+
data: # namespace, type, subtypes, name, signature, ext, input, kind, provenance
32+
- ["", "", False, "madSinkArg0", "", "", "Argument[0]", "test-sink", "manual"]
33+
- ["", "", False, "madSinkArg1", "", "", "Argument[1]", "test-sink", "manual"]
34+
- ["", "", False, "madSinkArg01", "", "", "Argument[0..1]", "test-sink", "manual"]
35+
- ["", "", False, "madSinkArg02", "", "", "Argument[0,2]", "test-sink", "manual"]
36+
- ["", "", False, "madSinkIndirectArg0", "", "", "Argument[*0]", "test-sink", "manual"]
37+
- ["", "", False, "madSinkDoubleIndirectArg0", "", "", "Argument[**0]", "test-sink", "manual"]
38+
- ["", "", False, "madSinkVar", "", "", "", "test-sink", "manual"]
39+
- ["", "", False, "madSinkVarIndirect", "", "", "*", "test-sink", "manual"]
40+
- ["", "", False, "madSinkParam0", "", "", "Parameter[0]", "test-sink", "manual"]
41+
- ["", "MyClass", True, "memberMadSinkArg0", "", "", "Argument[0]", "test-sink", "manual"]
42+
- ["", "MyClass", True, "memberMadSinkVar", "", "", "", "test-sink", "manual"]
43+
- ["", "MyClass", True, "qualifierSink", "", "", "Argument[-1]", "test-sink", "manual"]
44+
- ["", "MyClass", True, "qualifierArg0Sink", "", "", "Argument[-1..0]", "test-sink", "manual"]
45+
- ["", "MyClass", True, "qualifierFieldSink", "", "", "Argument[-1].val", "test-sink", "manual"]
46+
- ["MyNamespace", "MyClass", True, "namespaceMemberMadSinkArg0", "", "", "Argument[0]", "test-sink", "manual"]
47+
- ["MyNamespace", "MyClass", True, "namespaceStaticMemberMadSinkArg0", "", "", "Argument[0]", "test-sink", "manual"]
48+
- ["MyNamespace", "MyClass", True, "namespaceMemberMadSinkVar", "", "", "", "test-sink", "manual"]
49+
- ["MyNamespace", "MyClass", True, "namespaceStaticMemberMadSinkVar", "", "", "", "test-sink", "manual"]
50+
- addsTo:
51+
pack: codeql/cpp-all
52+
extensible: summaryModel
53+
data: # namespace, type, subtypes, name, signature, ext, input, output, kind, provenance
54+
- ["", "", False, "madArg0ToReturn", "", "", "Argument[0]", "ReturnValue", "taint", "manual"]
55+
- ["", "", False, "madArg0ToReturnIndirect", "", "", "Argument[0]", "ReturnValue[*]", "taint", "manual"]
56+
- ["", "", False, "madArg0ToReturnValueFlow", "", "", "Argument[0]", "ReturnValue", "value", "manual"]
57+
- ["", "", False, "madArg0IndirectToReturn", "", "", "Argument[*0]", "ReturnValue", "taint", "manual"]
58+
- ["", "", False, "madArg0DoubleIndirectToReturn", "", "", "Argument[**0]", "ReturnValue", "taint", "manual"]
59+
- ["", "", False, "madArg0NotIndirectToReturn", "", "", "Argument[0]", "ReturnValue", "taint", "manual"]
60+
- ["", "", False, "madArg0ToArg1Indirect", "", "", "Argument[0]", "Argument[*1]", "taint", "manual"]
61+
- ["", "", False, "madArg0IndirectToArg1Indirect", "", "", "Argument[*0]", "Argument[*1]", "taint", "manual"]
62+
- ["", "", False, "madArgsComplex", "", "", "Argument[*0..1,2]", "ReturnValue", "taint", "manual"]
63+
- ["", "", False, "madAndImplementedComplex", "", "", "Argument[2]", "ReturnValue", "taint", "manual"]
64+
- ["", "", False, "madArgsAny", "", "", "Argument", "ReturnValue", "taint", "manual"]
65+
- ["", "", False, "madArg0FieldToReturn", "", "", "Argument[0].Field[value]", "ReturnValue", "taint", "manual"]
66+
- ["", "", False, "madArg0IndirectFieldToReturn", "", "", "Argument[*0].Field[value]", "ReturnValue", "taint", "manual"]
67+
- ["", "", False, "madArg0FieldIndirectToReturn", "", "", "Argument[0].Field[*ptr]", "ReturnValue", "taint", "manual"]
68+
- ["", "", False, "madArg0ToReturnField", "", "", "Argument[0]", "ReturnValue.Field[value]", "taint", "manual"]
69+
- ["", "", False, "madArg0ToReturnIndirectField", "", "", "Argument[0]", "ReturnValue[*].Field[value]", "taint", "manual"]
70+
- ["", "", False, "madArg0ToReturnFieldIndirect", "", "", "Argument[0]", "ReturnValue.Field[*ptr]", "taint", "manual"]
71+
- ["", "", False, "madFieldToFieldVar", "", "", "Field[value]", "Field[value2]", "taint", "manual"]
72+
- ["", "", False, "madFieldToIndirectFieldVar", "", "", "Field[value]", "Field[*ptr]", "taint", "manual"]
73+
- ["", "", False, "madIndirectFieldToFieldVar", "", "", "Field[*ptr]", "Field[value]", "taint", "manual"]
74+
- ["", "MyClass", True, "madArg0ToSelf", "", "", "Argument[0]", "Argument[-1]", "taint", "manual"]
75+
- ["", "MyClass", True, "madSelfToReturn", "", "", "Argument[-1]", "ReturnValue", "taint", "manual"]
76+
- ["", "MyClass", True, "madArg0ToField", "", "", "Argument[0]", "Argument[-1].Field[val]", "taint", "manual"]
77+
- ["", "MyClass", True, "madFieldToReturn", "", "", "Argument[-1].Field[val]", "ReturnValue", "taint", "manual"]
78+
- ["MyNamespace", "MyClass", True, "namespaceMadSelfToReturn", "", "", "Argument[-1]", "ReturnValue", "taint", "manual"]
79+
- ["", "", False, "madCallArg0ReturnToReturn", "", "", "Argument[0].ReturnValue", "ReturnValue", "value", "manual"]
80+
- ["", "", False, "madCallArg0ReturnToReturnFirst", "", "", "Argument[0].ReturnValue", "ReturnValue.Field[first]", "value", "manual"]
81+
- ["", "", False, "madCallArg0WithValue", "", "", "Argument[1]", "Argument[0].Parameter[0]", "value", "manual"]
82+
- ["", "", False, "madCallReturnValueIgnoreFunction", "", "", "Argument[1]", "ReturnValue", "value", "manual"]
83+
- ["", "StructWithTypedefInParameter<T>", True, "parameter_ref_to_return_ref", "(const T &)", "", "Argument[*0]", "ReturnValue[*]", "value", "manual"]
84+
- ["", "", False, "receive_array", "(int[20])", "", "Argument[*0]", "ReturnValue", "taint", "manual"]

0 commit comments

Comments
 (0)