Skip to content

Commit be7426e

Browse files
feat: implement code block failure detection (#5) and extra CWE detector (#6)
Issue #5 - CalculateToolCodeBlocksSupport:
- Add single-unknown isolation (Pass 2): for each FN, if exactly 1 snippet is unsupported, isolate it as the root cause
- Add FP isolation (Stage 2): identify safe snippets that tools fail to recognize, with a sanity check for FPs where all snippets are supported
- Add scorecard directory support: -r now accepts a directory to process all tool scorecards automatically
- Track actual test case names per snippet for bidirectional mapping

Issue #6 - DetectExtraCWEs (new):
- Detect CWE findings outside expected test cases using existing Reader parsers
- Normal mode: known CWEs in wrong test cases (e.g. CWE-89 in a hash test)
- Hard mode: any CWE not in the benchmark's expected set
1 parent ee31e2d commit be7426e

3 files changed

Lines changed: 544 additions & 40 deletions

File tree

plugin/src/main/java/org/owasp/benchmarkutils/tools/CalculateToolCodeBlocksSupport.java

Lines changed: 185 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,8 @@ protected void processCommandLineArgs(String[] args) {
131131
.build());
132132
options.addOption(
133133
Option.builder("r")
134-
.longOpt("file")
135-
.desc("a scorecard generated toolResults.csv file")
134+
.longOpt("results")
135+
.desc("a scorecard generated toolResults.csv file, or a directory of them")
136136
.hasArg()
137137
.required()
138138
.build());
@@ -178,32 +178,52 @@ protected void processCommandLineArgs(String[] args) {
178178
}
179179
}
180180

181-
/** Calculate the code block support for the specified tool for the specified test suite. */
181+
/** Calculate the code block support for the specified tool(s) for the specified test suite. */
182182
@Override
183183
protected void run() {
184+
// If the results parameter is a directory, iterate all scorecard CSVs in it
185+
if (csvResultsFile.isDirectory()) {
186+
File[] scorecardFiles = csvResultsFile.listFiles(
187+
f -> f.isFile() && f.getName().contains("Scorecard_for_")
188+
&& f.getName().endsWith(".csv"));
189+
if (scorecardFiles == null || scorecardFiles.length == 0) {
190+
System.out.println(
191+
"ERROR: No scorecard CSV files found in directory: "
192+
+ csvResultsFile.getAbsolutePath());
193+
return;
194+
}
195+
System.out.println(
196+
"Processing " + scorecardFiles.length + " scorecard files from: "
197+
+ csvResultsFile.getAbsolutePath() + "\n");
198+
for (File scorecardFile : scorecardFiles) {
199+
// Extract tool name from filename
200+
String fileName = scorecardFile.getName();
201+
int forIdx = fileName.indexOf("Scorecard_for_");
202+
String toolName = fileName.substring(forIdx + "Scorecard_for_".length())
203+
.replace(".csv", "");
204+
System.out.println(
205+
"\n========================================================");
206+
System.out.println("=== Tool: " + toolName + " ===");
207+
System.out.println(
208+
"========================================================");
209+
runForOneTool(scorecardFile);
210+
}
211+
return;
212+
}
184213

185-
// Initialize the expected and actual results data structure from the
186-
// TESTSUITE-attack-http.xml and results.csv already loaded.
187-
// Merge the .csv results with the codeblock details so you know which test cases
188-
// pass/fail.
189-
// - Probably can use the same structures used in ScoreCard generation, which is: TBD
190-
// - Should use an array of: TestCaseResult - This class represents a single test case
191-
// result. It documents the expected result (real),
192-
// and the actual result (result). Such an array is already contained in:
193-
// TestSuiteResults, but its contents have to be created using
194-
// put(TestCaseResult) one test case at a time.
195-
// List<AbstractTestCaseRequest> testcases = this.testSuite.getTestCases() is already loaded
196-
197-
// NOTE: The last 2 params are dummy values as I don't think we care about their type (yet)
214+
// Single file mode
215+
runForOneTool(csvResultsFile);
216+
}
217+
218+
private void runForOneTool(File toolCsvFile) {
198219
TestSuiteResults theToolResults =
199220
new TestSuiteResults(this.testSuite.getName(), false, ToolType.SAST);
200221

201-
// Get all the TestCase info loaded from TESTSUITE-attack-http.xml file
202222
List<AbstractTestCaseRequest> theTestcases = this.testSuite.getTestCases();
203223
int testSuiteSize = theTestcases.size();
204224

205225
try {
206-
java.io.Reader inReader = new java.io.FileReader(csvResultsFile);
226+
java.io.Reader inReader = new java.io.FileReader(toolCsvFile);
207227
CSVParser recordParser = CSVFormat.Builder.create().setHeader().build().parse(inReader);
208228

209229
List<CSVRecord> records = recordParser.getRecords();
@@ -457,6 +477,72 @@ protected void run() {
457477
}
458478
}
459479

480+
// 3b2. Issue #5 Pass 2: Single-unknown isolation.
481+
// For each FN, count how many of its snippets are NOT supported.
482+
// If exactly 1 is unsupported, that snippet is the isolated root cause.
483+
List<TestCaseResult> combinationFailures = new ArrayList<>();
484+
for (int tc : theToolResults.keySet()) {
485+
TestCaseResult theResult = theToolResults.get(tc).get(0);
486+
if (!theResult.isTruePositive() || theResult.isPassed()) continue;
487+
488+
CodeBlockSupportResults source =
489+
sourceCodeBlocksResults.get(theResult.getSource());
490+
CodeBlockSupportResults dataflow =
491+
dataflowCodeBlocksResults.get(theResult.getDataFlow());
492+
CodeBlockSupportResults sink = sinkCodeBlocksResults.get(theResult.getSink());
493+
494+
source.fnTestCases.add(theResult.getName());
495+
if (dataflow != null && !dataflow.name.isEmpty())
496+
dataflow.fnTestCases.add(theResult.getName());
497+
sink.fnTestCases.add(theResult.getName());
498+
499+
List<CodeBlockSupportResults> unknowns = new ArrayList<>();
500+
if (!source.supported) unknowns.add(source);
501+
if (dataflow != null && !dataflow.name.isEmpty() && !dataflow.supported)
502+
unknowns.add(dataflow);
503+
if (!sink.supported) unknowns.add(sink);
504+
505+
if (unknowns.size() == 1) {
506+
unknowns.get(0).isolatedFnCause.add(theResult.getName());
507+
} else if (unknowns.isEmpty()) {
508+
combinationFailures.add(theResult);
509+
}
510+
}
511+
512+
// 3b3. Issue #5 Stage 2: FP isolation.
513+
// For each FP, identify which snippet(s) make it safe.
514+
// If exactly 1 safe snippet, the tool fails to recognize it as safe.
515+
List<TestCaseResult> sanityCheckFailures = new ArrayList<>();
516+
for (int tc : theToolResults.keySet()) {
517+
TestCaseResult theResult = theToolResults.get(tc).get(0);
518+
if (theResult.isTruePositive() || theResult.isPassed()) continue;
519+
520+
CodeBlockSupportResults source =
521+
sourceCodeBlocksResults.get(theResult.getSource());
522+
CodeBlockSupportResults dataflow =
523+
dataflowCodeBlocksResults.get(theResult.getDataFlow());
524+
CodeBlockSupportResults sink = sinkCodeBlocksResults.get(theResult.getSink());
525+
526+
// Sanity check: all snippets are supported but tool still FPs
527+
boolean allSupported = source.supported
528+
&& (dataflow.name.isEmpty() || dataflow.supported)
529+
&& sink.supported;
530+
if (allSupported) {
531+
sanityCheckFailures.add(theResult);
532+
}
533+
534+
// Which snippet makes this test case safe (not a real vuln)?
535+
List<CodeBlockSupportResults> safeSnippets = new ArrayList<>();
536+
if (!source.truePositive) safeSnippets.add(source);
537+
if (!dataflow.name.isEmpty() && !dataflow.truePositive)
538+
safeSnippets.add(dataflow);
539+
if (!sink.truePositive) safeSnippets.add(sink);
540+
541+
if (safeSnippets.size() == 1) {
542+
safeSnippets.get(0).isolatedFpCause.add(theResult.getName());
543+
}
544+
}
545+
460546
// 3c. Calculate which sinks appear to be unsupported or always cause false positives
461547
String Always_FP_Output = "\n"; // Used to print all the FPs AFTER the Always FN values
462548
for (CodeBlockSupportResults sinkResult : sinkCodeBlocksResults.values()) {
@@ -568,28 +654,87 @@ protected void run() {
568654
System.out.println("Always FP: " + dataflowResult);
569655
}
570656

571-
/*
572-
// Print out codeblock coordinates of suspect False Positives
573-
for (int tc : theToolResults.keySet()) {
574-
TestCaseResult theResult = theToolResults.get(tc).get(0); // Always only one.
575-
boolean passed = theResult.isPassed();
576-
CodeBlockSupportResults source = sourceCodeBlocksResults.get(theResult.getSource());
577-
CodeBlockSupportResults dataflow =
578-
dataflowCodeBlocksResults.get(theResult.getDataFlow());
579-
CodeBlockSupportResults sink = sinkCodeBlocksResults.get(theResult.getSink());
580-
581-
if (!theResult.isTruePositive() && !passed && !sink.reported && !source.reported) {
582-
if (source.supported && dataflow.supported) {
583-
System.out.println(
584-
"False Positive possibly caused by SINK, since both source and dataflow supported. For: "
585-
+ theResult.toString());
586-
System.out.println(" " + source.toString());
587-
System.out.println(" " + dataflow.toString());
588-
System.out.println(" " + sink.toString());
589-
}
590-
}
591-
}
592-
*/
657+
// --- Issue #5 Pass 2 Report: Isolated FN Root Causes ---
658+
System.out.println("\n--- Pass 2: Isolated FN Root Causes ---");
659+
boolean foundIsolated = false;
660+
for (CodeBlockSupportResults sinkResult : sinkCodeBlocksResults.values()) {
661+
if (!sinkResult.isolatedFnCause.isEmpty()) {
662+
System.out.println(" " + sinkResult.toIsolationString());
663+
foundIsolated = true;
664+
}
665+
}
666+
for (CodeBlockSupportResults sourceResult : sourceCodeBlocksResults.values()) {
667+
if (!sourceResult.isolatedFnCause.isEmpty()) {
668+
System.out.println(" " + sourceResult.toIsolationString());
669+
foundIsolated = true;
670+
}
671+
}
672+
for (CodeBlockSupportResults dataflowResult : dataflowCodeBlocksResults.values()) {
673+
if (!dataflowResult.isolatedFnCause.isEmpty()) {
674+
System.out.println(" " + dataflowResult.toIsolationString());
675+
foundIsolated = true;
676+
}
677+
}
678+
if (!foundIsolated) {
679+
System.out.println(" (none found)");
680+
}
681+
if (!combinationFailures.isEmpty()) {
682+
System.out.println(
683+
" Combination failures (all snippets supported, tool still misses): "
684+
+ combinationFailures.size());
685+
for (TestCaseResult cf : combinationFailures) {
686+
System.out.println(" " + cf.toString());
687+
}
688+
}
689+
690+
// --- Issue #5 Sanity Check ---
691+
if (!sanityCheckFailures.isEmpty()) {
692+
System.out.println(
693+
"\n--- Sanity Check: FPs where all snippets are supported ("
694+
+ sanityCheckFailures.size() + ") ---");
695+
for (TestCaseResult sf : sanityCheckFailures) {
696+
System.out.println(" " + sf.toString());
697+
}
698+
}
699+
700+
// --- Issue #5 Stage 2 Report: Isolated FP Root Causes ---
701+
System.out.println("\n--- Stage 2: FP Root Causes (safe snippets tool doesn't recognize) ---");
702+
boolean foundFPIsolated = false;
703+
for (CodeBlockSupportResults sinkResult : sinkCodeBlocksResults.values()) {
704+
if (!sinkResult.isolatedFpCause.isEmpty()) {
705+
System.out.println(
706+
" [SINK] " + sinkResult.name
707+
+ " (" + sinkResult.vulnCat + ", safe) -- "
708+
+ sinkResult.isolatedFpCause.size()
709+
+ " FPs isolated to this snippet");
710+
foundFPIsolated = true;
711+
}
712+
}
713+
for (CodeBlockSupportResults sourceResult : sourceCodeBlocksResults.values()) {
714+
if (!sourceResult.isolatedFpCause.isEmpty()) {
715+
System.out.println(
716+
" [SOURCE] " + sourceResult.name
717+
+ " (safe) -- "
718+
+ sourceResult.isolatedFpCause.size()
719+
+ " FPs isolated to this snippet");
720+
foundFPIsolated = true;
721+
}
722+
}
723+
for (CodeBlockSupportResults dataflowResult : dataflowCodeBlocksResults.values()) {
724+
if (!dataflowResult.isolatedFpCause.isEmpty()) {
725+
String displayName =
726+
dataflowResult.name.isEmpty() ? "NoDataFlow" : dataflowResult.name;
727+
System.out.println(
728+
" [DATAFLOW] " + displayName
729+
+ " (safe) -- "
730+
+ dataflowResult.isolatedFpCause.size()
731+
+ " FPs isolated to this snippet");
732+
foundFPIsolated = true;
733+
}
734+
}
735+
if (!foundFPIsolated) {
736+
System.out.println(" (none found)");
737+
}
593738

594739
// Print out codeblock coordinates of the rest of the False Positives, ignoring all with
595740
// sinks or sources already known to cause FPs

plugin/src/main/java/org/owasp/benchmarkutils/tools/CodeBlockSupportResults.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
package org.owasp.benchmarkutils.tools;
22

3+
import java.util.HashSet;
4+
import java.util.Set;
5+
36
/*
47
* This class is used to contain the results, per codeblock type, of how well a tool did with respect
58
* to that codeblock.
@@ -39,6 +42,13 @@ class CodeBlockSupportResults {
3942
int numFPSinksWhereSourceDataflowAreTruePositivesUsed = 0;
4043
int numFPSinksWhereSourceDataflowAreTruePositivesPassed = 0;
4144

45+
// Issue #5: Track actual test case names for bidirectional mapping and isolation analysis
46+
Set<String> fnTestCases = new HashSet<>();
47+
// FNs where THIS is the single unsupported snippet (isolated root cause)
48+
Set<String> isolatedFnCause = new HashSet<>();
49+
// FPs where THIS is the single safe snippet the tool fails to recognize
50+
Set<String> isolatedFpCause = new HashSet<>();
51+
4252
CodeBlockSupportResults(String name, String type, boolean truePositive) {
4353
this.name = name;
4454
this.type = type;
@@ -118,6 +128,15 @@ public String toStringIgnoringUnsupportedSinks() {
118128
+ supported;
119129
}
120130

131+
public String toIsolationString() {
132+
String displayName = ("DATAFLOW".equals(type) && "".equals(name)) ? "NoDataFlow" : name;
133+
return "[" + type + "] " + displayName
134+
+ ("SINK".equals(type) ? " (" + vulnCat + ")" : "")
135+
+ " -- " + isolatedFnCause.size() + " FNs isolated to this snippet"
136+
+ (isolatedFpCause.isEmpty() ? ""
137+
: ", " + isolatedFpCause.size() + " FPs isolated to this snippet");
138+
}
139+
121140
public String toStringForFalsePositiveSinks() {
122141
return "Codeblock type: "
123142
+ type

0 commit comments

Comments
 (0)