Skip to content

Commit 2aa7174

Browse files
committed
Add graph visualizations to anomaly detection
1 parent ebf9aed commit 2aa7174

5 files changed

Lines changed: 165 additions & 2 deletions

File tree

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
// Anomaly Detection Graphs: Find top nodes marked as "hub" including their incoming dependencies and output them in Graphviz format.
// Requires the query parameter "projection_node_label" (label that source and target nodes need to have).
// The first result column "graphVizDotNotationLine" contains the GraphViz DOT lines. All other columns are for debugging.

// Step 1: Query overall statistics, e.g. min/max weight for later normalization
   MATCH (sourceForStatistics)-[dependencyForStatistics:DEPENDS_ON]->(targetForStatistics)
   WHERE $projection_node_label IN labels(sourceForStatistics)
     AND $projection_node_label IN labels(targetForStatistics)
    WITH min(coalesce(dependencyForStatistics.weight25PercentInterfaces, dependencyForStatistics.weight)) AS minWeight
        ,max(coalesce(dependencyForStatistics.weight25PercentInterfaces, dependencyForStatistics.weight)) AS maxWeight
// Step 2: Query direct dependencies to the target (the node with anomalyHubRank = 1)
   MATCH (source)-[directDependency:DEPENDS_ON]->(target)
   WHERE $projection_node_label IN labels(source)
     AND $projection_node_label IN labels(target)
     AND target.anomalyScore > 0
     AND target.anomalyHubRank = 1
   ORDER BY directDependency.weight DESC
    WITH minWeight
        ,maxWeight
        ,target
        ,collect(source) AS sources
        ,collect(directDependency) AS directDependencies
// Step 3: Query dependencies among sources
//         OPTIONAL MATCH keeps the row of the hub even if none of its sources depend on each other.
//         A plain MATCH would produce no rows in that case and drop the direct dependencies as well.
//         collect() ignores the null values of non-matching pairs, resulting in an empty list.
  UNWIND sources AS source1
  UNWIND sources AS source2
OPTIONAL MATCH (source1)-[indirectDependency:DEPENDS_ON]->(source2)
    WITH minWeight
        ,maxWeight
        ,target
        ,directDependencies
        ,collect(indirectDependency) AS indirectDependencies
    WITH *, directDependencies + indirectDependencies AS allDependencies
// Step 4: Prepare results in GraphViz format for all dependencies
  UNWIND allDependencies AS dependency
    WITH *, (endNode(dependency) = target) AS isTargetEndNode
    WITH *, CASE WHEN isTargetEndNode THEN endNode(dependency) ELSE null END AS targetEndNodeOrNull
    WITH *, CASE WHEN isTargetEndNode THEN null ELSE endNode(dependency) END AS nonTargetEndNodeOrNull
    WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight) AS weight
    // Guard against a division by zero when all weights are equal (maxWeight = minWeight).
    // In that case every edge gets the smallest pen width.
    WITH *, CASE WHEN maxWeight = minWeight THEN 0.0
                 ELSE toFloat(weight - minWeight) / toFloat(maxWeight - minWeight)
            END AS normalizedWeight
    // Scale the normalized weight (0..1) to a pen width between 1 and 6
    WITH *, round((normalizedWeight * 5) + 1, 2) AS penWidth
    // hubSubLabel (and therefore hubNode) is only non-null for dependencies that end at the hub
    WITH *, "\\nhub #" + targetEndNodeOrNull.anomalyHubRank AS hubSubLabel
    WITH *, coalesce("\"hub\" [label=\"" + target.name + hubSubLabel + "\";]; ", "") AS hubNode
    WITH *, "\"" + startNode(dependency).name + "\"" AS sourceNode
    // Dependencies that end at the hub point to the fixed node id "hub" that is styled in the template
    WITH *, coalesce("\"" + nonTargetEndNodeOrNull.name + "\"", "\"hub\"") AS targetNode
    WITH *, " -> " + targetNode
          + " [label = " + weight + ";"
          + " penwidth = " + penWidth + ";"
          + " ];" AS graphVizDotNotationEdge
    WITH *, hubNode + sourceNode + coalesce(graphVizDotNotationEdge, " [];") AS graphVizDotNotationLine
   ORDER BY target.anomalyHubRank DESC, target.name ASC
  RETURN DISTINCT graphVizDotNotationLine
  //Debugging
        ,startNode(dependency).name AS sourceName
        ,endNode(dependency).name AS targetName
        ,hubNode
        ,penWidth
        ,normalizedWeight
        ,dependency.weight AS weight
        ,minWeight
        ,maxWeight
   LIMIT 100
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// This is a GraphViz dot template file for the visualization of an anomaly archetype graph.
2+
// The main part of the template is marked by the comments "Begin-Template" and "End-Template".
3+
// It also contains a simple example graph.
4+
//
5+
strict digraph anomaly_archetype_template {
6+
//Begin-Template
7+
graph [layout = "fdp"; fontname = "Helvetica,Arial,sans-serif";]
8+
node [fontname = "Helvetica,Arial,sans-serif"; fontsize = 8;];
9+
edge [fontname = "Helvetica,Arial,sans-serif"; fontsize = 6;];
10+
node [style = "filled"; fillcolor = "0.560 0.400 0.999";];
11+
node [color = "0.560 0.900 0.700";];
12+
edge [color = "0.560 0.900 0.700";];
13+
"hub" [shape="circle"; color = "0.990 0.950 0.70"; fillcolor = "0.999 0.350 0.970"; penwidth = 2; fontsize = 11];
14+
//End-Template
15+
"A" -> "hub" [penwidth = 1.0; label = 1;];
16+
"A" -> "B" [penwidth = 3.0; label = 4;];
17+
"B" -> "hub" [penwidth = 2.0; label = 2;];
18+
}
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
#!/usr/bin/env bash
2+
3+
# Executes selected anomaly detection Cypher queries for GraphViz visualization.
4+
# Visualizes top ranked anomaly archetypes.
5+
# Requires an already running Neo4j graph database with already scanned and analyzed artifacts.
6+
# The reports (csv, dot and svg files) will be written into the sub directory reports/anomaly-detection/{language}_{codeUnit}.
7+
8+
# Requires executeQueryFunctions.sh, visualizeQueryResults.sh, cleanupAfterReportGeneration.sh
9+
10+
# Fail on any error ("-e" = exit on first error, "-o pipefail" exits on errors within piped commands)
11+
set -o errexit -o pipefail
12+
13+
# Overrideable Constants (defaults also defined in sub scripts)
14+
REPORTS_DIRECTORY=${REPORTS_DIRECTORY:-"reports"}
15+
16+
## Get this "scripts/reports" directory if not already set
17+
# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution.
18+
# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes.
19+
# This way non-standard tools like readlink aren't needed.
20+
ANOMALY_DETECTION_GRAPHS_DIR=${REPORTS_SCRIPT_DIR:-$( CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P )}
21+
#echo "anomalyDetectionGraphVisualization: ANOMALY_DETECTION_GRAPHS_DIR=${ANOMALY_DETECTION_GRAPHS_DIR}"
22+
23+
# Get the "scripts" directory by taking the path of this script, going three directories up and then into "scripts".
24+
SCRIPTS_DIR=${SCRIPTS_DIR:-"${ANOMALY_DETECTION_GRAPHS_DIR}/../../../scripts"} # Repository directory containing the shell scripts
25+
# echo "anomalyDetectionGraphVisualization: SCRIPTS_DIR=${SCRIPTS_DIR}"
26+
27+
# Get the "scripts/visualization" directory.
28+
VISUALIZATION_SCRIPTS_DIR=${VISUALIZATION_SCRIPTS_DIR:-"${SCRIPTS_DIR}/visualization"} # Repository directory containing the shell scripts for visualization
29+
# echo "anomalyDetectionGraphVisualization: VISUALIZATION_SCRIPTS_DIR=${VISUALIZATION_SCRIPTS_DIR}"
30+
31+
# Define functions to execute cypher queries from within a given file
32+
source "${SCRIPTS_DIR}/executeQueryFunctions.sh"
33+
34+
# Runs the top hubs graph query, writes its result as CSV file and creates a graph visualization from it.
#
# Required Parameters:
# - projection_node_label=...
#   Label of the nodes that will be used for the projection. Example: "Package"
# - projection_language=...
#   Name of the associated programming language. Examples: "Java", "Typescript"
#
# Globals (read): FULL_REPORT_DIRECTORY, ANOMALY_DETECTION_GRAPHS_DIR, SCRIPTS_DIR, VISUALIZATION_SCRIPTS_DIR
# Outputs: A progress message on stdout and the report files in "${FULL_REPORT_DIRECTORY}/{language}_{nodeLabel}".
anomaly_detection_graph_visualization() {
    local nodeLabel
    nodeLabel=$( extractQueryParameter "projection_node_label" "${@}" )

    local language
    language=$( extractQueryParameter "projection_language" "${@}" )

    # Use the name of this script as log prefix (same as in the debug output at the top of this file)
    # so that the message isn't mistaken for one of the anomaly summary report.
    echo "anomalyDetectionGraphVisualization: $(date +'%Y-%m-%dT%H:%M:%S%z') Creating ${language} ${nodeLabel} anomaly graph visualizations..."

    local detail_report_directory_name="${language}_${nodeLabel}"
    local detail_report_directory="${FULL_REPORT_DIRECTORY}/${detail_report_directory_name}"
    mkdir -p "${detail_report_directory}"

    # All parameters are forwarded to the query so that it can use them as query parameters.
    local queryResultFile="${detail_report_directory}/TopHubsGraphVisualization.csv"
    execute_cypher "${ANOMALY_DETECTION_GRAPHS_DIR}/AnomalyDetectionTopHubsGraph.cypher" "${@}" > "${queryResultFile}"

    source "${SCRIPTS_DIR}/cleanupAfterReportGeneration.sh" "${detail_report_directory}" # Remove empty files

    # Only visualize the query result if its file still exists after the cleanup
    if [ -f "${queryResultFile}" ] ; then
        source "${VISUALIZATION_SCRIPTS_DIR}/visualizeQueryResults.sh" "${queryResultFile}" --template "${ANOMALY_DETECTION_GRAPHS_DIR}/anomaly-archetype-hub.template.gv"
    fi
}
63+
64+
65+
# Create report directory
REPORT_NAME="anomaly-detection"
FULL_REPORT_DIRECTORY="${REPORTS_DIRECTORY}/${REPORT_NAME}"
mkdir -p "${FULL_REPORT_DIRECTORY}"

# Query parameter keys that are passed as "key=value" to anomaly_detection_graph_visualization
ALGORITHM_NODE="projection_node_label"
ALGORITHM_LANGUAGE="projection_language"

# -- Detail Reports for each code type -------------------------------

anomaly_detection_graph_visualization "${ALGORITHM_NODE}=Artifact" "${ALGORITHM_LANGUAGE}=Java"
anomaly_detection_graph_visualization "${ALGORITHM_NODE}=Package" "${ALGORITHM_LANGUAGE}=Java"
anomaly_detection_graph_visualization "${ALGORITHM_NODE}=Type" "${ALGORITHM_LANGUAGE}=Java"
anomaly_detection_graph_visualization "${ALGORITHM_NODE}=Module" "${ALGORITHM_LANGUAGE}=Typescript"

# ---------------------------------------------------------------

# Use the name of this script as log prefix so that the message isn't mistaken for one of the anomaly summary report.
echo "anomalyDetectionGraphVisualization: $(date +'%Y-%m-%dT%H:%M:%S%z') Successfully finished."

scripts/visualization/convertQueryResultCsvToGraphVizDotFile.sh

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,10 @@ outputFilename="${inputFilePath}/${graphName}.gv"
105105
echo "strict digraph ${graphName} {"
106106
# Extract the template content from the template file and remove the begin and end markers
107107
sed -n '/\/\/Begin-Template/,/\/\/End-Template/{//!p;}' "${templateFile}"
108-
# Remove the first (header) line of the CSV file, remove the enclosing double quotes and replace the escaped double quotes by double quotes
108+
# Remove the first (header) line of the CSV file,
109+
# print the first column prefixed with a tab,
110+
# remove the enclosing double quotes and
111+
# replace the escaped double quotes by double quotes
109112
awk -F ',' 'NR>1 {print "\t" $1}' "${inputFilename}" \
110113
| sed 's/^\t\"\"\"/\t"/' \
111114
| sed 's/^\t\"\\\"\"/\t"/' \

scripts/visualization/visualizeQueryResults.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ echo "visualizeQueryResults: VISUALIZATION_SCRIPTS_DIR=${VISUALIZATION_SCRIPTS_D
1717
# Read the first unnamed input argument containing the name of the query result CSV file
1818
inputCsvFileName=""
1919
case "${1}" in
20-
"--"*) ;; # Skipping named command line options to forward them later to the "analyze" command
20+
"--"*) ;; # Skipping named command line options to forward them later to the "convertQueryResultCsvToGraphVizDotFile" command
2121
*)
2222
inputCsvFileName="${1}"
2323
shift || true

0 commit comments

Comments
 (0)