Skip to content

Commit 90a2667

Browse files
authored
Merge pull request #516 from JohT/fix/member-type-connected-component-scoped-topological-sort
Fix topological sort only applied to connected components of one member type
2 parents abfc0c8 + ed63d69 commit 90a2667

8 files changed

Lines changed: 57 additions & 15 deletions

domains/anomaly-detection/anomalyDetectionCsv.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@ anomaly_detection_features() {
8181
"${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-WeaklyConnectedComponents-Write.cypher" "${@}"
8282
execute_cypher "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-WeaklyConnectedComponents-CreateNode.cypher" "${@}"
8383
# Determines topological sort max distance from source for strongly connected components if not already done
84+
execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-TopologicalSortComponents-Exists.cypher" \
85+
"${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-TopologicalSortComponents-Delete-Projection.cypher" "${@}"
8486
execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-TopologicalSortComponents-Exists.cypher" \
8587
"${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-TopologicalSortComponents-Projection.cypher" "${@}"
8688
execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-TopologicalSortComponents-Exists.cypher" \
@@ -106,6 +108,8 @@ anomaly_detection_queries() {
106108
mkdir -p "${detail_report_directory}"
107109

108110
echo "anomalyDetectionCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Executing Queries for ${nodeLabel} nodes..."
111+
execute_cypher "${ANOMALY_DETECTION_QUERY_CYPHER_DIR}/AnomalyDetectionProjectionStatistics.cypher" "${@}" > "${detail_report_directory}/AnomalyDetection_GraphProjectionStatistics.csv"
112+
109113
execute_cypher "${ANOMALY_DETECTION_QUERY_CYPHER_DIR}/AnomalyDetectionPotentialImbalancedRoles.cypher" "${@}" > "${detail_report_directory}/AnomalyDetection_PotentialImbalancedRoles.csv"
110114
execute_cypher "${ANOMALY_DETECTION_QUERY_CYPHER_DIR}/AnomalyDetectionPotentialOverEngineerOrIsolated.cypher" "${@}" > "${detail_report_directory}/AnomalyDetection_PotentialOverEngineerOrIsolated.csv"
111115

domains/anomaly-detection/anomalyDetectionPython.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,8 @@ anomaly_detection_features() {
128128
"${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-WeaklyConnectedComponents-Write.cypher" "${@}"
129129
execute_cypher "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-WeaklyConnectedComponents-CreateNode.cypher" "${@}"
130130
# Determines topological sort max distance from source for strongly connected components if not already done
131+
execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-TopologicalSortComponents-Exists.cypher" \
132+
"${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-TopologicalSortComponents-Delete-Projection.cypher" "${@}"
131133
execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-TopologicalSortComponents-Exists.cypher" \
132134
"${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-TopologicalSortComponents-Projection.cypher" "${@}"
133135
execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-TopologicalSortComponents-Exists.cypher" \

domains/anomaly-detection/features/AnomalyDetectionFeature-StronglyConnectedComponents-CreateNode.cypher

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
// 3) Create or update the StronglyConnectedComponent node with member type label e.g. ("TypeMembers")
1414
// - size: number of code units in the component
1515
// - name: derived from the highest PageRank member
16-
MERGE (component:StronglyConnectedComponent {id: componentId})
16+
MERGE (component:StronglyConnectedComponent {id: componentId, memberType: $projection_node_label})
1717
WITH *
1818
,CASE componentSize WHEN = 1 THEN 'Component ' ELSE 'Cycle around ' END AS componentNamePrefix
1919
CALL apoc.create.addLabels(component, [$projection_node_label + 'Members']) YIELD node
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
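// Drops the graph projection named "<projection_name>-components" if it exists; a missing projection is not treated as an error (second argument "false"). Variables: projection_name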
2+
3+
CALL gds.graph.drop($projection_name + '-components', false)
4+
YIELD graphName, nodeCount, relationshipCount, creationTime, modificationTime
5+
RETURN graphName, nodeCount, relationshipCount, creationTime, modificationTime

domains/anomaly-detection/features/AnomalyDetectionFeature-TopologicalSortComponents-Projection.cypher

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
// Creates a projection of the strongly connected components graph for the given member type. Requires: "AnomalyDetectionFeature-StronglyConnectedComponents-CreateDependency.cypher"
22

33
MATCH (sourceComponent:StronglyConnectedComponent)
4-
OPTIONAL MATCH (sourceComponent)-[:DEPENDS_ON]->(targetComponent:StronglyConnectedComponent)
54
WHERE $projection_node_label + 'Members' IN labels(sourceComponent)
6-
AND $projection_node_label + 'Members' IN labels(targetComponent)
5+
OPTIONAL MATCH (sourceComponent)-[:DEPENDS_ON]->(targetComponent:StronglyConnectedComponent)
6+
WHERE $projection_node_label + 'Members' IN labels(targetComponent)
77
WITH gds.graph.project($projection_name + '-components', sourceComponent, targetComponent) AS graph
88
RETURN graph.graphName AS graphName
99
,graph.nodeCount AS nodeCount

domains/anomaly-detection/features/AnomalyDetectionFeature-WeaklyConnectedComponents-CreateNode.cypher

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
// 3) Create or update the WeaklyConnectedComponent node with member type label e.g. ("TypeMembers")
1313
// - size: number of code units in the component
1414
// - name: derived from the highest PageRank member
15-
MERGE (component:WeaklyConnectedComponent {id: componentId})
15+
MERGE (component:WeaklyConnectedComponent {id: componentId, memberType: $projection_node_label})
1616
WITH *
1717
CALL apoc.create.addLabels(component, [$projection_node_label + 'Members']) YIELD node
1818
SET component.size = size(members)
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
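// Reads statistics (node count, relationship count, density, size in bytes and degree distribution) of the graph projection named "<projection_name>-cleaned". Variables: projection_name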
2+
3+
CALL gds.graph.list($projection_name + '-cleaned')
4+
YIELD nodeCount, relationshipCount, density, sizeInBytes, degreeDistribution
5+
RETURN nodeCount
6+
,relationshipCount
7+
,density
8+
,sizeInBytes
9+
,degreeDistribution.min
10+
,degreeDistribution.mean
11+
,degreeDistribution.max
12+
,degreeDistribution.p50
13+
,degreeDistribution.p75
14+
,degreeDistribution.p90
15+
,degreeDistribution.p95
16+
,degreeDistribution.p99
17+
,degreeDistribution.p999
Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,34 @@
11
// Anomaly Detection DeepDive: Overview of analyzed code units, the dependencies between them, the resulting graph density and the number of anomalies detected. Requires all other labels/*.cypher queries to run first. Variables: projection_language, projection_node_label
22

3-
MATCH (codeUnit)
4-
WHERE $projection_node_label IN labels(codeUnit)
5-
AND (codeUnit.incomingDependencies IS NOT NULL
6-
OR codeUnit.outgoingDependencies IS NOT NULL)
3+
MATCH (codeUnit)
4+
WHERE $projection_node_label IN labels(codeUnit)
5+
AND (codeUnit.incomingDependencies > 0
6+
OR codeUnit.outgoingDependencies > 0)
7+
AND coalesce(codeUnit.testMarkerInteger, 0) = 0
8+
OPTIONAL MATCH (codeUnit)-[dependency:DEPENDS_ON]-(target)
9+
WHERE $projection_node_label IN labels(target)
10+
AND (target.incomingDependencies > 0
11+
OR target.outgoingDependencies > 0)
12+
AND coalesce(target.testMarkerInteger, 0) = 0
13+
WITH count(dependency) AS relationshipCount
14+
,collect(DISTINCT codeUnit) AS codeUnits
15+
UNWIND codeUnits AS codeUnit
716
WITH sum(codeUnit.anomalyLabel) AS anomalyCount
817
,sum(sign(codeUnit.anomalyAuthorityRank)) AS authorityCount
918
,sum(sign(codeUnit.anomalyBottleneckRank)) AS bottleNeckCount
1019
,sum(sign(codeUnit.anomalyBridgeRank)) AS bridgeCount
1120
,sum(sign(codeUnit.anomalyHubRank)) AS hubCount
1221
,sum(sign(codeUnit.anomalyOutlierRank)) AS outlierCount
22+
,count(codeUnit) AS nodeCount
23+
,relationshipCount
1324
//,collect(codeUnit.name)[0..4] AS exampleNames
14-
RETURN anomalyCount AS `Anomalies`
15-
,authorityCount AS `Authorities`
16-
,bottleNeckCount AS `Bottlenecks`
17-
,bridgeCount AS `Bridges`
18-
,hubCount AS `Hubs`
19-
,outlierCount AS `Outliers`
20-
//,exampleNames
25+
RETURN anomalyCount AS `Anomalies`
26+
,authorityCount AS `Authorities`
27+
,bottleNeckCount AS `Bottlenecks`
28+
,bridgeCount AS `Bridges`
29+
,hubCount AS `Hubs`
30+
,outlierCount AS `Outliers`
31+
,nodeCount AS `CodeUnits`
32+
,relationshipCount AS `Dependencies`
33+
,round(toFloat(relationshipCount) / (nodeCount * (nodeCount - 1)), 6) AS `GraphDensity`
34+
//,exampleNames

0 commit comments

Comments
 (0)