diff --git a/cypher/Community_Detection/Community_Detection_3a_StronglyConnectedComponents_Estimate.cypher b/cypher/Community_Detection/Community_Detection_3a_StronglyConnectedComponents_Estimate.cypher new file mode 100644 index 000000000..f83d66809 --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_3a_StronglyConnectedComponents_Estimate.cypher @@ -0,0 +1,25 @@ +// Community Detection Strongly Connected Components Estimate + +CALL gds.scc.write.estimate( + $dependencies_projection + '-cleaned', { + writeProperty: $dependencies_projection_write_property + ,consecutiveIds: true +}) + YIELD requiredMemory + ,nodeCount + ,relationshipCount + ,bytesMin + ,bytesMax + ,heapPercentageMin + ,heapPercentageMax + ,treeView + ,mapView +RETURN requiredMemory + ,nodeCount + ,relationshipCount + ,bytesMin + ,bytesMax + ,heapPercentageMin + ,heapPercentageMax + ,treeView + //,mapView //doesn't work on Windows with git bash jq version jq-1.7-dirty \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_3a_WeaklyConnectedComponents_Estimate.cypher b/cypher/Community_Detection/Community_Detection_3a_WeaklyConnectedComponents_Estimate.cypher index 663364e3e..24f2db6b8 100644 --- a/cypher/Community_Detection/Community_Detection_3a_WeaklyConnectedComponents_Estimate.cypher +++ b/cypher/Community_Detection/Community_Detection_3a_WeaklyConnectedComponents_Estimate.cypher @@ -1,9 +1,8 @@ -// Community Detection Label Propagation Estimate +// Community Detection Weakly Connected Components Estimate -CALL gds.labelPropagation.write.estimate( +CALL gds.wcc.write.estimate( $dependencies_projection + '-cleaned', { - relationshipWeightProperty: $dependencies_projection_weight_property - ,writeProperty: $dependencies_projection_write_property + writeProperty: $dependencies_projection_write_property ,consecutiveIds: true }) YIELD requiredMemory diff --git a/cypher/Community_Detection/Community_Detection_3b_StronglyConnectedComponents_Statistics.cypher 
b/cypher/Community_Detection/Community_Detection_3b_StronglyConnectedComponents_Statistics.cypher new file mode 100644 index 000000000..5e33a04f2 --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_3b_StronglyConnectedComponents_Statistics.cypher @@ -0,0 +1,24 @@ +// Community Detection Strongly Connected Components Statistics + +CALL gds.scc.stats( + $dependencies_projection + '-cleaned', { + consecutiveIds: true +}) + YIELD componentCount + ,preProcessingMillis + ,computeMillis + ,postProcessingMillis + ,componentDistribution +RETURN componentCount + ,preProcessingMillis + ,computeMillis + ,postProcessingMillis + ,componentDistribution.min + ,componentDistribution.mean + ,componentDistribution.max + ,componentDistribution.p50 + ,componentDistribution.p75 + ,componentDistribution.p90 + ,componentDistribution.p95 + ,componentDistribution.p99 + ,componentDistribution.p999 \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_3b_WeaklyConnectedComponents_Statistics.cypher b/cypher/Community_Detection/Community_Detection_3b_WeaklyConnectedComponents_Statistics.cypher index 7c7ae48e2..2ea984167 100644 --- a/cypher/Community_Detection/Community_Detection_3b_WeaklyConnectedComponents_Statistics.cypher +++ b/cypher/Community_Detection/Community_Detection_3b_WeaklyConnectedComponents_Statistics.cypher @@ -2,8 +2,7 @@ CALL gds.wcc.stats( $dependencies_projection + '-cleaned', { - relationshipWeightProperty: $dependencies_projection_weight_property - ,consecutiveIds: true + consecutiveIds: true }) YIELD componentCount ,preProcessingMillis diff --git a/cypher/Community_Detection/Community_Detection_3c_StronglyConnectedComponents_Mutate.cypher b/cypher/Community_Detection/Community_Detection_3c_StronglyConnectedComponents_Mutate.cypher new file mode 100644 index 000000000..fe8e41a77 --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_3c_StronglyConnectedComponents_Mutate.cypher @@ -0,0 +1,29 @@ +// Community 
Detection Strongly Connected Components Mutate + +CALL gds.scc.mutate( + $dependencies_projection + '-cleaned', { + mutateProperty: $dependencies_projection_write_property + ,consecutiveIds: true +}) + YIELD componentCount + ,nodePropertiesWritten + ,preProcessingMillis + ,computeMillis + ,mutateMillis + ,postProcessingMillis + ,componentDistribution +RETURN componentCount + ,nodePropertiesWritten + ,preProcessingMillis + ,computeMillis + ,mutateMillis + ,postProcessingMillis + ,componentDistribution.min + ,componentDistribution.mean + ,componentDistribution.max + ,componentDistribution.p50 + ,componentDistribution.p75 + ,componentDistribution.p90 + ,componentDistribution.p95 + ,componentDistribution.p99 + ,componentDistribution.p999 \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_3c_WeaklyConnectedComponents_Mutate.cypher b/cypher/Community_Detection/Community_Detection_3c_WeaklyConnectedComponents_Mutate.cypher index b964dacb6..12c187f1f 100644 --- a/cypher/Community_Detection/Community_Detection_3c_WeaklyConnectedComponents_Mutate.cypher +++ b/cypher/Community_Detection/Community_Detection_3c_WeaklyConnectedComponents_Mutate.cypher @@ -2,8 +2,7 @@ CALL gds.wcc.mutate( $dependencies_projection + '-cleaned', { - relationshipWeightProperty: $dependencies_projection_weight_property - ,mutateProperty: $dependencies_projection_write_property + mutateProperty: $dependencies_projection_write_property ,consecutiveIds: true }) YIELD componentCount diff --git a/cypher/Community_Detection/Community_Detection_3d_StronglyConnectedComponents_Stream.cypher b/cypher/Community_Detection/Community_Detection_3d_StronglyConnectedComponents_Stream.cypher new file mode 100644 index 000000000..ba84b7330 --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_3d_StronglyConnectedComponents_Stream.cypher @@ -0,0 +1,19 @@ +// Community Detection Strongly Connected Components Stream + +CALL gds.scc.stream( + $dependencies_projection + 
'-cleaned', { + consecutiveIds: true +}) + YIELD nodeId, componentId + WITH componentId + ,gds.util.asNode(nodeId) AS member + WITH componentId + ,member + ,coalesce(member.fqn, member.fileName, member.name) AS memberName + WITH componentId + ,count(DISTINCT member) AS memberCount + ,collect(DISTINCT memberName) AS memberNames +RETURN componentId + ,memberCount + ,memberNames + ORDER BY memberCount DESC, componentId ASC \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_3d_WeaklyConnectedComponents_Stream.cypher b/cypher/Community_Detection/Community_Detection_3d_WeaklyConnectedComponents_Stream.cypher index be0e68bf1..edd7db547 100644 --- a/cypher/Community_Detection/Community_Detection_3d_WeaklyConnectedComponents_Stream.cypher +++ b/cypher/Community_Detection/Community_Detection_3d_WeaklyConnectedComponents_Stream.cypher @@ -2,8 +2,7 @@ CALL gds.wcc.stream( $dependencies_projection + '-cleaned', { - relationshipWeightProperty: $dependencies_projection_weight_property, - consecutiveIds: true + consecutiveIds: true }) YIELD nodeId, componentId WITH componentId diff --git a/cypher/Community_Detection/Community_Detection_3e_StronglyConnectedComponents_Write.cypher b/cypher/Community_Detection/Community_Detection_3e_StronglyConnectedComponents_Write.cypher new file mode 100644 index 000000000..d95ce1426 --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_3e_StronglyConnectedComponents_Write.cypher @@ -0,0 +1,29 @@ +// Community Detection Strongly Connected Components write node property communityStronglyConnectedComponentId + +CALL gds.scc.write( + $dependencies_projection + '-cleaned', { + consecutiveIds: true + ,writeProperty: 'communityStronglyConnectedComponentId' +}) +YIELD componentCount + ,preProcessingMillis + ,computeMillis + ,writeMillis + ,postProcessingMillis + ,nodePropertiesWritten + ,componentDistribution +RETURN componentCount + ,preProcessingMillis + ,computeMillis + ,writeMillis + 
,postProcessingMillis + ,nodePropertiesWritten + ,componentDistribution.min + ,componentDistribution.mean + ,componentDistribution.max + ,componentDistribution.p50 + ,componentDistribution.p75 + ,componentDistribution.p90 + ,componentDistribution.p95 + ,componentDistribution.p99 + ,componentDistribution.p999 \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_3e_WeaklyConnectedComponents_Write.cypher b/cypher/Community_Detection/Community_Detection_3e_WeaklyConnectedComponents_Write.cypher index f2d5f948a..b85c4f8f3 100644 --- a/cypher/Community_Detection/Community_Detection_3e_WeaklyConnectedComponents_Write.cypher +++ b/cypher/Community_Detection/Community_Detection_3e_WeaklyConnectedComponents_Write.cypher @@ -2,8 +2,7 @@ CALL gds.wcc.write( $dependencies_projection + '-cleaned', { - relationshipWeightProperty: $dependencies_projection_weight_property - ,consecutiveIds: true + consecutiveIds: true ,writeProperty: 'communityWeaklyConnectedComponentId' }) YIELD componentCount diff --git a/cypher/Community_Detection/Community_Detection_Summary.cypher b/cypher/Community_Detection/Community_Detection_Summary.cypher index 37aba2cce..2528043cb 100644 --- a/cypher/Community_Detection/Community_Detection_Summary.cypher +++ b/cypher/Community_Detection/Community_Detection_Summary.cypher @@ -2,17 +2,20 @@ MATCH (codeUnit) WHERE (codeUnit.incomingDependencies > 0 OR codeUnit.outgoingDependencies > 0) + AND (codeUnit.testMarkerInteger IS NULL OR codeUnit.testMarkerInteger = 0) AND $dependencies_projection_node IN LABELS(codeUnit) RETURN coalesce(codeUnit.fqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS name ,codeUnit.name AS shortName - ,codeUnit.communityLouvainId AS louvainId - ,codeUnit.communityLouvainIntermediateIds AS louvainIntermediateIds - ,codeUnit.communityLeidenId AS leidenId - ,codeUnit.communityLeidenIntermediateIds AS leidenIntermediateIds - ,codeUnit.communityLeidenIdModularity AS leidenModularity - 
,codeUnit.communityWeaklyConnectedComponentId AS weaklyConnectedComponentId - ,codeUnit.communityLabelPropagationId AS labelPropagationId - ,codeUnit.communityKCoreDecompositionValue AS kCoreDecompositionValue - ,codeUnit.communityMaximumKCutId AS maximumKCutId - ,codeUnit.incomingDependencies AS incomingDependencies - ,codeUnit.outgoingDependencies AS outgoingDependencies \ No newline at end of file + ,codeUnit.communityLouvainId AS louvainId + ,codeUnit.communityLouvainIntermediateIds AS louvainIntermediateIds + ,codeUnit.communityLeidenId AS leidenId + ,codeUnit.communityLeidenIntermediateIds AS leidenIntermediateIds + ,codeUnit.communityLeidenIdModularity AS leidenModularity + ,codeUnit.communityStronglyConnectedComponentId AS stronglyConnectedComponentId + ,codeUnit.communityWeaklyConnectedComponentId AS weaklyConnectedComponentId + ,codeUnit.communityLabelPropagationId AS labelPropagationId + ,codeUnit.communityKCoreDecompositionValue AS kCoreDecompositionValue + ,codeUnit.communityMaximumKCutId AS maximumKCutId + ,codeUnit.communityFastRpHdbscanLabel AS communityFastRpHdbscanLabel + ,codeUnit.incomingDependencies AS incomingDependencies + ,codeUnit.outgoingDependencies AS outgoingDependencies \ No newline at end of file diff --git a/scripts/reports/CommunityCsv.sh b/scripts/reports/CommunityCsv.sh index bbbc6cee9..d9b90316f 100755 --- a/scripts/reports/CommunityCsv.sh +++ b/scripts/reports/CommunityCsv.sh @@ -132,6 +132,41 @@ detectCommunitiesWithLeiden() { calculateCommunityMetrics "${@}" "${writePropertyName}" } +# Community Detection using the Strongly Connected Components Algorithm +# +# Required Parameters: +# - dependencies_projection=... +# Name prefix for the in-memory projection name for dependencies. Example: "package" +# - dependencies_projection_node=... +# Label of the nodes that will be used for the projection. 
Example: "Package" +detectCommunitiesWithStronglyConnectedComponents() { + local COMMUNITY_DETECTION_CYPHER_DIR="${CYPHER_DIR}/Community_Detection" + local PROJECTION_CYPHER_DIR="${CYPHER_DIR}/Dependencies_Projection" + + local writePropertyName="dependencies_projection_write_property=communityStronglyConnectedComponentId" + local writeLabelName="dependencies_projection_write_label=StronglyConnectedComponent" + + # Statistics + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_3a_StronglyConnectedComponents_Estimate.cypher" "${@}" "${writePropertyName}" + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_3b_StronglyConnectedComponents_Statistics.cypher" "${@}" + + # Run the algorithm and write the result into the in-memory projection ("mutate") + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_3c_StronglyConnectedComponents_Mutate.cypher" "${@}" "${writePropertyName}" + + # Stream to CSV + local nodeLabel + nodeLabel=$( extractQueryParameter "dependencies_projection_node" "${@}") + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_8_Stream_Mutated_Grouped.cypher" "${@}" "${writePropertyName}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Communities_Strongly_Connected_Components.csv" + #execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_3d_StronglyConnectedComponents_Stream.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Communities_Strongly_Connected_Components.csv" + + # Update Graph (node properties and labels) using the already mutated property projection + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_9_Write_Mutated.cypher" "${@}" "${writePropertyName}" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_10_Delete_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_11_Add_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}" + + calculateCommunityMetrics "${@}" "${writePropertyName}" 
+} + # Community Detection using the Weakly Connected Components Algorithm # # Required Parameters: @@ -139,8 +174,6 @@ detectCommunitiesWithLeiden() { # Name prefix for the in-memory projection name for dependencies. Example: "package" # - dependencies_projection_node=... # Label of the nodes that will be used for the projection. Example: "Package" -# - dependencies_projection_weight_property=... -# Name of the node property that contains the dependency weight. Example: "weight" detectCommunitiesWithWeaklyConnectedComponents() { local COMMUNITY_DETECTION_CYPHER_DIR="${CYPHER_DIR}/Community_Detection" local PROJECTION_CYPHER_DIR="${CYPHER_DIR}/Dependencies_Projection" @@ -470,7 +503,10 @@ detectCommunities() { time calculateLocalClusteringCoefficient "${@}" compareCommunityDetectionResults "${@}" - listAllResults "${@}" +} + +detectDirectedCommunities() { + time detectCommunitiesWithStronglyConnectedComponents "${@}" } # -- Java Artifact Community Detection --------------------------- @@ -482,8 +518,14 @@ ARTIFACT_GAMMA="dependencies_leiden_gamma=1.11" # default = 1.00 ARTIFACT_KCUT="dependencies_maxkcut=5" # default = 2 if createUndirectedDependencyProjection "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}"; then - detectCommunities "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" "${ARTIFACT_GAMMA}" "${ARTIFACT_KCUT}" # "${ARTIFACT_NODE_EMBEDDINGS}" + detectCommunities "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" "${ARTIFACT_GAMMA}" "${ARTIFACT_KCUT}" writeLeidenModularity "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" + + if createDirectedDependencyProjection "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}"; then + detectDirectedCommunities "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" + fi + + listAllResults "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" "${ARTIFACT_GAMMA}" "${ARTIFACT_KCUT}" fi # -- Java Package Community Detection 
------------------------------- @@ -500,6 +542,11 @@ if createUndirectedDependencyProjection "${PACKAGE_PROJECTION}" "${PACKAGE_NODE} detectCommunitiesWithHDBSCAN "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" + if createDirectedDependencyProjection "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}"; then + detectDirectedCommunities "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" + fi + listAllResults "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" "${PACKAGE_GAMMA}" "${PACKAGE_KCUT}" + # Package Community Detection - Special CSV Queries after update execute_cypher "${CYPHER_DIR}/Community_Detection/Which_package_community_spans_several_artifacts_and_how_are_the_packages_distributed.cypher" > "${FULL_REPORT_DIRECTORY}/Package_Communities_Leiden_That_Span_Multiple_Artifacts.csv" fi @@ -513,12 +560,18 @@ TYPE_GAMMA="dependencies_leiden_gamma=5.00" # default = 1.00 TYPE_KCUT="dependencies_maxkcut=100" # default = 2 if createUndirectedJavaTypeDependencyProjection "${TYPE_PROJECTION}"; then - detectCommunities "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" "${TYPE_GAMMA}" "${TYPE_KCUT}" "${TYPE_NODE_EMBEDDINGS}" + detectCommunities "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" "${TYPE_GAMMA}" "${TYPE_KCUT}" detectCommunitiesWithHDBSCAN "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" + # Type Community Detection - Special CSV Queries after update execute_cypher "${CYPHER_DIR}/Community_Detection/Which_type_community_spans_several_artifacts_and_how_are_the_types_distributed.cypher" > "${FULL_REPORT_DIRECTORY}/Type_Communities_Leiden_That_Span_Multiple_Artifacts.csv" execute_cypher "${CYPHER_DIR}/Community_Detection/Type_communities_with_few_members_in_foreign_packages.cypher" > "${FULL_REPORT_DIRECTORY}/Type_communities_with_few_members_in_foreign_packages.csv" execute_cypher "${CYPHER_DIR}/Community_Detection/Type_communities_that_span_the_most_packages_with_type_statistics.cypher" > 
"${FULL_REPORT_DIRECTORY}/Type_communities_that_span_the_most_packages_with_type_statistics.csv" + + if createDirectedJavaTypeDependencyProjection "${TYPE_PROJECTION}"; then + detectDirectedCommunities "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" + fi + listAllResults "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" "${TYPE_GAMMA}" "${TYPE_KCUT}" fi # -- Typescript Module Community Detection ----------------------- @@ -532,6 +585,10 @@ MODULE_KCUT="dependencies_maxkcut=20" # default = 2 if createUndirectedDependencyProjection "${MODULE_LANGUAGE}" "${MODULE_PROJECTION}" "${MODULE_NODE}" "${MODULE_WEIGHT}"; then detectCommunities "${MODULE_PROJECTION}" "${MODULE_NODE}" "${MODULE_WEIGHT}" "${MODULE_GAMMA}" "${MODULE_KCUT}" + if createDirectedDependencyProjection "${MODULE_LANGUAGE}" "${MODULE_PROJECTION}" "${MODULE_NODE}" "${MODULE_WEIGHT}"; then + detectDirectedCommunities "${MODULE_PROJECTION}" "${MODULE_NODE}" "${MODULE_WEIGHT}" + fi + listAllResults "${MODULE_PROJECTION}" "${MODULE_NODE}" "${MODULE_WEIGHT}" "${MODULE_GAMMA}" "${MODULE_KCUT}" fi # ---------------------------------------------------------------