Skip to content

Commit 7c42767

Browse files
committed
Add relative strong component size as feature to anomaly detection exploration
1 parent 6925d45 commit 7c42767

2 files changed

Lines changed: 9 additions & 1 deletion

File tree

domains/anomaly-detection/explore/AnomalyDetectionIsolationForestExploration.ipynb

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@
3636
"| `Normalized Cluster Distance` | Geometric | Relative to cluster radius | Adds context to position |\n",
3737
"| `1.0 - HDBSCAN membership probability` | Cluster Confidence | How confidently HDBSCAN clustered this node, inverted as 1 - x | High score = likely anomaly |\n",
3838
"| `Average Cluster Radius` | Cluster Context | How tight or spread out the cluster is | Highly spread-out clusters may be less meaningful |\n",
39-
"| `Abstractness` (Robert C. Martin) | Design / OO Metric | Ratio of abstract types (interfaces, abstract classes) to total types | Indicates architectural intent; supports Dependency Inversion Principle and stability balance |\n"
39+
"| `Abstractness` (Robert C. Martin) | Design / OO Metric | Ratio of abstract types (interfaces, abstract classes) to total types | Indicates architectural intent; supports Dependency Inversion Principle and stability balance |\n",
40+
"| `Relative Strong Component Size (vs WCC Median)` | Structural / Graph Topology | Size of the node’s strongly connected component normalized by the median SCC size within its weakly connected component | Highlights unusually large cyclic dependency groups relative to local context; high values often indicate architectural tangles or stability issues |\n"
4041
]
4142
},
4243
{
@@ -210,6 +211,8 @@
210211
" ,coalesce(codeUnit.outgoingDependencies, 0) AS outgoingDependencies\n",
211212
" ,coalesce(codeUnit.fqn, codeUnit.globalFqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS codeUnitName\n",
212213
" ,coalesce(artifactName, projectName, \"\") AS projectName\n",
214+
" OPTIONAL MATCH (codeUnit)-[:IN_STRONGLY_CONNECTED_COMPONENT]->(stronglyConnectedComponent:StronglyConnectedComponent)\n",
215+
" OPTIONAL MATCH (codeUnit)-[:IN_WEAKLY_CONNECTED_COMPONENT]->(weaklyConnectedComponent:WeaklyConnectedComponent)\n",
213216
" RETURN DISTINCT \n",
214217
" codeUnitName\n",
215218
" ,codeUnit.name AS shortCodeUnitName\n",
@@ -232,6 +235,7 @@
232235
" ,codeUnit.clusteringHDBSCANSize AS clusterSize\n",
233236
" ,codeUnit.clusteringHDBSCANLabel AS clusterLabel\n",
234237
" ,codeUnit.clusteringHDBSCANMedoid AS clusterMedoid\n",
238+
" ,coalesce(stronglyConnectedComponent.size / weaklyConnectedComponent.stronglyConnectedComponentSizePercentile50, 1.0) AS stronglyConnectedComponentSizeRatio\n",
235239
" ,codeUnit.embeddingsFastRandomProjectionTunedForClusteringVisualizationX AS embeddingVisualizationX\n",
236240
" ,codeUnit.embeddingsFastRandomProjectionTunedForClusteringVisualizationY AS embeddingVisualizationY\n",
237241
" \"\"\"\n",

domains/anomaly-detection/tunedAnomalyDetectionExplained.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,8 @@ def query_data(input_parameters: Parameters = Parameters.example()) -> pd.DataFr
255255
,coalesce(codeUnit.outgoingDependencies, 0) AS outgoingDependencies
256256
,coalesce(codeUnit.fqn, codeUnit.globalFqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS codeUnitName
257257
,coalesce(artifactName, projectName, "") AS projectName
258+
OPTIONAL MATCH (codeUnit)-[:IN_STRONGLY_CONNECTED_COMPONENT]->(stronglyConnectedComponent:StronglyConnectedComponent)
259+
OPTIONAL MATCH (codeUnit)-[:IN_WEAKLY_CONNECTED_COMPONENT]->(weaklyConnectedComponent:WeaklyConnectedComponent)
258260
RETURN DISTINCT
259261
codeUnitName
260262
,codeUnit.name AS shortCodeUnitName
@@ -277,6 +279,7 @@ def query_data(input_parameters: Parameters = Parameters.example()) -> pd.DataFr
277279
,codeUnit.clusteringHDBSCANSize AS clusterSize
278280
,codeUnit.clusteringHDBSCANLabel AS clusterLabel
279281
,codeUnit.clusteringHDBSCANMedoid AS clusterMedoid
282+
,coalesce(stronglyConnectedComponent.size / weaklyConnectedComponent.stronglyConnectedComponentSizePercentile50, 1.0) AS stronglyConnectedComponentSizeRatio
280283
,codeUnit.embeddingsFastRandomProjectionTunedForClusteringVisualizationX AS embeddingVisualizationX
281284
,codeUnit.embeddingsFastRandomProjectionTunedForClusteringVisualizationY AS embeddingVisualizationY
282285
"""
@@ -500,6 +503,7 @@ def objective(trial) -> float:
500503
study.enqueue_trial({'isolation_max_samples': 0.42726366840740576, 'isolation_n_estimators': 141, 'proxy_n_estimators': 190, 'proxy_max_depth': 5})
501504
study.enqueue_trial({'isolation_max_samples': 0.40638732079782663, 'isolation_n_estimators': 108, 'proxy_n_estimators': 191, 'proxy_max_depth': 9})
502505

506+
study.enqueue_trial({'isolation_max_samples': 0.10105966483207725, 'isolation_n_estimators': 271, 'proxy_n_estimators': 237, 'proxy_max_depth': 9})
503507
study.enqueue_trial({'isolation_max_samples': 0.10010443935999927, 'isolation_n_estimators': 350, 'proxy_n_estimators': 344, 'proxy_max_depth': 8})
504508
study.enqueue_trial({'isolation_max_samples': 0.10015063610944819, 'isolation_n_estimators': 329, 'proxy_n_estimators': 314, 'proxy_max_depth': 8})
505509

0 commit comments

Comments
 (0)