Skip to content

Commit 1a293f6

Browse files
committed
Improve readability of anomaly detection feature plots
1 parent 0376a80 commit 1a293f6

2 files changed

Lines changed: 22 additions & 11 deletions

File tree

domains/anomaly-detection/explore/AnomalyDetectionIsolationForestExploration.ipynb

Lines changed: 12 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -367,9 +367,6 @@
367367
"def plot_feature_correlation_matrix(features: pd.DataFrame) -> None:\n",
368368
" \"\"\"\n",
369369
" Plots the correlation matrix of the features in the DataFrame.\n",
370-
" \n",
371-
" :param java_package_anomaly_detection_features: DataFrame containing the features.\n",
372-
" :param java_package_features_to_standardize: List of feature names to include in the correlation matrix.\n",
373370
" \"\"\"\n",
374371
" correlation_matrix = features.corr()\n",
375372
"\n",
@@ -381,7 +378,7 @@
381378
" axis.set_xticklabels(correlation_matrix.columns, rotation=90)\n",
382379
" axis.set_yticklabels(correlation_matrix.index)\n",
383380
" for (i, j), correlation_value in np.ndenumerate(correlation_matrix.values):\n",
384-
" axis.text(j, i, f\"{correlation_value:.2f}\", ha='center', va='center', color='black', bbox=dict(facecolor='white', alpha=0.3, edgecolor='none'))\n",
381+
" axis.text(j, i, f\"{correlation_value:.2f}\", ha='center', va='center', color='black', bbox=dict(facecolor='white', alpha=0.3, edgecolor='none'), fontsize=6)\n",
385382
" plot.title(\"Feature Correlation Matrix (excluding embeddings)\", fontsize=10)\n",
386383
" plot.tight_layout()\n",
387384
" plot.show()"
@@ -946,8 +943,8 @@
946943
" x_position_column: str = 'embeddingVisualizationX',\n",
947944
" y_position_column: str = 'embeddingVisualizationY',\n",
948945
" annotate_top_n_anomalies: int = 10,\n",
949-
" annotate_top_n_non_anomalies: int = 5,\n",
950-
" annotate_top_n_clusters: int = 20,\n",
946+
" annotate_top_n_non_anomalies: int = 3,\n",
947+
" annotate_top_n_clusters: int = 10,\n",
951948
" percentile_of_distance_to_center: float = 0.8,\n",
952949
" no_cluster_coloring: bool = False,\n",
953950
") -> None:\n",
@@ -995,7 +992,11 @@
995992
" cluster_non_noise = cluster_without_anomalies[cluster_without_anomalies[cluster_label_column] != -1]\n",
996993
"\n",
997994
" plot.figure(figsize=(10, 10))\n",
998-
" plot.title(f\"{title_prefix} (size={size_column}, main-color=cluster, red=anomaly, green=non-anomaly)\", pad=20)\n",
995+
" plot.title(\n",
996+
" label=f\"{title_prefix} (size={size_column}, main-color=cluster, red=anomaly, green=non-anomaly)\", \n",
997+
" pad=30,\n",
998+
" bbox=dict(facecolor='white', edgecolor='none', pad=2, alpha=0.6)\n",
999+
" )\n",
9991000
"\n",
10001001
" # Plot noise (from clustering)\n",
10011002
" plot.scatter(\n",
@@ -1111,6 +1112,8 @@
11111112
" **plot_annotation_style\n",
11121113
" )\n",
11131114
"\n",
1115+
" plot.tight_layout(pad=0.2)\n",
1116+
" plot.axis('off')\n",
11141117
" plot.show()"
11151118
]
11161119
},
@@ -1263,6 +1266,8 @@
12631266
" **plot_annotation_style\n",
12641267
" )\n",
12651268
"\n",
1269+
" plot.tight_layout(pad=0.2)\n",
1270+
" plot.axis('off')\n",
12661271
" plot.show()"
12671272
]
12681273
},

domains/anomaly-detection/tunedAnomalyDetectionExplained.py

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -621,8 +621,8 @@ def plot_anomalies(
621621
return
622622

623623
annotate_top_n_anomalies: int = 10
624-
annotate_top_n_non_anomalies: int = 5
625-
annotate_top_n_clusters: int = 20
624+
annotate_top_n_non_anomalies: int = 3
625+
annotate_top_n_clusters: int = 10
626626

627627
features_to_visualize_zoomed=zoom_into_center_while_preserving_top_scores(
628628
features_to_visualize,
@@ -647,8 +647,11 @@ def get_common_plot_parameters(data: pd.DataFrame) -> dict:
647647
cluster_non_noise = cluster_without_anomalies[cluster_without_anomalies[cluster_label_column] != -1]
648648

649649
plot.figure(figsize=(10, 10))
650-
plot.title(f"{title_prefix} Anomalies (size={size_column}, main-color=cluster, red=anomaly, green=non-anomaly)", pad=20)
651-
650+
plot.title(
651+
label=f"{title_prefix} Anomalies (size={size_column}, main-color=cluster, red=anomaly, green=non-anomaly)",
652+
pad=30,
653+
bbox=dict(facecolor='white', edgecolor='none', pad=2, alpha=0.6)
654+
)
652655
# Plot noise (from clustering)
653656
plot.scatter(
654657
**get_common_plot_parameters(cluster_noise),
@@ -712,6 +715,9 @@ def get_common_plot_parameters(data: pd.DataFrame) -> dict:
712715
color="red",
713716
)
714717

718+
plot.tight_layout(pad=0.2)
719+
plot.axis('off')
720+
715721
plot.savefig(plot_file_path)
716722
plot.close()
717723

0 commit comments

Comments
 (0)