|
407 | 407 | " return scaler.fit_transform(features_to_scale)" |
408 | 408 | ] |
409 | 409 | }, |
| 410 | + { |
| 411 | + "cell_type": "code", |
| 412 | + "execution_count": null, |
| 413 | + "id": "04d510a6", |
| 414 | + "metadata": {}, |
| 415 | + "outputs": [], |
| 416 | + "source": [ |
| 417 | + "def remove_constant_features(features: pd.DataFrame, feature_names: list[str]) -> list[str]:\n", |
| 418 | + " \"\"\"\n", |
| 419 | + " Removes constant features from the feature list.\n", |
| 420 | + " \"\"\"\n", |
| 421 | + " non_constant_features = []\n", |
| 422 | + " for feature in feature_names:\n", |
| 423 | + " if features[feature].nunique() > 1:\n", |
| 424 | + " non_constant_features.append(feature)\n", |
| 425 | + " else:\n", |
| 426 | + " print(f\"Removed constant feature: {feature}\")\n", |
| 427 | + " return non_constant_features" |
| 428 | + ] |
| 429 | + }, |
410 | 430 | { |
411 | 431 | "cell_type": "code", |
412 | 432 | "execution_count": null, |
413 | 433 | "id": "2de5ade1", |
414 | 434 | "metadata": {}, |
415 | 435 | "outputs": [], |
416 | 436 | "source": [ |
417 | | - "java_package_anomaly_detection_features_standardized = standardize_features(java_package_anomaly_detection_features, java_package_features_to_standardize)" |
| 437 | + "java_package_anomaly_detection_feature_names_to_standardize = remove_constant_features(java_package_anomaly_detection_features, java_package_features_to_standardize)\n", |
| 438 | + "java_package_anomaly_detection_features_standardized = standardize_features(java_package_anomaly_detection_features, java_package_anomaly_detection_feature_names_to_standardize)" |
418 | 439 | ] |
419 | 440 | }, |
420 | 441 | { |
|
491 | 512 | "outputs": [], |
492 | 513 | "source": [ |
493 | 514 | "java_package_anomaly_detection_features_prepared = np.hstack([java_package_anomaly_detection_features_standardized, java_package_anomaly_detection_node_embeddings_reduced])\n", |
494 | | - "java_package_anomaly_detection_feature_names = list(java_package_features_to_standardize) + [f'nodeEmbeddingPCA_{i}' for i in range(java_package_anomaly_detection_node_embeddings_reduced.shape[1])]" |
| 515 | + "java_package_anomaly_detection_feature_names = list(java_package_anomaly_detection_feature_names_to_standardize) + [f'nodeEmbeddingPCA_{i}' for i in range(java_package_anomaly_detection_node_embeddings_reduced.shape[1])]" |
495 | 516 | ] |
496 | 517 | }, |
497 | 518 | { |
|
1986 | 2007 | "outputs": [], |
1987 | 2008 | "source": [ |
1988 | 2009 | "validate_data(java_type_anomaly_detection_features)\n", |
1989 | | - "java_type_anomaly_detection_features_standardized = standardize_features(java_type_anomaly_detection_features, java_type_features_to_standardize)\n", |
| 2010 | + "java_type_anomaly_detection_feature_names_to_standardize = remove_constant_features(java_type_anomaly_detection_features, java_type_features_to_standardize )\n", |
| 2011 | + "java_type_anomaly_detection_features_standardized = standardize_features(java_type_anomaly_detection_features, java_type_anomaly_detection_feature_names_to_standardize)\n", |
1990 | 2012 | "java_type_anomaly_detection_node_embeddings_reduced = reduce_dimensionality_of_node_embeddings(java_type_anomaly_detection_features, max_dimensions=35)\n", |
1991 | 2013 | "\n", |
1992 | 2014 | "java_type_anomaly_detection_features_prepared = np.hstack([java_type_anomaly_detection_features_standardized, java_type_anomaly_detection_node_embeddings_reduced])\n", |
1993 | | - "java_type_anomaly_detection_feature_names = list(java_type_features_to_standardize) + [f'nodeEmbeddingPCA_{i}' for i in range(java_type_anomaly_detection_node_embeddings_reduced.shape[1])]\n", |
| 2015 | + "java_type_anomaly_detection_feature_names = list(java_type_anomaly_detection_feature_names_to_standardize) + [f'nodeEmbeddingPCA_{i}' for i in range(java_type_anomaly_detection_node_embeddings_reduced.shape[1])]\n", |
1994 | 2016 | "\n", |
1995 | | - "plot_feature_correlation_matrix(java_type_anomaly_detection_features[java_type_features_to_standardize])" |
| 2017 | + "plot_feature_correlation_matrix(java_type_anomaly_detection_features[java_type_anomaly_detection_feature_names_to_standardize])" |
1996 | 2018 | ] |
1997 | 2019 | }, |
1998 | 2020 | { |
|
0 commit comments