@@ -20,18 +20,11 @@ public class CommitPredictionWithData : CommitMLData
2020 public float [ ] Distances { get ; set ; }
2121 }
2222
23- public class EDA
24- {
25-
26- }
27- public class DataPoint
28- {
29- [ VectorType ( 2 ) ]
30- public float [ ] Features { get ; set ; }
31- }
3223
3324 internal class Analyser
3425 {
26+ private const string FeaturesColumnName = "Features" ;
27+
3528 public static IDataView LoadJsonDataForML ( MLContext mlContext , string jsonFilePath )
3629 {
3730 if ( ! File . Exists ( jsonFilePath ) )
@@ -73,7 +66,7 @@ public static DataOperationsCatalog.TrainTestData SplitData(MLContext mlContext,
7366
7467 public static IEstimator < ITransformer > FeaturizeText ( MLContext mlContext )
7568 {
76- return mlContext . Transforms . Text . FeaturizeText ( "Features" , nameof ( CommitMLData . CommitName ) ) ;
69+ return mlContext . Transforms . Text . FeaturizeText ( FeaturesColumnName , nameof ( CommitMLData . CommitName ) ) ;
7770 }
7871
7972 public static int GetOrFindBestK ( MLContext mlContext , IDataView trainData , IDataView testData , IEstimator < ITransformer > featurizer , string kFilePath )
@@ -93,11 +86,11 @@ public static int GetOrFindBestK(MLContext mlContext, IDataView trainData, IData
9386
9487 for ( int k = 2 ; k <= 10 ; k ++ )
9588 {
96- var pipeline = featurizer . Append ( mlContext . Clustering . Trainers . KMeans ( featureColumnName : "Features" , numberOfClusters : k ) ) ;
89+ var pipeline = featurizer . Append ( mlContext . Clustering . Trainers . KMeans ( featureColumnName : FeaturesColumnName , numberOfClusters : k ) ) ;
9790 var model = pipeline . Fit ( trainData ) ;
9891
9992 var predictions = model . Transform ( testData ) ;
100- var metrics = mlContext . Clustering . Evaluate ( predictions , labelColumnName : null , scoreColumnName : "Score" , featureColumnName : "Features" ) ;
93+ var metrics = mlContext . Clustering . Evaluate ( predictions , labelColumnName : null , scoreColumnName : "Score" , featureColumnName : FeaturesColumnName ) ;
10194
10295 Console . WriteLine ( $ "K = { k } | Davies-Bouldin: { metrics . DaviesBouldinIndex : F4} | Avg Distance: { metrics . AverageDistance : F4} ") ;
10396
@@ -118,7 +111,7 @@ public static int GetOrFindBestK(MLContext mlContext, IDataView trainData, IData
118111
119112 public static ITransformer TrainKMeansClusterer ( MLContext mlContext , IDataView trainData , IEstimator < ITransformer > featurizer , int k )
120113 {
121- var pipeline = featurizer . Append ( mlContext . Clustering . Trainers . KMeans ( featureColumnName : "Features" , numberOfClusters : k ) ) ;
114+ var pipeline = featurizer . Append ( mlContext . Clustering . Trainers . KMeans ( featureColumnName : FeaturesColumnName , numberOfClusters : k ) ) ;
122115 return pipeline . Fit ( trainData ) ;
123116 }
124117
0 commit comments