Skip to content

Commit 4e330dd

Browse files
refactor: put the FeaturesColumnName string in a class-level constant
1 parent ccbf280 commit 4e330dd

1 file changed

Lines changed: 6 additions & 13 deletions

File tree

ConsoleApp2/Analyser.cs

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,18 +20,11 @@ public class CommitPredictionWithData : CommitMLData
2020
public float[] Distances { get; set; }
2121
}
2222

23-
public class EDA
24-
{
25-
26-
}
27-
public class DataPoint
28-
{
29-
[VectorType(2)]
30-
public float[] Features { get; set; }
31-
}
3223

3324
internal class Analyser
3425
{
26+
private const string FeaturesColumnName = "Features";
27+
3528
public static IDataView LoadJsonDataForML(MLContext mlContext, string jsonFilePath)
3629
{
3730
if (!File.Exists(jsonFilePath))
@@ -73,7 +66,7 @@ public static DataOperationsCatalog.TrainTestData SplitData(MLContext mlContext,
7366

7467
public static IEstimator<ITransformer> FeaturizeText(MLContext mlContext)
7568
{
76-
return mlContext.Transforms.Text.FeaturizeText("Features", nameof(CommitMLData.CommitName));
69+
return mlContext.Transforms.Text.FeaturizeText(FeaturesColumnName, nameof(CommitMLData.CommitName));
7770
}
7871

7972
public static int GetOrFindBestK(MLContext mlContext, IDataView trainData, IDataView testData, IEstimator<ITransformer> featurizer, string kFilePath)
@@ -93,11 +86,11 @@ public static int GetOrFindBestK(MLContext mlContext, IDataView trainData, IData
9386

9487
for (int k = 2; k <= 10; k++)
9588
{
96-
var pipeline = featurizer.Append(mlContext.Clustering.Trainers.KMeans(featureColumnName: "Features", numberOfClusters: k));
89+
var pipeline = featurizer.Append(mlContext.Clustering.Trainers.KMeans(featureColumnName: FeaturesColumnName, numberOfClusters: k));
9790
var model = pipeline.Fit(trainData);
9891

9992
var predictions = model.Transform(testData);
100-
var metrics = mlContext.Clustering.Evaluate(predictions, labelColumnName: null, scoreColumnName: "Score", featureColumnName: "Features");
93+
var metrics = mlContext.Clustering.Evaluate(predictions, labelColumnName: null, scoreColumnName: "Score", featureColumnName: FeaturesColumnName);
10194

10295
Console.WriteLine($"K = {k} | Davies-Bouldin: {metrics.DaviesBouldinIndex:F4} | Avg Distance: {metrics.AverageDistance:F4}");
10396

@@ -118,7 +111,7 @@ public static int GetOrFindBestK(MLContext mlContext, IDataView trainData, IData
118111

119112
public static ITransformer TrainKMeansClusterer(MLContext mlContext, IDataView trainData, IEstimator<ITransformer> featurizer, int k)
120113
{
121-
var pipeline = featurizer.Append(mlContext.Clustering.Trainers.KMeans(featureColumnName: "Features", numberOfClusters: k));
114+
var pipeline = featurizer.Append(mlContext.Clustering.Trainers.KMeans(featureColumnName: FeaturesColumnName, numberOfClusters: k));
122115
return pipeline.Fit(trainData);
123116
}
124117

0 commit comments

Comments
 (0)