Skip to content

Commit 2fbddf8

Browse files
refactor: split get clusters and print cluster examples into separate functions
1 parent 5a3115e commit 2fbddf8

3 files changed

Lines changed: 13 additions & 6 deletions

File tree

ConsoleApp2/Analyser.cs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -119,13 +119,18 @@ public static ITransformer TrainKMeansClusterer(MLContext mlContext, IDataView t
119119
return pipeline.Fit(trainData);
120120
}
121121

122-
public static void PrintClusterExamples(MLContext mlContext, IDataView data, ITransformer model)
122+
public static IEnumerable<IGrouping<uint, CommitPredictionWithData>> GetClusters(MLContext mlContext, IDataView data, ITransformer model)
123123
{
124124
var predictions = model.Transform(data);
125125
var results = mlContext.Data.CreateEnumerable<CommitPredictionWithData>(predictions, reuseRowObject: false).ToList();
126-
127-
var clusters = results.GroupBy(x => x.PredictedClusterId).OrderBy(g => g.Key);
128-
126+
return results.GroupBy(x => x.PredictedClusterId).OrderBy(g => g.Key);
127+
}
128+
public static IEnumerable<IGrouping<uint, CommitPredictionWithData>> PredictClusterNames(IEnumerable<IGrouping<uint, CommitPredictionWithData>> clusters)
129+
{
130+
return null;
131+
}
132+
public static void PrintClusterExamples(IEnumerable<IGrouping<uint, CommitPredictionWithData>> clusters)
133+
{
129134
Console.WriteLine("\n--- Cluster Examples ---");
130135
foreach (var cluster in clusters)
131136
{

ConsoleApp2/Program.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ static async Task Main(string[] args)
4141
var model = Analyser.TrainKMeansClusterer(mlContext, split.TrainSet, featurizer, bestK);
4242

4343
// 6. Print 2 examples from each cluster
44-
Analyser.PrintClusterExamples(mlContext, split.TrainSet, model);
44+
var clusters = Analyser.GetClusters(mlContext, split.TrainSet, model);
45+
Analyser.PrintClusterExamples(clusters);
4546
}
4647
}
4748
}

ConsoleApp2/TODO.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
1. Add vector embeddings instead of featurized text
22
2. Try out DBSCAN
33
3. Try Kmeans visualisation
4-
4. Label clusters using LLM
4+
4. Label clusters using LLM.
5+
5. Find a way to add labels to the trained model so that we can use it for classification as well.

0 commit comments

Comments
 (0)