Skip to content

Commit aaf7716

Browse files
refactor: separate AI cluster labeller into its own class
1 parent b57b7da commit aaf7716

4 files changed

Lines changed: 98 additions & 85 deletions

File tree

ConsoleApp2/AiClusterLabeler.cs

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.IO;
4+
using System.Linq;
5+
using System.Text.Json;
6+
using System.Threading.Tasks;
7+
8+
namespace GitCommitAnalyser
9+
{
10+
/// <summary>
/// Names commit clusters by asking the Gemini API for a short category label and
/// caches the answers in a JSON file so later runs can skip the network calls.
/// </summary>
public class AiClusterLabeler
{
    /// <summary>
    /// Returns a display name for every cluster, keyed by cluster id.
    /// </summary>
    /// <remarks>
    /// Resolution order:
    /// 1. a non-empty, well-formed cache file at <paramref name="labelsFilePath"/>;
    /// 2. "Cluster N" placeholders when the GEMINI_API_KEY environment variable is not set;
    /// 3. one Gemini request per cluster, with a "Cluster N" placeholder for any cluster whose request fails.
    /// The cache file is only written when every cluster was labelled, so a transient API
    /// failure is retried on the next run instead of being frozen into the cache.
    /// </remarks>
    /// <param name="clusters">Commits grouped by predicted cluster id.</param>
    /// <param name="labelsFilePath">Path of the JSON cache file (cluster id -> name).</param>
    /// <param name="modelName">Gemini model identifier inserted into the request URL.</param>
    /// <param name="commitProcessCount">Maximum number of commit messages per cluster sent in the prompt.</param>
    /// <returns>A dictionary mapping each cluster id to its name.</returns>
    public static async Task<Dictionary<uint, string>> PredictClusterNamesAsync(IEnumerable<IGrouping<uint, CommitPredictionWithData>> clusters, string labelsFilePath, string modelName = "gemini-2.5-flash", int commitProcessCount = 20)
    {
        var cachedNames = TryLoadCachedNames(labelsFilePath);
        if (cachedNames != null)
        {
            Console.WriteLine($"Loaded cluster names from {labelsFilePath}.");
            return cachedNames;
        }

        var apiKey = Environment.GetEnvironmentVariable("GEMINI_API_KEY");
        if (string.IsNullOrEmpty(apiKey))
        {
            Console.WriteLine("GEMINI_API_KEY environment variable not found. Using default cluster numeric names.");
            return clusters.ToDictionary(g => g.Key, g => $"Cluster {g.Key}");
        }

        var clusterNames = new Dictionary<uint, string>();
        var everyClusterLabelled = true;

        // The URL does not vary per cluster, so build it once. The key travels in a
        // request header rather than the query string to keep it out of URLs.
        var url = $"https://generativelanguage.googleapis.com/v1beta/models/{modelName}:generateContent";
        using var httpClient = new System.Net.Http.HttpClient();
        httpClient.DefaultRequestHeaders.TryAddWithoutValidation("x-goog-api-key", apiKey);

        Console.WriteLine("\nPredicting cluster names using Gemini API...");
        foreach (var cluster in clusters)
        {
            var fallbackName = $"Cluster {cluster.Key}";
            var commitsToUse = cluster.Take(commitProcessCount).Select(c => c.CommitName);
            var prompt = "Based on the following git commit messages, provide a short 1-3 word category name for this cluster.\n\n" +
                         "Commits:\n" + string.Join("\n", commitsToUse) + "\n\nCategory name:";

            string rawLabel = null;
            try
            {
                rawLabel = await RequestLabelAsync(httpClient, url, prompt, cluster.Key);
            }
            catch (Exception ex)
            {
                // Best effort: a network or response-shape problem for one cluster must not abort the rest.
                Console.WriteLine($"Error predicting name for Cluster {cluster.Key}: {ex.Message}");
            }

            if (rawLabel == null)
            {
                everyClusterLabelled = false;
                clusterNames[cluster.Key] = fallbackName;
                continue;
            }

            var cleanedName = CleanLabel(rawLabel);
            clusterNames[cluster.Key] = string.IsNullOrWhiteSpace(cleanedName) ? fallbackName : cleanedName;
            Console.WriteLine($"Cluster {cluster.Key} predicted as: {clusterNames[cluster.Key]}");
        }

        if (everyClusterLabelled)
        {
            SaveNames(labelsFilePath, clusterNames);
        }
        else
        {
            Console.WriteLine($"Some clusters could not be labelled; not caching names to {labelsFilePath} so the next run retries.");
        }

        return clusterNames;
    }

    /// <summary>
    /// Reads the cache file, returning null when it is missing, empty, unreadable or malformed
    /// so that the caller regenerates the labels instead of crashing.
    /// </summary>
    private static Dictionary<uint, string> TryLoadCachedNames(string labelsFilePath)
    {
        if (!File.Exists(labelsFilePath))
        {
            return null;
        }

        Dictionary<string, string> rawEntries;
        try
        {
            rawEntries = JsonSerializer.Deserialize<Dictionary<string, string>>(File.ReadAllText(labelsFilePath));
        }
        catch (Exception ex) when (ex is JsonException || ex is IOException || ex is UnauthorizedAccessException)
        {
            Console.WriteLine($"Ignoring unreadable cluster name cache {labelsFilePath}: {ex.Message}");
            return null;
        }

        if (rawEntries == null || rawEntries.Count == 0)
        {
            return null;
        }

        var names = new Dictionary<uint, string>(rawEntries.Count);
        foreach (var entry in rawEntries)
        {
            if (!uint.TryParse(entry.Key, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out var clusterId))
            {
                Console.WriteLine($"Ignoring cluster name cache {labelsFilePath}: '{entry.Key}' is not a valid cluster id.");
                return null;
            }

            names[clusterId] = entry.Value;
        }

        return names;
    }

    /// <summary>
    /// Sends one generateContent request and returns the first candidate's text.
    /// Returns null (after logging the status) when the HTTP status is not successful;
    /// throws when the request fails or the response does not have the expected shape.
    /// </summary>
    private static async Task<string> RequestLabelAsync(System.Net.Http.HttpClient httpClient, string url, string prompt, uint clusterId)
    {
        var requestBody = new
        {
            contents = new[]
            {
                new
                {
                    parts = new[] { new { text = prompt } }
                }
            }
        };

        using var jsonContent = new System.Net.Http.StringContent(JsonSerializer.Serialize(requestBody), System.Text.Encoding.UTF8, "application/json");
        using var response = await httpClient.PostAsync(url, jsonContent);
        if (!response.IsSuccessStatusCode)
        {
            Console.WriteLine($"Failed to predict name for Cluster {clusterId}. Status: {response.StatusCode}");
            return null;
        }

        var responseString = await response.Content.ReadAsStringAsync();
        using var doc = JsonDocument.Parse(responseString);
        var text = doc.RootElement
            .GetProperty("candidates")[0]
            .GetProperty("content")
            .GetProperty("parts")[0]
            .GetProperty("text").GetString();

        // A JSON null is a successful but empty answer; the caller maps it to the placeholder name.
        return text ?? string.Empty;
    }

    /// <summary>
    /// Strips surrounding whitespace and quotes, trailing full stops, and any remaining
    /// single quotes from the model's answer.
    /// </summary>
    private static string CleanLabel(string rawLabel)
    {
        var unwrapped = rawLabel
            .Trim()
            .TrimStart('"', '\'')
            .TrimEnd('\r', '\n', '.', '"', '\'');

        // Final Trim: removing trailing punctuation can expose whitespace that preceded it.
        return unwrapped.Replace("'", "").Trim();
    }

    /// <summary>
    /// Writes the names to the cache file as indented JSON. The cache is only an
    /// optimisation, so a write failure is reported and the computed names are still returned.
    /// </summary>
    private static void SaveNames(string labelsFilePath, Dictionary<uint, string> clusterNames)
    {
        var saveFormat = clusterNames.ToDictionary(
            k => k.Key.ToString(System.Globalization.CultureInfo.InvariantCulture),
            v => v.Value);
        var jsonOut = JsonSerializer.Serialize(saveFormat, new JsonSerializerOptions { WriteIndented = true });

        try
        {
            File.WriteAllText(labelsFilePath, jsonOut);
        }
        catch (Exception ex) when (ex is IOException || ex is UnauthorizedAccessException)
        {
            Console.WriteLine($"Could not save cluster names to {labelsFilePath}: {ex.Message}");
        }
    }
}
95+
}

ConsoleApp2/Analyser.cs

Lines changed: 0 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -136,89 +136,6 @@ public static IEnumerable<IGrouping<uint, CommitPredictionWithData>> GetClusters
136136
return results.GroupBy(x => x.PredictedClusterId).OrderBy(g => g.Key);
137137
}
138138

139-
/// <summary>
/// Returns a display name for every cluster, keyed by cluster id.
/// </summary>
/// <remarks>
/// A non-empty, well-formed cache file at <paramref name="labelsFilePath"/> is used as-is.
/// Otherwise, when GEMINI_API_KEY is not set every cluster is named "Cluster N"; when it is set,
/// one Gemini request is made per cluster and a failed request falls back to "Cluster N".
/// The cache is written only when every cluster was labelled, so a transient API failure
/// is retried on the next run rather than being stored permanently.
/// </remarks>
/// <param name="clusters">Commits grouped by predicted cluster id.</param>
/// <param name="labelsFilePath">Path of the JSON cache file (cluster id -> name).</param>
/// <param name="modelName">Gemini model identifier inserted into the request URL.</param>
/// <param name="commitProcessCount">Maximum number of commit messages per cluster sent in the prompt.</param>
/// <returns>A dictionary mapping each cluster id to its name.</returns>
public static async Task<Dictionary<uint, string>> PredictClusterNamesAsync(IEnumerable<IGrouping<uint, CommitPredictionWithData>> clusters, string labelsFilePath, string modelName = "gemini-2.5-flash", int commitProcessCount = 10)
{
    if (File.Exists(labelsFilePath))
    {
        try
        {
            var json = File.ReadAllText(labelsFilePath);
            var dict = JsonSerializer.Deserialize<Dictionary<string, string>>(json);
            if (dict != null && dict.Count > 0)
            {
                var cachedNames = dict.ToDictionary(
                    k => uint.Parse(k.Key, System.Globalization.CultureInfo.InvariantCulture),
                    v => v.Value);
                Console.WriteLine($"Loaded cluster names from {labelsFilePath}.");
                return cachedNames;
            }
        }
        catch (Exception ex) when (ex is JsonException || ex is FormatException || ex is OverflowException || ex is ArgumentException || ex is IOException || ex is UnauthorizedAccessException)
        {
            // A damaged cache is not fatal: report it and regenerate the names below.
            Console.WriteLine($"Ignoring unreadable cluster name cache {labelsFilePath}: {ex.Message}");
        }
    }

    var apiKey = Environment.GetEnvironmentVariable("GEMINI_API_KEY");
    if (string.IsNullOrEmpty(apiKey))
    {
        Console.WriteLine("GEMINI_API_KEY environment variable not found. Using default cluster numeric names.");
        return clusters.ToDictionary(g => g.Key, g => $"Cluster {g.Key}");
    }

    var clusterNames = new Dictionary<uint, string>();
    var everyClusterLabelled = true;

    // Built once because it does not vary per cluster; the key is sent as a header, not in the URL.
    var url = $"https://generativelanguage.googleapis.com/v1beta/models/{modelName}:generateContent";
    using var httpClient = new System.Net.Http.HttpClient();
    httpClient.DefaultRequestHeaders.TryAddWithoutValidation("x-goog-api-key", apiKey);

    Console.WriteLine("\nPredicting cluster names using Gemini API...");
    foreach (var cluster in clusters)
    {
        var fallbackName = $"Cluster {cluster.Key}";
        var commitsToUse = cluster.Take(commitProcessCount).Select(c => c.CommitName);
        var prompt = "Based on the following git commit messages, provide a short 1-3 word category name for this cluster.\n\n" +
                     "Commits:\n" + string.Join("\n", commitsToUse) + "\n\nCategory name:";

        var requestBody = new
        {
            contents = new[]
            {
                new
                {
                    parts = new[] { new { text = prompt } }
                }
            }
        };

        try
        {
            using var jsonContent = new System.Net.Http.StringContent(JsonSerializer.Serialize(requestBody), System.Text.Encoding.UTF8, "application/json");
            using var response = await httpClient.PostAsync(url, jsonContent);
            if (response.IsSuccessStatusCode)
            {
                var responseString = await response.Content.ReadAsStringAsync();
                using var doc = JsonDocument.Parse(responseString);
                var text = doc.RootElement
                    .GetProperty("candidates")[0]
                    .GetProperty("content")
                    .GetProperty("parts")[0]
                    .GetProperty("text").GetString();

                // Strip wrapping quotes on both sides, trailing full stops, then any remaining single quotes.
                var cleanedName = text?.Trim()
                    .TrimStart('"', '\'')
                    .TrimEnd('\r', '\n', '.', '"', '\'')
                    .Replace("'", "")
                    .Trim();
                clusterNames[cluster.Key] = string.IsNullOrWhiteSpace(cleanedName) ? fallbackName : cleanedName;
                Console.WriteLine($"Cluster {cluster.Key} predicted as: {clusterNames[cluster.Key]}");
            }
            else
            {
                Console.WriteLine($"Failed to predict name for Cluster {cluster.Key}. Status: {response.StatusCode}");
                clusterNames[cluster.Key] = fallbackName;
                everyClusterLabelled = false;
            }
        }
        catch (Exception ex)
        {
            // Best effort: one failing cluster must not abort the remaining ones.
            Console.WriteLine($"Error predicting name for Cluster {cluster.Key}: {ex.Message}");
            clusterNames[cluster.Key] = fallbackName;
            everyClusterLabelled = false;
        }
    }

    if (!everyClusterLabelled)
    {
        Console.WriteLine($"Some clusters could not be labelled; not caching names to {labelsFilePath} so the next run retries.");
        return clusterNames;
    }

    // Save for future runs. The cache is only an optimisation, so a write failure is reported, not thrown.
    var saveFormat = clusterNames.ToDictionary(
        k => k.Key.ToString(System.Globalization.CultureInfo.InvariantCulture),
        v => v.Value);
    var jsonOut = JsonSerializer.Serialize(saveFormat, new JsonSerializerOptions { WriteIndented = true });
    try
    {
        File.WriteAllText(labelsFilePath, jsonOut);
    }
    catch (Exception ex) when (ex is IOException || ex is UnauthorizedAccessException)
    {
        Console.WriteLine($"Could not save cluster names to {labelsFilePath}: {ex.Message}");
    }

    return clusterNames;
}
221-
222139
public static void PrintClusterExamples(IEnumerable<IGrouping<uint, CommitPredictionWithData>> clusters, Dictionary<uint, string> clusterNames = null)
223140
{
224141
Console.WriteLine("\n--- Cluster Examples ---");

ConsoleApp2/Program.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ static async Task Main(string[] args)
7272
var clusters = Analyser.GetClusters(mlContext, split.TrainSet, model);
7373

7474
// 6. Predict cluster names via Gemini (or load from cache)
75-
var clusterNames = await Analyser.PredictClusterNamesAsync(clusters, labelsFilePath);
75+
var clusterNames = await AiClusterLabeler.PredictClusterNamesAsync(clusters, labelsFilePath);
7676

7777
// 7. Print examples from each cluster with readable names
7878
Analyser.PrintClusterExamples(clusters, clusterNames);

ConsoleApp2/TODO.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@
22
2. Try out DB scan
33
3. Try Kmeans visualisation
44
4. Label clusters using LLM.
5-
5. Find a way to add labels to the trained model so that we can use it for classification as well.
5+
5. Find a way to add labels to the trained model so that we can use it for classification as well.
6+
6. Add a CLI to input any text and classify it using the trained model.

0 commit comments

Comments
 (0)