11using Microsoft . ML . Data ;
2+ using System . Security . Cryptography ;
23using System . Text . Json ;
34using Microsoft . ML ;
45
@@ -25,6 +26,15 @@ internal class Analyser
2526 {
2627 private const string FeaturesColumnName = "Features" ;
2728
/// <summary>
/// Computes the SHA-256 digest of a file's contents.
/// </summary>
/// <param name="filePath">Path of the file to hash.</param>
/// <returns>
/// The digest as a 64-character lowercase hexadecimal string, or
/// <see cref="string.Empty"/> when the file does not exist.
/// </returns>
public static string CalculateFileHash(string filePath)
{
    if (!File.Exists(filePath))
    {
        return string.Empty;
    }

    try
    {
        using var sha256 = SHA256.Create();
        using var stream = File.OpenRead(filePath);
        var hash = sha256.ComputeHash(stream);
        return BitConverter.ToString(hash).Replace("-", "").ToLowerInvariant();
    }
    catch (FileNotFoundException)
    {
        // The file vanished between the existence check and the open;
        // report it the same way as a file that was never there.
        return string.Empty;
    }
    catch (DirectoryNotFoundException)
    {
        // Same race, but the containing directory was removed.
        return string.Empty;
    }
}
37+
2838 public static IDataView LoadJsonDataForML ( MLContext mlContext , string jsonFilePath )
2939 {
3040 if ( ! File . Exists ( jsonFilePath ) )
@@ -125,16 +135,97 @@ public static IEnumerable<IGrouping<uint, CommitPredictionWithData>> GetClusters
125135 var results = mlContext . Data . CreateEnumerable < CommitPredictionWithData > ( predictions , reuseRowObject : false ) . ToList ( ) ;
126136 return results . GroupBy ( x => x . PredictedClusterId ) . OrderBy ( g => g . Key ) ;
127137 }
128- public static IEnumerable < IGrouping < uint , CommitPredictionWithData > > PredictClusterNames ( IEnumerable < IGrouping < uint , CommitPredictionWithData > > clusters )
138+
/// <summary>
/// Produces a display name for every cluster.
/// </summary>
/// <remarks>
/// Names are resolved in this order:
/// 1. If <paramref name="labelsFilePath"/> points to a readable, non-empty JSON
///    map of cluster id to name, that map is returned as-is.
/// 2. If the <c>GEMINI_API_KEY</c> environment variable is not set, every cluster
///    gets the default name "Cluster {id}".
/// 3. Otherwise the Gemini <c>generateContent</c> endpoint is asked for a short
///    category name per cluster, based on up to 10 of its commit messages.
///    A cluster whose request fails falls back to the default name.
/// The predicted names are written to <paramref name="labelsFilePath"/> only when
/// every cluster was named successfully, so a transient API failure is retried on
/// the next run instead of being cached as a permanent fallback name.
/// </remarks>
/// <param name="clusters">Clusters to name, keyed by predicted cluster id.</param>
/// <param name="labelsFilePath">
/// Path of the JSON cache file. A null or blank path disables caching.
/// </param>
/// <param name="modelName">Gemini model identifier used in the request URL.</param>
/// <returns>A map from cluster id to its display name.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="clusters"/> is null and no cached names were available.
/// </exception>
public static async Task<Dictionary<uint, string>> PredictClusterNamesAsync(IEnumerable<IGrouping<uint, CommitPredictionWithData>> clusters, string labelsFilePath, string modelName = "gemini-2.5-flash")
{
    var cachingEnabled = !string.IsNullOrWhiteSpace(labelsFilePath);

    if (cachingEnabled)
    {
        var cachedNames = await LoadCachedClusterNamesAsync(labelsFilePath);
        if (cachedNames.Count > 0)
        {
            Console.WriteLine($"Loaded cluster names from {labelsFilePath}.");
            return cachedNames;
        }
    }

    if (clusters == null)
    {
        throw new ArgumentNullException(nameof(clusters));
    }

    // Materialize once: the sequence may be a deferred query and is walked below.
    var clusterList = clusters.ToList();

    var apiKey = Environment.GetEnvironmentVariable("GEMINI_API_KEY");
    if (string.IsNullOrEmpty(apiKey))
    {
        Console.WriteLine("GEMINI_API_KEY environment variable not found. Using default cluster numeric names.");
        return clusterList.ToDictionary(g => g.Key, g => DefaultClusterName(g.Key));
    }

    var clusterNames = new Dictionary<uint, string>();
    var allPredicted = true;

    using var httpClient = new System.Net.Http.HttpClient();
    // Send the key as a header rather than in the query string so it does not
    // end up in URLs that get logged or echoed in error messages.
    httpClient.DefaultRequestHeaders.TryAddWithoutValidation("x-goog-api-key", apiKey);

    // The URL does not depend on the cluster, so build it once.
    var url = $"https://generativelanguage.googleapis.com/v1beta/models/{Uri.EscapeDataString(modelName ?? string.Empty)}:generateContent";

    Console.WriteLine("\nPredicting cluster names using Gemini API...");
    foreach (var cluster in clusterList)
    {
        var fallbackName = DefaultClusterName(cluster.Key);
        var commitsToUse = cluster.Take(10).Select(c => c.CommitName).ToList();
        var prompt = "Based on the following git commit messages, provide a short 1-3 word category name for this cluster.\n\n" +
                     "Commits:\n" + string.Join("\n", commitsToUse) + "\n\nCategory name:";

        var requestBody = new
        {
            contents = new[]
            {
                new
                {
                    parts = new[] { new { text = prompt } }
                }
            }
        };

        try
        {
            using var jsonContent = new System.Net.Http.StringContent(JsonSerializer.Serialize(requestBody), System.Text.Encoding.UTF8, "application/json");
            using var response = await httpClient.PostAsync(url, jsonContent);

            if (!response.IsSuccessStatusCode)
            {
                Console.WriteLine($"Failed to predict name for Cluster {cluster.Key}. Status: {response.StatusCode}");
                clusterNames[cluster.Key] = fallbackName;
                allPredicted = false;
                continue;
            }

            var responseString = await response.Content.ReadAsStringAsync();
            using var doc = JsonDocument.Parse(responseString);
            var text = doc.RootElement
                .GetProperty("candidates")[0]
                .GetProperty("content")
                .GetProperty("parts")[0]
                .GetProperty("text").GetString();

            var cleanedName = CleanClusterName(text);
            if (cleanedName.Length == 0)
            {
                // An empty answer is not worth caching; retry on a later run.
                cleanedName = fallbackName;
                allPredicted = false;
            }

            clusterNames[cluster.Key] = cleanedName;
            Console.WriteLine($"Cluster {cluster.Key} predicted as: {cleanedName}");
        }
        catch (Exception ex)
        {
            // Best effort per cluster: one failed request must not abort the others.
            Console.WriteLine($"Error predicting name for Cluster {cluster.Key}: {ex.Message}");
            clusterNames[cluster.Key] = fallbackName;
            allPredicted = false;
        }
    }

    if (cachingEnabled)
    {
        if (allPredicted)
        {
            // Save for future runs
            var saveFormat = clusterNames.ToDictionary(
                k => k.Key.ToString(System.Globalization.CultureInfo.InvariantCulture),
                v => v.Value);
            var jsonOut = JsonSerializer.Serialize(saveFormat, new JsonSerializerOptions { WriteIndented = true });
            await File.WriteAllTextAsync(labelsFilePath, jsonOut);
        }
        else
        {
            Console.WriteLine($"Some cluster names could not be predicted; not saving them to {labelsFilePath} so they are retried next run.");
        }
    }

    return clusterNames;
}

/// <summary>Builds the numeric fallback name used when no predicted name is available.</summary>
private static string DefaultClusterName(uint clusterId)
{
    return $"Cluster {clusterId}";
}

/// <summary>
/// Strips surrounding whitespace and quotes, a trailing period, and all single
/// quotes from a model answer. Returns an empty string for null or blank input.
/// </summary>
private static string CleanClusterName(string rawName)
{
    if (string.IsNullOrWhiteSpace(rawName))
    {
        return string.Empty;
    }

    return rawName.Trim()
        .TrimEnd('\r', '\n', '.', '"', '\'')
        .TrimStart('"', '\'')
        .Replace("'", "")
        .Trim();
}

/// <summary>
/// Reads the cluster-name cache file. Returns an empty dictionary when the file
/// is missing, unreadable, not valid JSON, or contains a key that is not an
/// unsigned integer, so the caller treats all of those as a cache miss.
/// </summary>
private static async Task<Dictionary<uint, string>> LoadCachedClusterNamesAsync(string labelsFilePath)
{
    var cachedNames = new Dictionary<uint, string>();
    if (!File.Exists(labelsFilePath))
    {
        return cachedNames;
    }

    Dictionary<string, string> rawLabels;
    try
    {
        var json = await File.ReadAllTextAsync(labelsFilePath);
        rawLabels = JsonSerializer.Deserialize<Dictionary<string, string>>(json);
    }
    catch (Exception ex) when (ex is IOException || ex is JsonException)
    {
        Console.WriteLine($"Ignoring unreadable cluster names file {labelsFilePath}: {ex.Message}");
        return cachedNames;
    }

    if (rawLabels == null)
    {
        return cachedNames;
    }

    foreach (var entry in rawLabels)
    {
        if (!uint.TryParse(entry.Key, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out var clusterId))
        {
            // A non-numeric key means the file is not one this method wrote; distrust all of it.
            Console.WriteLine($"Ignoring cluster names file {labelsFilePath}: invalid cluster id '{entry.Key}'.");
            return new Dictionary<uint, string>();
        }

        cachedNames[clusterId] = entry.Value;
    }

    return cachedNames;
}
132- public static void PrintClusterExamples ( IEnumerable < IGrouping < uint , CommitPredictionWithData > > clusters )
221+
222+ public static void PrintClusterExamples ( IEnumerable < IGrouping < uint , CommitPredictionWithData > > clusters , Dictionary < uint , string > clusterNames = null )
133223 {
134224 Console . WriteLine ( "\n --- Cluster Examples ---" ) ;
135225 foreach ( var cluster in clusters )
136226 {
137- Console . WriteLine ( $ "\n Cluster { cluster . Key } :") ;
227+ var name = clusterNames != null && clusterNames . TryGetValue ( cluster . Key , out var cn ) ? cn : $ "Cluster { cluster . Key } ";
228+ Console . WriteLine ( $ "\n { name } :") ;
138229 foreach ( var example in cluster . Take ( 2 ) ) // 2 examples each from each cluster
139230 {
140231 Console . WriteLine ( $ " - [{ example . Repository } ] { example . CommitName } ") ;
0 commit comments