Skip to content

Commit 5bd9a8b

Browse files
refactor: main function into config and orchestrator
Refactored `Program.cs` to delegate main logic to a new `AppOrchestrator` class, improving modularity and separation of concerns. Introduced `AppConfig` to handle user-driven configuration, replacing hardcoded values. Updated `CommitFetcher.RunAsync` to support an optional `maxRepos` parameter for limiting repository fetches. Added `AppOrchestrator` to centralize the application workflow, including data fetching, ML pipeline setup, and interactive labeling. Implemented cache validation to ensure data integrity and reduce redundant operations. Performed minor cleanup, including removing unused `using` directives and simplifying the entry point. Overall, improved maintainability, flexibility, and user interactivity.
1 parent 8de2fdd commit 5bd9a8b

4 files changed

Lines changed: 129 additions & 74 deletions

File tree

ConsoleApp2/AppConfig.cs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
using System;
2+
3+
namespace GitCommitAnalyser
4+
{
5+
/// <summary>
/// Settings for a single run: the GitHub organization to fetch commits from,
/// an optional cap on the number of repositories, and the paths of the data
/// file and the cache files derived from it.
/// </summary>
public class AppConfig
{
    // Single source of truth for the default organization, so the prompt text
    // and the fallback value cannot drift apart.
    private const string DefaultOrgName = "CodeChefVIT";

    /// <summary>GitHub organization name; defaults to the built-in organization.</summary>
    public string OrgName { get; set; } = DefaultOrgName;

    /// <summary>Maximum number of repositories to fetch; <c>null</c> means no limit was requested.</summary>
    public int? MaxRepos { get; set; }

    /// <summary>Path of the JSON file holding the fetched commits.</summary>
    public string OutputPath { get; set; } = "repo_commits.json";

    /// <summary>Path of the file caching the chosen cluster count (K).</summary>
    public string KFilePath { get; set; } = "best_k.txt";

    /// <summary>Path of the file caching the cluster labels.</summary>
    public string LabelsFilePath { get; set; } = "cluster_labels.json";

    /// <summary>Path of the file holding the hash of the data file used for cache validation.</summary>
    public string HashFilePath { get; set; } = "data_hash.txt";

    /// <summary>Path of the saved model file.</summary>
    public string ModelFilePath { get; set; } = "kmeans_model.zip";

    /// <summary>
    /// Interactively asks the user for the organization name and the repository limit.
    /// Blank (or end-of-input) answers keep the defaults.
    /// </summary>
    /// <returns>A configuration populated from the user's answers.</returns>
    public static AppConfig PromptUserForConfiguration()
    {
        var config = new AppConfig();

        Console.Write($"Enter GitHub Organization name [default: {DefaultOrgName}]: ");
        var orgInput = Console.ReadLine();
        if (!string.IsNullOrWhiteSpace(orgInput))
        {
            config.OrgName = orgInput.Trim();
        }

        Console.Write("Enter Max Repos to fetch (leave blank for all) [default: all]: ");
        var maxReposInput = Console.ReadLine();
        if (int.TryParse(maxReposInput, out int count) && count > 0)
        {
            config.MaxRepos = count;
        }
        else if (!string.IsNullOrWhiteSpace(maxReposInput))
        {
            // Tell the user their answer was rejected instead of silently ignoring it.
            Console.WriteLine($"'{maxReposInput.Trim()}' is not a positive whole number; no repository limit will be applied.");
        }

        return config;
    }
}
33+
}

ConsoleApp2/AppOrchestrator.cs

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
using System;
2+
using System.IO;
3+
using System.Threading.Tasks;
4+
using Microsoft.ML;
5+
6+
namespace GitCommitAnalyser
7+
{
8+
/// <summary>
/// Runs the application workflow end to end: makes sure the commit data file
/// exists, loads it, validates the on-disk caches against the data file's hash,
/// loads or trains the clustering model, names the clusters and finally starts
/// the interactive labeling loop.
/// </summary>
public class AppOrchestrator
{
    private readonly AppConfig _config;

    /// <summary>Creates an orchestrator that operates on the given configuration.</summary>
    /// <param name="config">Paths and fetch settings for this run.</param>
    /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
    public AppOrchestrator(AppConfig config)
    {
        // Fail fast here rather than with a NullReferenceException deep inside RunAsync.
        _config = config ?? throw new ArgumentNullException(nameof(config));
    }

    /// <summary>
    /// Executes the workflow. Returns early (without throwing) when no data file
    /// is available or when the data cannot be loaded.
    /// </summary>
    public async Task RunAsync()
    {
        // 1. Ensure the data file exists (fetching it when missing).
        if (!await EnsureDataFileAsync())
        {
            return;
        }

        var mlContext = new MLContext(seed: 0);

        // 2. Load data
        var dataView = Analyser.LoadJsonDataForML(mlContext, _config.OutputPath);
        if (dataView == null)
        {
            return;
        }

        Console.WriteLine($"Loaded {dataView.GetRowCount()} rows into ML.NET IDataView.");

        // 3. Drop cached K / labels / model if the data file changed since they were produced.
        ValidateCache();

        // 4. Split data into train and test sets
        var split = Analyser.SplitData(mlContext, dataView);

        // 5. Reuse the saved model when one survived cache validation; otherwise train and save.
        ITransformer model;
        if (File.Exists(_config.ModelFilePath))
        {
            Console.WriteLine($"Loading existing model from {_config.ModelFilePath}...");
            // The input schema stored with the model is not needed here.
            model = mlContext.Model.Load(_config.ModelFilePath, out _);
        }
        else
        {
            var featurizer = Analyser.FeaturizeText(mlContext);
            int bestK = Analyser.GetOrFindBestK(mlContext, split.TrainSet, featurizer, _config.KFilePath);

            model = Analyser.TrainKMeansClusterer(mlContext, split.TrainSet, featurizer, bestK);

            mlContext.Model.Save(model, split.TrainSet.Schema, _config.ModelFilePath);
            Console.WriteLine($"Saved newly trained model to {_config.ModelFilePath}.");
        }

        var clusters = Analyser.GetClusters(mlContext, split.TrainSet, model);

        // 6. Predict cluster names via Gemini (or load from cache)
        var clusterNames = await AiClusterLabeler.PredictClusterNamesAsync(clusters, _config.LabelsFilePath);

        // 7. Print examples from each cluster with readable names
        Analyser.PrintClusterExamples(clusters, clusterNames);

        // 8. Start the interactive labeling loop
        var interactiveLabeler = new CommitInteractiveLabeler(mlContext, model, clusterNames);
        interactiveLabeler.StartInteractiveLoop();
    }

    /// <summary>
    /// Makes sure the commit data file is present, fetching it from GitHub when it is missing.
    /// </summary>
    /// <returns><c>true</c> when the data file exists afterwards; otherwise <c>false</c>.</returns>
    private async Task<bool> EnsureDataFileAsync()
    {
        if (File.Exists(_config.OutputPath))
        {
            // The organization and repository limit are only used when fetching,
            // so make it visible that they have no effect on this run.
            Console.WriteLine($"\nUsing existing data file '{_config.OutputPath}'. Delete it to fetch fresh commits for {_config.OrgName}.");
            return true;
        }

        Console.WriteLine($"\nFile '{_config.OutputPath}' not found. Fetching commits for {_config.OrgName} from GitHub...");
        await CommitFetcher.RunAsync(_config.OrgName, _config.OutputPath, _config.MaxRepos);

        // The fetcher can return without producing a file (e.g. when authentication
        // is not configured), so re-check instead of assuming success.
        if (File.Exists(_config.OutputPath))
        {
            return true;
        }

        Console.WriteLine($"No data file was produced at '{_config.OutputPath}'. Nothing to analyse.");
        return false;
    }

    /// <summary>
    /// Compares the current hash of the data file with the stored one and, on mismatch,
    /// deletes the cached K, labels and model files and records the new hash.
    /// </summary>
    private void ValidateCache()
    {
        string currentHash = Analyser.CalculateFileHash(_config.OutputPath);

        // Trim so a trailing newline in a hand-edited hash file does not force a retrain.
        string savedHash = File.Exists(_config.HashFilePath)
            ? File.ReadAllText(_config.HashFilePath).Trim()
            : string.Empty;

        if (string.Equals(currentHash, savedHash, StringComparison.Ordinal))
        {
            return;
        }

        Console.WriteLine("Data file changed or hash not found. Invalidating cache (K, labels, model)...");

        string[] cachedFiles = { _config.KFilePath, _config.LabelsFilePath, _config.ModelFilePath };
        foreach (string cachedFile in cachedFiles)
        {
            if (File.Exists(cachedFile))
            {
                File.Delete(cachedFile);
            }
        }

        File.WriteAllText(_config.HashFilePath, currentHash);
    }
}
87+
}

ConsoleApp2/CommitFetcher.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,15 @@ public class CommitFetcher
1313
{
1414
private const int PROCESSED_COUNT = 10; // Adjust this value as needed to control how often the intermediate results are saved
1515

16-
public static async Task RunAsync(string orgName, string outputPath)
16+
public static async Task RunAsync(string orgName, string outputPath, int? maxRepos = null)
1717
{
1818
var github = new GitHubClient(new ProductHeaderValue("GitCommitAnalyser"));
1919
if (!ConfigureAuthentication(github))
2020
{
2121
return;
2222
}
2323

24-
var repoCommits = await FetchRepoCommitsAsync(github, orgName, outputPath);
24+
var repoCommits = await FetchRepoCommitsAsync(github, orgName, outputPath, maxRepos);
2525

2626
Console.WriteLine($"Successfully gathered commits across {repoCommits.Count} repositories.");
2727

ConsoleApp2/Program.cs

Lines changed: 7 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -1,86 +1,21 @@
1-
using System;
2-
using System.IO;
3-
using System.Threading.Tasks;
1+
using System.Threading.Tasks;
42
using DotNetEnv;
5-
using Microsoft.ML;
63

74
namespace GitCommitAnalyser
85
{
96
class Program
107
{
118
static async Task Main(string[] args)
129
{
13-
// cluster first then categorize
10+
// Load environment variables (.env file)
1411
Env.TraversePath().Load();
1512

16-
var orgName = "CodeChefVIT";
17-
var outputPath = "repo_commits.json";
18-
var kFilePath = "best_k.txt";
19-
20-
//await CommitFetcher.RunAsync(orgName, outputPath);
13+
// Prompt user for input and configuration
14+
var config = AppConfig.PromptUserForConfiguration();
2115

22-
var mlContext = new MLContext(seed: 0);
23-
24-
// 1. Load data
25-
var dataView = Analyser.LoadJsonDataForML(mlContext, outputPath);
26-
27-
var labelsFilePath = "cluster_labels.json";
28-
29-
if (dataView != null)
30-
{
31-
Console.WriteLine($"Loaded {dataView.GetRowCount()} rows into ML.NET IDataView.");
32-
33-
var hashFilePath = "data_hash.txt";
34-
var modelFilePath = "kmeans_model.zip";
35-
36-
string currentHash = Analyser.CalculateFileHash(outputPath);
37-
string savedHash = File.Exists(hashFilePath) ? File.ReadAllText(hashFilePath) : string.Empty;
38-
39-
if (currentHash != savedHash)
40-
{
41-
Console.WriteLine("Data file changed or hash not found. Invalidating cache (K, labels, model)...");
42-
if (File.Exists(kFilePath)) File.Delete(kFilePath);
43-
if (File.Exists(labelsFilePath)) File.Delete(labelsFilePath);
44-
if (File.Exists(modelFilePath)) File.Delete(modelFilePath);
45-
File.WriteAllText(hashFilePath, currentHash);
46-
}
47-
48-
// 2. Split data into 80% train and 20% test
49-
var split = Analyser.SplitData(mlContext, dataView);
50-
51-
ITransformer model;
52-
if (File.Exists(modelFilePath))
53-
{
54-
Console.WriteLine($"Loading existing model from {modelFilePath}...");
55-
model = mlContext.Model.Load(modelFilePath, out var schema);
56-
}
57-
else
58-
{
59-
// 3. Featurize Text
60-
var featurizer = Analyser.FeaturizeText(mlContext);
61-
62-
// 4. Find Best K using Grid Search or load from file
63-
int bestK = Analyser.GetOrFindBestK(mlContext, split.TrainSet, featurizer, kFilePath);
64-
65-
// 5. Train KMeans Clusterer
66-
model = Analyser.TrainKMeansClusterer(mlContext, split.TrainSet, featurizer, bestK);
67-
68-
mlContext.Model.Save(model, split.TrainSet.Schema, modelFilePath);
69-
Console.WriteLine($"Saved newly trained model to {modelFilePath}.");
70-
}
71-
72-
var clusters = Analyser.GetClusters(mlContext, split.TrainSet, model);
73-
74-
// 6. Predict cluster names via Gemini (or load from cache)
75-
var clusterNames = await AiClusterLabeler.PredictClusterNamesAsync(clusters, labelsFilePath);
76-
77-
// 7. Print examples from each cluster with readable names
78-
Analyser.PrintClusterExamples(clusters, clusterNames);
79-
80-
// 8. Start the interactive labeling loop
81-
var interactiveLabeler = new CommitInteractiveLabeler(mlContext, model, clusterNames);
82-
interactiveLabeler.StartInteractiveLoop();
83-
}
16+
// Run the main application orchestrator
17+
var orchestrator = new AppOrchestrator(config);
18+
await orchestrator.RunAsync();
8419
}
8520
}
8621
}

0 commit comments

Comments
 (0)