1+ using System . Diagnostics ;
12using System . Security . Cryptography ;
23using System . Text ;
34using System . Text . Json ;
89using GithubActionsOrchestrator . Models ;
910using Microsoft . AspNetCore . Mvc ;
1011using Microsoft . EntityFrameworkCore ;
12+ using OpenTelemetry . Resources ;
13+ using OpenTelemetry . Trace ;
1114using Prometheus ;
1215using Serilog ;
1316using Serilog . Events ;
@@ -18,6 +21,9 @@ public class Program
1821{
1922 public static AutoScalerConfiguration Config = new ( ) ;
2023
24+ internal const string ServiceName = "github-actions-orchestrator" ;
25+ internal static readonly ActivitySource OrchestratorActivitySource = new ( ServiceName ) ;
26+
2127 private static readonly Counter ProcessedJobCount = Metrics
2228 . CreateCounter ( "github_autoscaler_jobs_processed" , "Number of processed jobs" , labelNames : [ "org" , "size" ] ) ;
2329
@@ -95,6 +101,18 @@ public static void Main(string[] args)
95101
96102 WebApplicationBuilder builder = WebApplication . CreateBuilder ( args ) ;
97103 builder . Services . AddSerilog ( ) ;
104+ builder . Services . AddOpenTelemetry ( )
105+ . ConfigureResource ( resource => resource . AddService ( serviceName : ServiceName ) )
106+ . WithTracing ( tracing =>
107+ {
108+ tracing
109+ . AddSource ( ServiceName )
110+ . AddSource ( "Npgsql" )
111+ . AddAspNetCoreInstrumentation ( )
112+ . AddHttpClientInstrumentation ( )
113+ . AddOtlpExporter ( )
114+ . AddProcessor ( new Pyroscope . OpenTelemetry . PyroscopeSpanProcessor ( ) ) ;
115+ } ) ;
98116 builder . Services . AddSingleton < RunnerQueue > ( ) ;
99117 builder . Services . AddHostedService < PoolManager > ( ) ;
100118
@@ -209,6 +227,8 @@ public static bool LoadConfiguration()
209227
210228 private static async Task < IResult > GithubWebhookHandler ( HttpRequest request , [ FromServices ] HetznerCloudController cloud , [ FromServices ] ILogger < Program > logger , [ FromServices ] RunnerQueue poolMgr )
211229 {
230+ using var activity = OrchestratorActivitySource . StartActivity ( "webhook.github" ) ;
231+
212232 // Verify webhook HMAC
213233 request . EnableBuffering ( ) ;
214234 string requestBody ;
@@ -227,6 +247,7 @@ private static async Task<IResult> GithubWebhookHandler(HttpRequest request, [Fr
227247 }
228248 catch ( Exception ex )
229249 {
250+ activity ? . SetStatus ( ActivityStatusCode . Error , ex . Message ) ;
230251 logger . LogWarning ( $ "Webhook signature verification failed: { ex . Message } ") ;
231252 return Results . StatusCode ( 401 ) ;
232253 }
@@ -246,6 +267,8 @@ private static async Task<IResult> GithubWebhookHandler(HttpRequest request, [Fr
246267 return Results . StatusCode ( 201 ) ;
247268 }
248269
270+ activity ? . SetTag ( "github.action" , action ) ;
271+
249272 if ( ! json . RootElement . TryGetProperty ( "workflow_job" , out JsonElement workflowJson ) )
250273 {
251274 logger . LogDebug ( "Received a non-workflowJob request. Ignoring." ) ;
@@ -274,6 +297,9 @@ private static async Task<IResult> GithubWebhookHandler(HttpRequest request, [Fr
274297 string orgName = Config . TargetConfigs . FirstOrDefault ( x => x . Target == TargetType . Organization && x . Name . ToLower ( ) == orgNameRequest . ToLower ( ) ) ? . Name ?? orgNameRequest ;
275298 string repoName = Config . TargetConfigs . FirstOrDefault ( x => x . Target == TargetType . Repository && x . Name . ToLower ( ) == repoNameRequest . ToLower ( ) ) ? . Name ?? repoNameRequest ;
276299
300+ activity ? . SetTag ( "github.job_id" , jobId ) ;
301+ activity ? . SetTag ( "github.repo" , repoName ) ;
302+ activity ? . SetTag ( "github.org" , orgName ) ;
277303
278304 // Check if it's an org or a repo
279305 if ( String . IsNullOrEmpty ( orgName ) )
@@ -292,81 +318,90 @@ private static async Task<IResult> GithubWebhookHandler(HttpRequest request, [Fr
292318 return Results . StatusCode ( 201 ) ;
293319 }
294320
295-
321+
296322 await using var db = new ActionsRunnerContext ( ) ;
297323
298324 try
299325 {
300326 switch ( action )
301327 {
302328 case "queued" :
303- await JobQueued ( logger , repoName , labels , orgName , poolMgr , isRepo ? TargetType . Repository : TargetType . Organization , jobId , jobUrl ) ;
329+ using ( OrchestratorActivitySource . StartActivity ( "webhook.github.queued" ) )
330+ {
331+ await JobQueued ( logger , repoName , labels , orgName , poolMgr , isRepo ? TargetType . Repository : TargetType . Organization , jobId , jobUrl ) ;
332+ }
304333 break ;
305334 case "in_progress" :
306- var dbWorkflow = await db . Jobs . FirstOrDefaultAsync ( x => x . GithubJobId == jobId ) ;
307- if ( dbWorkflow == null )
335+ using ( OrchestratorActivitySource . StartActivity ( "webhook.github.in_progress" ) )
308336 {
309- logger . LogWarning ( "Processing job on manually created runner" ) ;
310- Job progressJob = new ( )
337+ var dbWorkflow = await db . Jobs . FirstOrDefaultAsync ( x => x . GithubJobId == jobId ) ;
338+ if ( dbWorkflow == null )
311339 {
312- GithubJobId = jobId ,
313- Repository = repoName ,
314- Owner = isRepo ? repoName : orgName ,
315- State = JobState . InProgress ,
316- InProgressTime = DateTime . UtcNow ,
317- JobUrl = jobUrl ,
318- Orphan = true
319- } ;
320- await db . Jobs . AddAsync ( progressJob ) ;
321- }
322- else
323- {
324- dbWorkflow . State = JobState . InProgress ;
325- dbWorkflow . QueueTime = DateTime . UtcNow ;
340+ logger . LogWarning ( "Processing job on manually created runner" ) ;
341+ Job progressJob = new ( )
342+ {
343+ GithubJobId = jobId ,
344+ Repository = repoName ,
345+ Owner = isRepo ? repoName : orgName ,
346+ State = JobState . InProgress ,
347+ InProgressTime = DateTime . UtcNow ,
348+ JobUrl = jobUrl ,
349+ Orphan = true
350+ } ;
351+ await db . Jobs . AddAsync ( progressJob ) ;
352+ }
353+ else
354+ {
355+ dbWorkflow . State = JobState . InProgress ;
356+ dbWorkflow . QueueTime = DateTime . UtcNow ;
357+ }
358+ await db . SaveChangesAsync ( ) ;
359+ await JobInProgress ( workflowJson , logger , jobId , repoName , orgName ) ;
326360 }
327- await db . SaveChangesAsync ( ) ;
328- await JobInProgress ( workflowJson , logger , jobId , repoName , orgName ) ;
329361 break ;
330362 case "completed" :
331- string conclusion = String . Empty ;
332- if ( json . RootElement . TryGetProperty ( "conclusion" , out JsonElement conclusionJson ) )
363+ using ( OrchestratorActivitySource . StartActivity ( "webhook.github.completed" ) )
333364 {
334- conclusion = conclusionJson . GetString ( ) ?? string . Empty ;
335- }
365+ string conclusion = String . Empty ;
366+ if ( json . RootElement . TryGetProperty ( "conclusion" , out JsonElement conclusionJson ) )
367+ {
368+ conclusion = conclusionJson . GetString ( ) ?? string . Empty ;
369+ }
336370
337- var dbWorkflowComplete = await db . Jobs . FirstOrDefaultAsync ( x => x . GithubJobId == jobId ) ;
338- if ( dbWorkflowComplete == null )
339- {
340- logger . LogWarning ( $ "Completed webhook for unknown job { jobId } in { repoName } . Creating record.") ;
341- dbWorkflowComplete = new Job
371+ var dbWorkflowComplete = await db . Jobs . FirstOrDefaultAsync ( x => x . GithubJobId == jobId ) ;
372+ if ( dbWorkflowComplete == null )
342373 {
343- GithubJobId = jobId ,
344- Repository = repoName ,
345- Owner = isRepo ? repoName : orgName ,
346- State = JobState . Completed ,
347- CompleteTime = DateTime . UtcNow ,
348- Orphan = true
349- } ;
350- await db . Jobs . AddAsync ( dbWorkflowComplete ) ;
351- await db . SaveChangesAsync ( ) ;
352- return Results . StatusCode ( 201 ) ;
353- }
354- dbWorkflowComplete . CompleteTime = DateTime . UtcNow ;
355- bool wasCancelled = false ;
356- switch ( conclusion )
357- {
358- case "cancelled" :
359- dbWorkflowComplete . State = JobState . Cancelled ;
360- await db . SaveChangesAsync ( ) ;
361- wasCancelled = true ;
362- break ;
363- default :
364- dbWorkflowComplete . State = JobState . Completed ;
374+ logger . LogWarning ( $ "Completed webhook for unknown job { jobId } in { repoName } . Creating record.") ;
375+ dbWorkflowComplete = new Job
376+ {
377+ GithubJobId = jobId ,
378+ Repository = repoName ,
379+ Owner = isRepo ? repoName : orgName ,
380+ State = JobState . Completed ,
381+ CompleteTime = DateTime . UtcNow ,
382+ Orphan = true
383+ } ;
384+ await db . Jobs . AddAsync ( dbWorkflowComplete ) ;
365385 await db . SaveChangesAsync ( ) ;
366- break ;
367- }
386+ return Results . StatusCode ( 201 ) ;
387+ }
388+ dbWorkflowComplete . CompleteTime = DateTime . UtcNow ;
389+ bool wasCancelled = false ;
390+ switch ( conclusion )
391+ {
392+ case "cancelled" :
393+ dbWorkflowComplete . State = JobState . Cancelled ;
394+ await db . SaveChangesAsync ( ) ;
395+ wasCancelled = true ;
396+ break ;
397+ default :
398+ dbWorkflowComplete . State = JobState . Completed ;
399+ await db . SaveChangesAsync ( ) ;
400+ break ;
401+ }
368402
369- await JobCompleted ( logger , jobId , poolMgr , repoName , orgName , workflowJson , wasCancelled ) ;
403+ await JobCompleted ( logger , jobId , poolMgr , repoName , orgName , workflowJson , wasCancelled ) ;
404+ }
370405 break ;
371406 default :
372407 logger . LogWarning ( "Unknown action. Ignoring" ) ;
@@ -375,12 +410,14 @@ private static async Task<IResult> GithubWebhookHandler(HttpRequest request, [Fr
375410 }
376411 catch ( Exception ex )
377412 {
413+ activity ? . SetStatus ( ActivityStatusCode . Error , ex . Message ) ;
414+ activity ? . AddException ( ex ) ;
378415 // This should mark the webhook as bad and the timer will redeliver it after a while
379416 Log . Error ( $ "Failed to process { action } webhook: { ex . Message } ") ;
380417 return Results . StatusCode ( 500 ) ;
381418 }
382419
383- // All was well
420+ // All was well
384421 return Results . StatusCode ( 201 ) ;
385422 }
386423
0 commit comments