1+ using System ;
2+ using System . Threading ;
3+ using System . Threading . Tasks ;
14using ManagedCode . AgentLightning . Core . Adapters ;
25using ManagedCode . AgentLightning . Core . Models ;
36using ManagedCode . AgentLightning . Core . Stores ;
@@ -15,6 +18,7 @@ public sealed class LitAgentRunner
1518 private readonly ILightningStore _store ;
1619 private readonly ILogger < LitAgentRunner > _logger ;
1720 private readonly TimeProvider _timeProvider ;
21+ private string ? _workerId ;
1822
1923 public LitAgentRunner (
2024 LightningAgent agent ,
@@ -37,7 +41,7 @@ public async Task<IReadOnlyList<LightningExecutionResult>> RunBatchAsync(int exp
3741
3842 var results = new List < LightningExecutionResult > ( expectedRollouts ) ;
3943
40- for ( var i = 0 ; i < expectedRollouts ; i ++ )
44+ while ( results . Count < expectedRollouts && ! cancellationToken . IsCancellationRequested )
4145 {
4246 var attempted = await _store . DequeueRolloutAsync ( cancellationToken ) . ConfigureAwait ( false ) ;
4347 if ( attempted is null )
@@ -47,29 +51,87 @@ public async Task<IReadOnlyList<LightningExecutionResult>> RunBatchAsync(int exp
4751
4852 try
4953 {
54+ var workerId = GetWorkerId ( ) ;
55+ attempted . Attempt . AttachWorker ( workerId ) ;
56+ attempted . Attempt . UpdateStatus ( AttemptStatus . Running ) ;
57+ attempted . Attempt . Touch ( _timeProvider . GetUtcNow ( ) ) ;
58+ await _store . UpdateAttemptAsync ( attempted . Attempt , cancellationToken ) . ConfigureAwait ( false ) ;
59+
5060 var execution = await _agent . ExecuteAsync ( attempted . Rollout . Input , cancellationToken ) . ConfigureAwait ( false ) ;
5161 results . Add ( execution ) ;
5262
5363 attempted . Attempt . UpdateStatus ( execution . Attempt . Status , execution . Attempt . EndTime ?? _timeProvider . GetUtcNow ( ) ) ;
5464 await _store . UpdateAttemptAsync ( attempted . Attempt , cancellationToken ) . ConfigureAwait ( false ) ;
55- await _store . UpdateRolloutStatusAsync ( attempted . Rollout . RolloutId , execution . Rollout . Status , execution . Rollout . EndTime , cancellationToken ) . ConfigureAwait ( false ) ;
5665
57- var span = BuildSpanFromResult ( attempted , execution ) ;
58- await _store . AddSpanAsync ( attempted . Rollout . RolloutId , attempted . Attempt . AttemptId , span , cancellationToken ) . ConfigureAwait ( false ) ;
66+ if ( execution . Attempt . Status == AttemptStatus . Succeeded )
67+ {
68+ await _store . UpdateRolloutStatusAsync (
69+ attempted . Rollout . RolloutId ,
70+ execution . Rollout . Status ,
71+ execution . Rollout . EndTime ?? _timeProvider . GetUtcNow ( ) ,
72+ cancellationToken ) . ConfigureAwait ( false ) ;
73+
74+ var sequenceId = await _store . GetNextSpanSequenceIdAsync (
75+ attempted . Rollout . RolloutId ,
76+ attempted . Attempt . AttemptId ,
77+ cancellationToken ) . ConfigureAwait ( false ) ;
78+
79+ var span = BuildSpanFromResult ( attempted , execution , sequenceId ) ;
80+ await _store . AddSpanAsync ( span , cancellationToken ) . ConfigureAwait ( false ) ;
81+ }
82+ else
83+ {
84+ await HandleAttemptFailureAsync ( attempted , cancellationToken ) . ConfigureAwait ( false ) ;
85+ }
5986 }
6087 catch ( Exception ex ) when ( ! cancellationToken . IsCancellationRequested )
6188 {
6289 attempted . Attempt . UpdateStatus ( AttemptStatus . Failed , _timeProvider . GetUtcNow ( ) ) ;
6390 await _store . UpdateAttemptAsync ( attempted . Attempt , cancellationToken ) . ConfigureAwait ( false ) ;
64- await _store . UpdateRolloutStatusAsync ( attempted . Rollout . RolloutId , RolloutStatus . Failed , _timeProvider . GetUtcNow ( ) , cancellationToken ) . ConfigureAwait ( false ) ;
91+ await HandleAttemptFailureAsync ( attempted , cancellationToken ) . ConfigureAwait ( false ) ;
6592 _logger . LogError ( ex , "Runner failed while executing rollout {RolloutId}." , attempted . Rollout . RolloutId ) ;
6693 }
6794 }
6895
6996 return results ;
7097 }
7198
72- private static SpanModel BuildSpanFromResult ( AttemptedRollout attempted , LightningExecutionResult execution )
/// <summary>
/// Returns the identifier this runner attaches to the attempts it executes.
/// The value is created on first use and then reused for the lifetime of the instance.
/// </summary>
/// <remarks>
/// The identifier is derived from a fresh GUID rather than the managed thread id:
/// in async code the calling thread is an arbitrary pool thread, so a thread id
/// neither identifies this runner nor differs reliably between runner instances.
/// </remarks>
/// <returns>A string of the form <c>runner-&lt;32 hex digits&gt;</c>.</returns>
private string GetWorkerId() =>
    _workerId ??= $"runner-{Guid.NewGuid():N}";
101+
/// <summary>
/// Records the outcome of an unsuccessful attempt on its rollout. When
/// <see cref="ShouldRetry"/> permits another attempt the rollout is marked
/// <see cref="RolloutStatus.Requeuing"/> with no end time; otherwise it is marked
/// <see cref="RolloutStatus.Failed"/> with the current time as its end time.
/// The chosen outcome is logged after the store has been updated.
/// </summary>
/// <param name="attempted">The rollout/attempt pair whose attempt did not succeed.</param>
/// <param name="cancellationToken">Token forwarded to the store update.</param>
private async Task HandleAttemptFailureAsync(AttemptedRollout attempted, CancellationToken cancellationToken)
{
    if (ShouldRetry(attempted.Rollout, attempted.Attempt))
    {
        // A rollout that is going back to the queue is not finished, so it carries no end time.
        DateTimeOffset? noEndTime = null;

        await _store.UpdateRolloutStatusAsync(
            attempted.Rollout.RolloutId,
            RolloutStatus.Requeuing,
            noEndTime,
            cancellationToken).ConfigureAwait(false);

        _logger.LogInformation("Rollout {RolloutId} requeued after attempt {AttemptId} failed.", attempted.Rollout.RolloutId, attempted.Attempt.AttemptId);
        return;
    }

    // No retry: close the rollout out as failed, stamped with the current time.
    DateTimeOffset? failedAt = _timeProvider.GetUtcNow();

    await _store.UpdateRolloutStatusAsync(
        attempted.Rollout.RolloutId,
        RolloutStatus.Failed,
        failedAt,
        cancellationToken).ConfigureAwait(false);

    _logger.LogWarning("Rollout {RolloutId} failed after attempt {AttemptId}.", attempted.Rollout.RolloutId, attempted.Attempt.AttemptId);
}
123+
/// <summary>
/// Decides whether a rollout may be attempted again after the given attempt.
/// </summary>
/// <param name="rollout">The rollout whose <c>Config</c> supplies <c>MaxAttempts</c> and <c>RetryOn</c>.</param>
/// <param name="attempt">The attempt that just finished; its <c>SequenceId</c> and <c>Status</c> are inspected.</param>
/// <returns>
/// <c>true</c> only when the attempt's sequence id is below the configured maximum
/// and the attempt's status is listed in the rollout's <c>RetryOn</c> collection.
/// </returns>
private static bool ShouldRetry(Rollout rollout, Attempt attempt)
{
    var retryPolicy = rollout.Config;

    // Short-circuit: RetryOn is only consulted while attempts remain.
    return attempt.SequenceId < retryPolicy.MaxAttempts
        && retryPolicy.RetryOn.Contains(attempt.Status);
}
133+
134+ private static SpanModel BuildSpanFromResult ( AttemptedRollout attempted , LightningExecutionResult execution , int sequenceId )
73135 {
74136 var attributes = new Dictionary < string , object ? > ( StringComparer . Ordinal ) ;
75137 if ( execution . Triplet . Prompt is IEnumerable < object ? > prompts )
@@ -103,7 +165,7 @@ private static SpanModel BuildSpanFromResult(AttemptedRollout attempted, Lightni
103165 attributes ,
104166 rolloutId : attempted . Rollout . RolloutId ,
105167 attemptId : attempted . Attempt . AttemptId ,
106- sequenceId : attempted . Attempt . SequenceId ,
168+ sequenceId : sequenceId ,
107169 name : "agentlightning.completion" ,
108170 startTime : attempted . Attempt . StartTime . ToUnixTimeSeconds ( ) ,
109171 endTime : attempted . Attempt . EndTime ? . ToUnixTimeSeconds ( ) ) ;
0 commit comments