Skip to content

Commit 5b3721a

Browse files
Harden Azure OpenAI embedding retry for 429 throttling (#1114)
## Why

Embedding generation could fail fast on transient Azure OpenAI throttling (HTTP 429), which interrupted full vector rebuild runs. We need resilient retry behavior that respects service guidance while still failing clearly when retries are exhausted.

## What changed

- Added robust retry handling in `EmbeddingService` for transient failures, including `ClientResultException` status-based detection (429/5xx/408), exponential backoff, jitter, and Retry-After header support when present.
- Kept failure behavior explicit: retries are logged with context, and final exhaustion throws a clear terminal exception with attempt details.
- Preserved cancellation semantics: caller-requested cancellation is rethrown directly and not wrapped as retry exhaustion.
- Improved staging cleanup behavior after failures so cleanup is best effort and does not mask the original error.
- Replaced ad-hoc logger calls with source-generated `LoggerMessage` methods to match project logging conventions.

## Configuration and ASP.NET conventions

- Introduced `EmbeddingRetryOptions` and bound it via standard options binding at `AIOptions:EmbeddingRetry`.
- Added validation (data annotations plus runtime validation) and safe defaults.
- Added `EmbeddingRetry` defaults in `EssentialCSharp.Web/appsettings.json`.
- Added `MaxDelayMs` cap to prevent delay overflow/unbounded waits.

## Additional cleanup

- Removed tracked `build_output.txt` and added it to `.gitignore`.
- Removed obsolete `RetryOptions` model after renaming to avoid ambiguity with `Azure.Core.RetryOptions`.

## Validation

- Built `EssentialCSharp.Chat.Shared/EssentialCSharp.Chat.Common.csproj` successfully after changes.
- Ran dual review passes with Opus 4.6 and GPT-5.5 and incorporated findings.
1 parent 8fd1dd0 commit 5b3721a

5 files changed

Lines changed: 369 additions & 6 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ TestResults/
3232
!Michaelis_TableOfContents.docx
3333

3434
# Old or generated files to not commit
35+
build_output.txt
3536
wwwroot/sitemap.xml
3637
wwwroot/Chapters
3738
EssentialCSharp.Web/wwwroot/Chapters

EssentialCSharp.Chat.Shared/Extensions/ServiceCollectionExtensions.cs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
using Azure.AI.OpenAI;
22
using Azure.Core;
33
using Azure.Identity;
4+
using EssentialCSharp.Chat.Common.Models;
45
using EssentialCSharp.Chat.Common.Services;
56
using Microsoft.Extensions.AI;
67
using Microsoft.Extensions.Configuration;
78
using Microsoft.Extensions.DependencyInjection;
9+
using Microsoft.Extensions.Options;
810
using Microsoft.SemanticKernel;
911
using Npgsql;
1012

@@ -65,6 +67,15 @@ public static IServiceCollection AddAzureOpenAIServices(
6567
.UseOpenTelemetry();
6668
#pragma warning restore SKEXP0010
6769

70+
// Ensure options are available even when caller provides AIOptions directly.
71+
services.AddOptions<EmbeddingRetryOptions>()
72+
.ValidateDataAnnotations()
73+
.Validate(options =>
74+
{
75+
options.Validate();
76+
return true;
77+
}, "Embedding retry configuration is invalid.");
78+
6879
// Register shared AI services
6980
services.AddSingleton<EmbeddingService>();
7081
services.AddSingleton<AISearchService>();
@@ -89,6 +100,17 @@ public static IServiceCollection AddAzureOpenAIServices(
89100
// Configure AI options from configuration
90101
services.Configure<AIOptions>(configuration.GetSection("AIOptions"));
91102

103+
// Configure retry options from configuration section
104+
// Environment variables like AIOptions__EmbeddingRetry__MaxRetries (key AIOptions:EmbeddingRetry:MaxRetries) will override defaults
105+
services.AddOptions<EmbeddingRetryOptions>()
106+
.Bind(configuration.GetSection(EmbeddingRetryOptions.SectionPath))
107+
.ValidateDataAnnotations()
108+
.Validate(options =>
109+
{
110+
options.Validate();
111+
return true;
112+
}, "Embedding retry configuration is invalid.");
113+
92114
var aiOptions = configuration.GetSection("AIOptions").Get<AIOptions>();
93115
if (aiOptions == null)
94116
{
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
using System.ComponentModel.DataAnnotations;
2+
3+
namespace EssentialCSharp.Chat.Common.Models;
4+
5+
/// <summary>
/// Configuration options for retry logic when calling external services like Azure OpenAI.
/// </summary>
/// <remarks>
/// The <see cref="RangeAttribute"/> annotations bound each individual value when
/// data-annotation validation runs. <see cref="Validate"/> repeats the lower-bound checks,
/// adds the cross-property check between <see cref="BaseDelayMs"/> and
/// <see cref="MaxDelayMs"/>, and can be called directly without the options pipeline.
/// </remarks>
public sealed class EmbeddingRetryOptions
{
    /// <summary>
    /// Configuration section path in appsettings.json.
    /// </summary>
    public const string SectionPath = "AIOptions:EmbeddingRetry";

    /// <summary>
    /// Maximum number of retries for transient failures.
    /// Default is 5 retries (initial attempt + 5 retries = 6 total attempts).
    /// </summary>
    [Range(0, 20)]
    public int MaxRetries { get; set; } = 5;

    /// <summary>
    /// Base delay in milliseconds before the first retry.
    /// Subsequent retries use exponential backoff: baseDelay * (backoffMultiplier ^ attemptNumber).
    /// Default is 1000ms (1 second).
    /// </summary>
    [Range(0, 600000)]
    public int BaseDelayMs { get; set; } = 1000;

    /// <summary>
    /// Maximum delay in milliseconds for exponential backoff before jitter.
    /// This caps retry delays to avoid overflow and unbounded waits.
    /// Default is 60000ms (60 seconds).
    /// </summary>
    [Range(1, 600000)]
    public int MaxDelayMs { get; set; } = 60000;

    /// <summary>
    /// Exponential backoff multiplier. Each retry delay is multiplied by this value.
    /// For example, with baseDelay=1000ms and multiplier=2.0:
    /// - 1st retry: 1000ms
    /// - 2nd retry: 2000ms
    /// - 3rd retry: 4000ms
    /// - 4th retry: 8000ms
    /// Default is 2.0 (double each time).
    /// </summary>
    [Range(1.0, 10.0)]
    public double BackoffMultiplier { get; set; } = 2.0;

    /// <summary>
    /// Maximum jitter fraction added to each retry delay to prevent thundering herd.
    /// Jitter is a random value in range [0, delay * maxJitterFraction].
    /// For example, with maxJitterFraction=0.2 and delay=1000ms:
    /// actual delay will be between 1000ms and 1200ms.
    /// Default is 0.2 (20% jitter).
    /// </summary>
    [Range(0.0, 1.0)]
    public double MaxJitterFraction { get; set; } = 0.2;

    /// <summary>
    /// Validates that configuration values are reasonable.
    /// </summary>
    /// <remarks>
    /// Checks lower bounds and the relationship between <see cref="BaseDelayMs"/> and
    /// <see cref="MaxDelayMs"/>. Upper bounds are expressed only by the
    /// <see cref="RangeAttribute"/> annotations and are not re-checked here.
    /// </remarks>
    /// <exception cref="InvalidOperationException">Thrown if configuration is invalid.</exception>
    public void Validate()
    {
        if (MaxRetries < 0)
        {
            throw new InvalidOperationException("MaxRetries must be non-negative.");
        }

        if (BaseDelayMs < 0)
        {
            throw new InvalidOperationException("BaseDelayMs must be non-negative.");
        }

        if (MaxDelayMs <= 0)
        {
            throw new InvalidOperationException("MaxDelayMs must be positive.");
        }

        if (BaseDelayMs > MaxDelayMs)
        {
            throw new InvalidOperationException("BaseDelayMs must be less than or equal to MaxDelayMs.");
        }

        // The floating-point checks are written as negated "in range" tests: every ordered
        // comparison against double.NaN is false, so "value < 1.0" would let NaN through,
        // whereas "!(value >= 1.0)" rejects it.
        if (!(BackoffMultiplier >= 1.0))
        {
            throw new InvalidOperationException("BackoffMultiplier must be >= 1.0.");
        }

        if (!(MaxJitterFraction >= 0.0 && MaxJitterFraction <= 1.0))
        {
            throw new InvalidOperationException("MaxJitterFraction must be between 0.0 and 1.0.");
        }
    }
}

0 commit comments

Comments
 (0)