Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,8 @@ public static IServiceCollection AddAzureOpenAIServices(
// Configure AI options from configuration
services.Configure<AIOptions>(configuration.GetSection("AIOptions"));

// Configure retry options from configuration section
// Environment variables like EmbeddingRetry:MaxRetries will override defaults
// Configure retry options from configuration section.
// Environment variables can override via AIOptions__EmbeddingRetry__*.
services.AddOptions<EmbeddingRetryOptions>()
.Bind(configuration.GetSection(EmbeddingRetryOptions.SectionPath))
.ValidateDataAnnotations()
Expand Down
23 changes: 23 additions & 0 deletions EssentialCSharp.Chat.Shared/Models/EmbeddingRetryOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,20 @@ public sealed class EmbeddingRetryOptions
[Range(1, 600000)]
public int MaxDelayMs { get; set; } = 60000;

/// <summary>
/// Maximum embedding request payload size (batch size) sent per API call.
/// The service may adaptively downshift below this value when throttled.
/// </summary>
/// <remarks>
/// Data-annotation validation restricts this value to the inclusive range 1–2048;
/// the default is 2048, i.e. the largest value the range allows.
/// NOTE(review): the unit is presumably the number of input items per request
/// rather than bytes or tokens — confirm against the consuming service.
/// </remarks>
[Range(1, 2048)]
public int MaxEmbeddingBatchSize { get; set; } = 2048;

/// <summary>
/// Minimum delay between embedding API requests in milliseconds.
/// This adds request pacing to reduce sustained rate-limit pressure.
/// </summary>
/// <remarks>
/// Data-annotation validation restricts this value to the inclusive range 0–600000 ms;
/// the default is 250 ms.
/// NOTE(review): a value of 0 is allowed by the range and presumably disables
/// pacing entirely — confirm against the consuming service.
/// </remarks>
[Range(0, 600000)]
public int MinInterRequestDelayMs { get; set; } = 250;

/// <summary>
/// Exponential backoff multiplier. Each retry delay is multiplied by this value.
/// For example, with baseDelay=1000ms and multiplier=2.0:
Expand Down Expand Up @@ -74,6 +88,15 @@ public void Validate()
if (BaseDelayMs > MaxDelayMs)
throw new InvalidOperationException("BaseDelayMs must be less than or equal to MaxDelayMs.");

if (MaxEmbeddingBatchSize <= 0)
throw new InvalidOperationException("MaxEmbeddingBatchSize must be positive.");

if (MaxEmbeddingBatchSize > 2048)
throw new InvalidOperationException("MaxEmbeddingBatchSize cannot exceed Azure embedding API limit (2048).");

if (MinInterRequestDelayMs < 0)
throw new InvalidOperationException("MinInterRequestDelayMs must be non-negative.");

if (BackoffMultiplier < 1.0)
throw new InvalidOperationException("BackoffMultiplier must be >= 1.0.");

Expand Down
Loading
Loading