Skip to content

Commit c453ff3

Browse files
Add coarse embedding rebuild progress logging
- Log embedding rebuild start with known total (when available)
- Emit progress at 10% milestones when total chunk count is known
- Fall back to every 500 chunks when total is unknown
- Include current adaptive batch size in progress logs
1 parent 273d0cb commit c453ff3

1 file changed

Lines changed: 64 additions & 0 deletions

File tree

EssentialCSharp.Chat.Shared/Services/EmbeddingService.cs

Lines changed: 64 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -382,8 +382,41 @@ public async Task GenerateBookContentEmbeddingsAndUploadToVectorStore(
382382
var configuredMaxBatchSize = Math.Clamp(_retryOptions.MaxEmbeddingBatchSize, 1, EmbeddingBatchSize);
383383
var adaptiveBatchSize = configuredMaxBatchSize;
384384
var buffer = new List<BookContentChunk>(configuredMaxBatchSize);
385+
var knownTotalChunks = bookContents.TryGetNonEnumeratedCount(out var totalChunkCount) ? totalChunkCount : (int?)null;
386+
var nextProgressPercentToLog = 10;
387+
var nextProgressChunkCountToLog = 500;
385388
int totalCount = 0;
386389

390+
if (_logger is not null)
391+
{
392+
LogEmbeddingRebuildStarted(
393+
_logger,
394+
knownTotalChunks,
395+
configuredMaxBatchSize,
396+
_retryOptions.MinInterRequestDelayMs);
397+
}
398+
399+
// Emits coarse progress logs based on the captured running totalCount.
//  - When knownTotalChunks is a positive number, logs once for every 10% milestone that
//    totalCount has reached (several entries if a single batch crosses several milestones).
//  - Otherwise, logs when totalCount reaches the next 500-chunk threshold.
// Does nothing when no logger is configured.
void LogProgressIfNeeded()
{
    if (_logger is null)
    {
        return;
    }

    if (knownTotalChunks is > 0)
    {
        // Compare processed/total against the milestone by cross-multiplication in 64-bit
        // arithmetic: int multiplication is unchecked unless overflow checking is enabled,
        // so "totalCount * 100" could silently wrap for very large chunk counts.
        long processedTimesHundred = (long)totalCount * 100;
        long expectedTotal = knownTotalChunks.Value;

        while (nextProgressPercentToLog <= 100
            && processedTimesHundred >= expectedTotal * nextProgressPercentToLog)
        {
            LogEmbeddingProgressPercent(_logger, totalCount, knownTotalChunks.Value, nextProgressPercentToLog, adaptiveBatchSize);
            nextProgressPercentToLog += 10;
        }
    }
    else if (totalCount >= nextProgressChunkCountToLog)
    {
        LogEmbeddingProgressCount(_logger, totalCount, adaptiveBatchSize);

        // Advance to the first multiple of 500 above the current count. For batches of at
        // most 500 chunks this equals the previous "+= 500"; for a larger batch it prevents
        // the threshold from lagging behind totalCount and triggering a log on every
        // subsequent batch until it catches up.
        nextProgressChunkCountToLog = (totalCount / 500 + 1) * 500;
    }
}
419+
387420
async Task EmbedAndUpsertExactBatchAsync(IReadOnlyList<BookContentChunk> batch)
388421
{
389422
var batchEmbeddings = await ExecuteWithRetryAsync(
@@ -403,6 +436,7 @@ async Task EmbedAndUpsertExactBatchAsync(IReadOnlyList<BookContentChunk> batch)
403436

404437
await staging.UpsertAsync(batch, cancellationToken);
405438
totalCount += batch.Count;
439+
LogProgressIfNeeded();
406440
}
407441

408442
async Task EmbedAndUpsertBatchAdaptiveAsync(IReadOnlyList<BookContentChunk> batch)
@@ -558,4 +592,34 @@ private static partial void LogEmbeddingBatchDownshift(
558592
int previousBatchSize,
559593
int newBatchSize,
560594
int retryAttemptsPerRequest);
595+
596+
/// <summary>
/// Source-generated log method (event 12005, Information) announcing that an embedding
/// rebuild has started.
/// </summary>
/// <param name="logger">Logger that receives the entry.</param>
/// <param name="totalChunks">Value for the <c>TotalChunks</c> placeholder; <see langword="null"/> when the caller does not know the total.</param>
/// <param name="initialBatchSize">Value for the <c>InitialBatchSize</c> placeholder.</param>
/// <param name="minInterRequestDelayMs">Value for the <c>MinInterRequestDelayMs</c> placeholder.</param>
[LoggerMessage(EventId = 12005, Level = LogLevel.Information,
    Message = "Embedding rebuild started. TotalChunks={TotalChunks}, InitialBatchSize={InitialBatchSize}, MinInterRequestDelayMs={MinInterRequestDelayMs}")]
private static partial void LogEmbeddingRebuildStarted(ILogger logger, int? totalChunks, int initialBatchSize, int minInterRequestDelayMs);
605+
606+
/// <summary>
/// Source-generated log method (event 12006, Information) reporting progress as
/// processed/total chunks together with a percentage.
/// </summary>
/// <param name="logger">Logger that receives the entry.</param>
/// <param name="processedChunks">Value for the <c>ProcessedChunks</c> placeholder.</param>
/// <param name="totalChunks">Value for the <c>TotalChunks</c> placeholder.</param>
/// <param name="progressPercent">Value for the <c>ProgressPercent</c> placeholder.</param>
/// <param name="adaptiveBatchSize">Value for the <c>AdaptiveBatchSize</c> placeholder.</param>
[LoggerMessage(EventId = 12006, Level = LogLevel.Information,
    Message = "Embedding progress: {ProcessedChunks}/{TotalChunks} chunks ({ProgressPercent}%). CurrentAdaptiveBatchSize={AdaptiveBatchSize}")]
private static partial void LogEmbeddingProgressPercent(ILogger logger, int processedChunks, int totalChunks, int progressPercent, int adaptiveBatchSize);
616+
617+
/// <summary>
/// Source-generated log method (event 12007, Information) reporting progress as a plain
/// count of processed chunks, without a total.
/// </summary>
/// <param name="logger">Logger that receives the entry.</param>
/// <param name="processedChunks">Value for the <c>ProcessedChunks</c> placeholder.</param>
/// <param name="adaptiveBatchSize">Value for the <c>AdaptiveBatchSize</c> placeholder.</param>
[LoggerMessage(EventId = 12007, Level = LogLevel.Information,
    Message = "Embedding progress: {ProcessedChunks} chunks processed. CurrentAdaptiveBatchSize={AdaptiveBatchSize}")]
private static partial void LogEmbeddingProgressCount(ILogger logger, int processedChunks, int adaptiveBatchSize);
561625
}

0 commit comments

Comments
 (0)