@@ -385,6 +385,8 @@ public async Task GenerateBookContentEmbeddingsAndUploadToVectorStore(
385385 var knownTotalChunks = bookContents . TryGetNonEnumeratedCount ( out var totalChunkCount ) ? totalChunkCount : ( int ? ) null ;
386386 var nextProgressPercentToLog = 10 ;
387387 var nextProgressChunkCountToLog = 500 ;
388+ var successfulBatchRequestCounts = new Dictionary < int , int > ( ) ;
389+ var successfulBatchChunkTotals = new Dictionary < int , int > ( ) ;
388390 int totalCount = 0 ;
389391
390392 if ( _logger is not null )
@@ -419,12 +421,32 @@ void LogProgressIfNeeded()
419421
420422 async Task EmbedAndUpsertExactBatchAsync ( IReadOnlyList < BookContentChunk > batch )
421423 {
424+ const string operationName = "GenerateBatchEmbeddings" ;
425+ int attemptNumber = 0 ;
426+
422427 var batchEmbeddings = await ExecuteWithRetryAsync (
423- async ct => await ExecuteEmbeddingRequestWithPacingAsync (
424- async pacingCt => await embeddingGenerator . GenerateAsync (
425- batch . Select ( c => c . ChunkText ) , cancellationToken : pacingCt ) ,
426- ct ) ,
427- $ "GenerateBatchEmbeddings(size={ batch . Count } )",
428+ async ct =>
429+ {
430+ attemptNumber ++ ;
431+ if ( _logger is not null )
432+ {
433+ LogEmbeddingBatchRequestState (
434+ _logger ,
435+ operationName ,
436+ batch . Count ,
437+ adaptiveBatchSize ,
438+ batch . Count ,
439+ attemptNumber ,
440+ false ,
441+ false ) ;
442+ }
443+
444+ return await ExecuteEmbeddingRequestWithPacingAsync (
445+ async pacingCt => await embeddingGenerator . GenerateAsync (
446+ batch . Select ( c => c . ChunkText ) , cancellationToken : pacingCt ) ,
447+ ct ) ;
448+ } ,
449+ $ "{ operationName } (size={ batch . Count } )",
428450 cancellationToken ) ;
429451
430452 if ( batchEmbeddings . Count != batch . Count )
@@ -435,6 +457,25 @@ async Task EmbedAndUpsertExactBatchAsync(IReadOnlyList<BookContentChunk> batch)
435457 batch [ i ] . TextEmbedding = batchEmbeddings [ i ] . Vector ;
436458
437459 await staging . UpsertAsync ( batch , cancellationToken ) ;
460+ if ( _logger is not null )
461+ {
462+ LogEmbeddingBatchRequestState (
463+ _logger ,
464+ operationName ,
465+ batch . Count ,
466+ adaptiveBatchSize ,
467+ batch . Count ,
468+ attemptNumber ,
469+ true ,
470+ false ) ;
471+ }
472+
473+ if ( ! successfulBatchRequestCounts . TryAdd ( batch . Count , 1 ) )
474+ successfulBatchRequestCounts [ batch . Count ] ++ ;
475+
476+ if ( ! successfulBatchChunkTotals . TryAdd ( batch . Count , batch . Count ) )
477+ successfulBatchChunkTotals [ batch . Count ] += batch . Count ;
478+
438479 totalCount += batch . Count ;
439480 LogProgressIfNeeded ( ) ;
440481 }
@@ -454,7 +495,15 @@ async Task EmbedAndUpsertBatchAdaptiveAsync(IReadOnlyList<BookContentChunk> batc
454495 adaptiveBatchSize = splitSize ;
455496 if ( _logger is not null )
456497 {
457- LogEmbeddingBatchDownshift ( _logger , previousAdaptiveBatchSize , adaptiveBatchSize , _retryOptions . MaxRetries + 1 ) ;
498+ LogEmbeddingBatchRequestState (
499+ _logger ,
500+ "GenerateBatchEmbeddings" ,
501+ previousAdaptiveBatchSize ,
502+ adaptiveBatchSize ,
503+ batch . Count ,
504+ _retryOptions . MaxRetries + 1 ,
505+ false ,
506+ true ) ;
458507 }
459508 }
460509
@@ -491,6 +540,19 @@ async Task EmbedAndUpsertBatchAdaptiveAsync(IReadOnlyList<BookContentChunk> batc
491540 await EmbedAndUpsertBatchAdaptiveAsync ( batchToProcess ) ;
492541 }
493542
543+ if ( _logger is not null )
544+ {
545+ foreach ( var entry in successfulBatchRequestCounts . OrderBy ( kvp => kvp . Key ) )
546+ {
547+ successfulBatchChunkTotals . TryGetValue ( entry . Key , out var successfulChunkCount ) ;
548+ LogEmbeddingBatchSizeSummary (
549+ _logger ,
550+ entry . Key ,
551+ entry . Value ,
552+ successfulChunkCount ) ;
553+ }
554+ }
555+
494556 Console . WriteLine ( $ "Uploaded { totalCount } chunks to staging collection '{ stagingName } '.") ;
495557 }
496558 catch
@@ -585,13 +647,17 @@ private static partial void LogEmbeddingRetryAttemptsExhausted(
585647
/// <summary>
/// Logs one "embedding batch request state" event at Information level.
/// The same event shape is used for an embedding request attempt, for a
/// completed batch, and for a throttling-driven batch-size downshift; the
/// two boolean flags distinguish those cases.
/// </summary>
/// <remarks>
/// The parameter names are intentionally snake_case: every placeholder in
/// <c>Message</c> must name a parameter of this method, and those names become
/// the structured log property keys. Keep the placeholders and the parameter
/// list in sync (no whitespace inside the braces).
/// NOTE(review): EventId 12004 previously identified the Warning-level
/// "batch downshift" event; confirm that nothing filters or alerts on that
/// id/level combination before relying on this Information-level event.
/// </remarks>
/// <param name="logger">Logger that receives the event.</param>
/// <param name="operation_name">Name of the embedding operation being reported.</param>
/// <param name="current_batch_size">Batch size the report refers to (the previous adaptive size when reporting a downshift).</param>
/// <param name="effective_batch_size">Adaptive batch size in effect when the event is written.</param>
/// <param name="chunk_count_in_request">Number of chunks carried by the request.</param>
/// <param name="attempt_number">1-based attempt counter for the request.</param>
/// <param name="request_succeeded"><c>true</c> when the batch was embedded and upserted.</param>
/// <param name="request_throttled"><c>true</c> when the event reports a throttling-triggered downshift.</param>
[LoggerMessage(
    EventId = 12004,
    Level = LogLevel.Information,
    Message = "Embedding batch request state: operation_name={operation_name} current_batch_size={current_batch_size} effective_batch_size={effective_batch_size} chunk_count_in_request={chunk_count_in_request} attempt_number={attempt_number} request_succeeded={request_succeeded} request_throttled={request_throttled}")]
private static partial void LogEmbeddingBatchRequestState(
    ILogger logger,
    string operation_name,
    int current_batch_size,
    int effective_batch_size,
    int chunk_count_in_request,
    int attempt_number,
    bool request_succeeded,
    bool request_throttled);
595661
596662 [ LoggerMessage (
597663 EventId = 12005 ,
@@ -622,4 +688,14 @@ private static partial void LogEmbeddingProgressCount(
622688 ILogger logger ,
623689 int processedChunks ,
624690 int adaptiveBatchSize ) ;
691+
/// <summary>
/// Logs one Information-level summary line for a single successful batch size:
/// how many requests of that size succeeded and how many chunks they carried
/// in total.
/// </summary>
/// <remarks>
/// Parameter names are snake_case because each one is bound to the placeholder
/// of the same name in <c>Message</c>.
/// </remarks>
/// <param name="logger">Logger that receives the event.</param>
/// <param name="successful_batch_size">Batch size being summarized.</param>
/// <param name="successful_request_count">Number of successful requests recorded for that batch size.</param>
/// <param name="successful_chunk_count">Total chunks recorded for that batch size.</param>
[LoggerMessage(
    Level = LogLevel.Information,
    EventId = 12008,
    Message = "Embedding successful batch-size summary: successful_batch_size={successful_batch_size} successful_request_count={successful_request_count} successful_chunk_count={successful_chunk_count}")]
private static partial void LogEmbeddingBatchSizeSummary(
    ILogger logger,
    int successful_batch_size,
    int successful_request_count,
    int successful_chunk_count);
625701}
0 commit comments