-
Notifications
You must be signed in to change notification settings - Fork 862
Expand file tree
/
Copy pathDataIngestor.cs
More file actions
32 lines (28 loc) · 1.23 KB
/
DataIngestor.cs
File metadata and controls
32 lines (28 loc) · 1.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
using Microsoft.Extensions.AI;
using Microsoft.Extensions.DataIngestion;
using Microsoft.Extensions.DataIngestion.Chunkers;
using Microsoft.Extensions.VectorData;
using Microsoft.ML.Tokenizers;
namespace AIChatWeb_CSharp.Web.Services.Ingestion;
/// <summary>
/// Runs documents from a directory through an <c>IngestionPipeline</c> whose output is written
/// to the injected vector store collection.
/// </summary>
/// <param name="logger">Receives one informational entry per result yielded by the pipeline.</param>
/// <param name="loggerFactory">Passed to the <c>IngestionPipeline</c> constructor.</param>
/// <param name="vectorCollection">Collection wrapped by the <c>VectorStoreWriter</c>.</param>
/// <param name="embeddingGenerator">Embedding generator supplied to the <c>SemanticSimilarityChunker</c>.</param>
public class DataIngestor(
    ILogger<DataIngestor> logger,
    ILoggerFactory loggerFactory,
    VectorStoreCollection<Guid, IngestedChunk> vectorCollection,
    IEmbeddingGenerator<string, Embedding<float>> embeddingGenerator)
{
    /// <summary>
    /// Builds a writer, a chunker and a pipeline, then asks the pipeline to process
    /// <paramref name="directory"/> with <paramref name="searchPattern"/>, logging every result.
    /// </summary>
    /// <param name="directory">
    /// Directory handed both to the <c>DocumentReader</c> and to <c>ProcessAsync</c>.
    /// </param>
    /// <param name="searchPattern">
    /// Pattern forwarded unchanged to <c>ProcessAsync</c>; presumably a file glob such as
    /// <c>*.pdf</c> (confirm against the pipeline documentation).
    /// </param>
    /// <returns>A task that completes once the pipeline's result stream has been fully consumed.</returns>
    public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern)
    {
        // The writer is declared first so that it is disposed last, after the pipeline using it.
        using var chunkWriter = new VectorStoreWriter<string, IngestedChunk>(
            vectorCollection,
            new() { IncrementalIngestion = false });

        // Tokenizer for the "gpt-4o" model, wrapped in the chunker's options object.
        var tokenizer = TiktokenTokenizer.CreateForModel("gpt-4o");
        var chunker = new SemanticSimilarityChunker(embeddingGenerator, new(tokenizer));

        using var ingestion = new IngestionPipeline<string>(
            chunker: chunker,
            writer: chunkWriter,
            loggerFactory: loggerFactory);

        var reader = new DocumentReader(directory);

        // Results are streamed; each one is logged as soon as the pipeline yields it.
        await foreach (var outcome in ingestion.ProcessAsync(reader, directory, searchPattern))
        {
            logger.LogInformation(
                "Completed processing '{id}'. Succeeded: '{succeeded}'.",
                outcome.DocumentId,
                outcome.Succeeded);
        }
    }
}