forked from managedcode/graphrag
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTokenTextChunkerBenchmarks.cs
More file actions
83 lines (70 loc) · 2.71 KB
/
TokenTextChunkerBenchmarks.cs
File metadata and controls
83 lines (70 loc) · 2.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
using BenchmarkDotNet.Attributes;
using GraphRag.Chunking;
using GraphRag.Config;
namespace ManagedCode.GraphRag.Benchmarks.Chunking;
/// <summary>
/// Benchmarks <see cref="TokenTextChunker"/> against three synthetic plain-text
/// documents of increasing length, for every combination of
/// <see cref="ChunkSize"/> and <see cref="ChunkOverlap"/>.
/// </summary>
[MemoryDiagnoser]
[HideColumns("Error", "StdDev", "RatioSD")]
public class TokenTextChunkerBenchmarks
{
    // Identifier given to every synthetic document slice.
    private const string DocumentId = "doc1";

    // Target document lengths, measured in characters of generated text.
    private const int SmallDocumentLength = 1_000;
    private const int MediumDocumentLength = 100_000;
    private const int LargeDocumentLength = 1_000_000;

    // Extra builder capacity so the final appended sentence does not force a resize.
    private const int BuilderHeadroom = 200;

    // Filler sentences that are repeated in order to build a document.
    private static readonly string[] FillerSentences =
    {
        "The quick brown fox jumps over the lazy dog. ",
        "Lorem ipsum dolor sit amet, consectetur adipiscing elit. ",
        "Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. ",
        "Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris. ",
        "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore. ",
        "Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia. ",
        "Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit. ",
        "Neque porro quisquam est, qui dolorem ipsum quia dolor sit amet. "
    };

    private TokenTextChunker _chunker = null!;
    private ChunkingConfig _config = null!;
    private ChunkSlice[] _smallDocument = null!;
    private ChunkSlice[] _mediumDocument = null!;
    private ChunkSlice[] _largeDocument = null!;

    /// <summary>Value assigned to <c>ChunkingConfig.Size</c> for the current run.</summary>
    [Params(512, 1024, 2048)]
    public int ChunkSize { get; set; }

    /// <summary>Value assigned to <c>ChunkingConfig.Overlap</c> for the current run.</summary>
    [Params(0, 64, 128)]
    public int ChunkOverlap { get; set; }

    /// <summary>
    /// Creates the chunker, the configuration for the current parameter values,
    /// and the three input documents, so none of that work is measured.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        _chunker = new TokenTextChunker();

        _config = new ChunkingConfig
        {
            Size = ChunkSize,
            Overlap = ChunkOverlap,
            Strategy = ChunkStrategyType.Tokens
        };

        // One single-slice input per document size.
        _smallDocument = CreateSingleSliceInput(SmallDocumentLength);
        _mediumDocument = CreateSingleSliceInput(MediumDocumentLength);
        _largeDocument = CreateSingleSliceInput(LargeDocumentLength);
    }

    /// <summary>Chunks the small document; this is the baseline the other benchmarks are compared to.</summary>
    [Benchmark(Baseline = true)]
    public IReadOnlyList<TextChunk> ChunkSmallDocument() => _chunker.Chunk(_smallDocument, _config);

    /// <summary>Chunks the medium document.</summary>
    [Benchmark]
    public IReadOnlyList<TextChunk> ChunkMediumDocument() => _chunker.Chunk(_mediumDocument, _config);

    /// <summary>Chunks the large document.</summary>
    [Benchmark]
    public IReadOnlyList<TextChunk> ChunkLargeDocument() => _chunker.Chunk(_largeDocument, _config);

    /// <summary>
    /// Wraps one generated document in a one-element <see cref="ChunkSlice"/> array.
    /// </summary>
    /// <param name="approximateLength">Minimum length, in characters, of the generated text.</param>
    private static ChunkSlice[] CreateSingleSliceInput(int approximateLength)
    {
        var text = GeneratePlainTextDocument(approximateLength);
        return new[] { new ChunkSlice(DocumentId, text) };
    }

    /// <summary>
    /// Builds deterministic filler text by appending the fixed sentences in
    /// round-robin order until the text is at least <paramref name="approximateLength"/>
    /// characters long.
    /// </summary>
    /// <param name="approximateLength">Minimum length, in characters, of the returned text.</param>
    /// <returns>
    /// The generated text. Only whole sentences are appended, so the result can
    /// be longer than the target by less than one sentence.
    /// </returns>
    private static string GeneratePlainTextDocument(int approximateLength)
    {
        var builder = new System.Text.StringBuilder(approximateLength + BuilderHeadroom);

        for (var sentenceNumber = 0; builder.Length < approximateLength; sentenceNumber++)
        {
            builder.Append(FillerSentences[sentenceNumber % FillerSentences.Length]);
        }

        return builder.ToString();
    }
}