Skip to content

Commit 5e8953f

Browse files
committed
config
1 parent b79bca8 commit 5e8953f

3 files changed

Lines changed: 25 additions & 2 deletions

File tree

src/MarkdownLd.Kb/Pipeline/TiktokenKnowledgeGraphExtractor.cs

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,9 @@ public TokenizedKnowledgeExtractionResult Extract(IReadOnlyList<MarkdownDocument
3030
var segments = candidates.Select(candidate => CreateSegment(candidate, vectorSpace)).ToArray();
3131
var topics = _topicExtractor.Extract(candidates);
3232
var entityHints = _entityHintExtractor.Extract(documents);
33-
var relations = BuildRelations(segments).ToArray();
33+
var relations = _options.BuildAutoRelatedSegmentRelations
34+
? BuildRelations(segments).ToArray()
35+
: [];
3436
var facts = TokenizedKnowledgeFactFactory.Build(sections, segments, topics, entityHints, relations);
3537
return new TokenizedKnowledgeExtractionResult(facts, segments, vectorSpace);
3638
}

src/MarkdownLd.Kb/Pipeline/TiktokenKnowledgeGraphOptions.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ public sealed record TiktokenKnowledgeGraphOptions
88

99
public TokenVectorWeighting Weighting { get; init; } = DefaultTokenVectorWeighting;
1010

11+
public bool BuildAutoRelatedSegmentRelations { get; init; } = true;
12+
1113
public int MaxRelatedSegments { get; init; } = DefaultMaxRelatedTokenSegments;
1214

1315
public int MinimumTokenCount { get; init; } = DefaultMinimumTokenCount;

tests/MarkdownLd.Kb.Tests/Integration/TiktokenKnowledgeGraphFlowTests.cs

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,13 +224,32 @@ public async Task Tiktoken_mode_builds_token_distance_graph_and_shows_language_d
224224
ukrainianToEnglishHits.ShouldBeLessThanOrEqualTo(ExpectedCrossLanguageMaximumHits);
225225
}
226226

227-
private static Task<MarkdownKnowledgeBuildResult> BuildTokenGraphAsync(string path, string markdown)
227+
[Test]
228+
public async Task Tiktoken_mode_can_disable_auto_related_segment_relations_and_keep_token_distance_search()
229+
{
230+
var result = await BuildTokenGraphAsync(
231+
EnglishPath,
232+
EnglishMarkdown,
233+
buildAutoRelatedSegmentRelations: false);
234+
235+
result.ExtractionMode.ShouldBe(MarkdownKnowledgeExtractionMode.Tiktoken);
236+
result.Graph.CanSearchByTokenDistance.ShouldBeTrue();
237+
238+
var hits = await CountTopHitsAsync(result.Graph, EnglishQueries);
239+
hits.ShouldBeGreaterThanOrEqualTo(ExpectedSameLanguageMinimumHits);
240+
}
241+
242+
private static Task<MarkdownKnowledgeBuildResult> BuildTokenGraphAsync(
243+
string path,
244+
string markdown,
245+
bool buildAutoRelatedSegmentRelations = true)
228246
{
229247
var pipeline = new MarkdownKnowledgePipeline(
230248
BaseUri,
231249
extractionMode: MarkdownKnowledgeExtractionMode.Tiktoken,
232250
tiktokenOptions: new TiktokenKnowledgeGraphOptions
233251
{
252+
BuildAutoRelatedSegmentRelations = buildAutoRelatedSegmentRelations,
234253
MaxRelatedSegments = 2,
235254
});
236255

0 commit comments

Comments
 (0)