Skip to content

Commit ef2f8e3

Browse files
committed
code reveiw fix
1 parent 6c2a6ac commit ef2f8e3

File tree

8 files changed

+40
-46
lines changed

8 files changed

+40
-46
lines changed

AGENTS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
- Keep default prompts in static C# classes; do not rely on prompt files under `prompts/` for built-in templates.
88
- Register language models through Microsoft.Extensions.AI keyed services; avoid bespoke `LanguageModelConfig` providers.
99
- Always run `dotnet format GraphRag.slnx` before finishing work.
10+
- Always run `dotnet test GraphRag.slnx` before finishing work, after building.
1011

1112
# Conversations
1213
any resulting updates to agents.md should go under the section "## Rules to follow"

Directory.Build.props

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@
2525
<RepositoryUrl>https://github.com/managedcode/graphrag</RepositoryUrl>
2626
<PackageProjectUrl>https://github.com/managedcode/graphrag</PackageProjectUrl>
2727
<Product>Managed Code GraphRag</Product>
28-
<Version>0.0.3</Version>
29-
<PackageVersion>0.0.3</PackageVersion>
28+
<Version>0.0.4</Version>
29+
<PackageVersion>0.0.4</PackageVersion>
3030

3131
</PropertyGroup>
3232
<PropertyGroup Condition="'$(GITHUB_ACTIONS)' == 'true'">

src/ManagedCode.GraphRag/Community/CommunityBuilder.cs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -181,12 +181,9 @@ private static List<List<string>> BuildUsingConnectedComponents(
181181
.OrderBy(_ => random.Next())
182182
.ToList();
183183

184-
foreach (var neighbor in orderedNeighbors)
184+
foreach (var neighbor in orderedNeighbors.Where(visited.Add))
185185
{
186-
if (visited.Add(neighbor))
187-
{
188-
queue.Enqueue(neighbor);
189-
}
186+
queue.Enqueue(neighbor);
190187
}
191188
}
192189

src/ManagedCode.GraphRag/Config/HeuristicMaintenanceConfig.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
namespace GraphRag.Config;
22

33
/// <summary>
4-
/// Represents heuristic controls that fine-tune ingestion and graph maintenance behaviour.
4+
/// Represents heuristic controls that fine-tune ingestion and graph maintenance behavior.
55
/// The defaults mirror the semantics implemented in the GraphRag.Net demo service where
66
/// ingestion aggressively deduplicates, trims by token budgets, and repairs sparse graphs.
77
/// </summary>

src/ManagedCode.GraphRag/Indexing/Heuristics/GraphExtractionHeuristics.cs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -229,12 +229,9 @@ public void Add(RelationshipSeed seed)
229229
_count++;
230230
_description = SelectDescription(_description, seed.Description);
231231

232-
foreach (var textUnit in seed.TextUnitIds)
232+
foreach (var textUnit in seed.TextUnitIds.Where(static id => !string.IsNullOrWhiteSpace(id)))
233233
{
234-
if (!string.IsNullOrWhiteSpace(textUnit))
235-
{
236-
_textUnits.Add(textUnit);
237-
}
234+
_textUnits.Add(textUnit);
238235
}
239236
}
240237

@@ -279,6 +276,7 @@ public RelationshipSeed ToSeed(HeuristicMaintenanceConfig heuristics)
279276
return incoming;
280277
}
281278

279+
// Prefer shorter descriptions to keep summaries concise and token efficient.
282280
return incoming.Length < existing.Length ? incoming : existing;
283281
}
284282
}

src/ManagedCode.GraphRag/Indexing/Heuristics/TextUnitHeuristicProcessor.cs

Lines changed: 13 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ public static async Task<IReadOnlyList<TextUnitRecord>> ApplyAsync(
4444
{
4545
return await DeduplicateAsync(filtered, generator, heuristics, cancellationToken).ConfigureAwait(false);
4646
}
47+
catch (OperationCanceledException)
48+
{
49+
throw;
50+
}
4751
catch (Exception ex)
4852
{
4953
logger?.LogWarning(ex, "Failed to execute semantic deduplication heuristics. Retaining filtered text units only.");
@@ -198,17 +202,9 @@ private static double CosineSimilarity(IReadOnlyList<float> left, IReadOnlyList<
198202
private static List<string> DeduplicatePreservingOrder(IEnumerable<string> source)
199203
{
200204
var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
201-
var result = new List<string>();
202-
203-
foreach (var item in source)
204-
{
205-
if (seen.Add(item))
206-
{
207-
result.Add(item);
208-
}
209-
}
210-
211-
return result;
205+
return source
206+
.Where(seen.Add)
207+
.ToList();
212208
}
213209

214210
private static IEmbeddingGenerator<string, Embedding<float>>? ResolveEmbeddingGenerator(
@@ -248,7 +244,8 @@ private sealed class DeduplicationCluster(TextUnitRecord record, float[] vector)
248244
public void Update(TextUnitRecord incoming)
249245
{
250246
var mergedDocuments = MergeLists(Record.DocumentIds, incoming.DocumentIds);
251-
var tokenCount = Math.Min(Record.TokenCount, incoming.TokenCount);
247+
// Sum token counts so merged records reflect their combined budget.
248+
var tokenCount = (int)Math.Min((long)Record.TokenCount + incoming.TokenCount, int.MaxValue);
252249

253250
Record = Record with
254251
{
@@ -259,26 +256,11 @@ public void Update(TextUnitRecord incoming)
259256

260257
private static string[] MergeLists(IReadOnlyList<string> first, IReadOnlyList<string> second)
261258
{
262-
var merged = new List<string>(first.Count + second.Count);
263259
var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
264-
265-
foreach (var item in first)
266-
{
267-
if (seen.Add(item))
268-
{
269-
merged.Add(item);
270-
}
271-
}
272-
273-
foreach (var item in second)
274-
{
275-
if (seen.Add(item))
276-
{
277-
merged.Add(item);
278-
}
279-
}
280-
281-
return merged.ToArray();
260+
return first
261+
.Concat(second)
262+
.Where(seen.Add)
263+
.ToArray();
282264
}
283265
}
284266
}

src/ManagedCode.GraphRag/Indexing/Workflows/CreateBaseTextUnitsWorkflow.cs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ public static WorkflowDelegate Create()
2424

2525
var textUnits = new List<TextUnitRecord>();
2626
var callbacks = context.Callbacks;
27-
var chunkingConfig = config.Chunks;
2827
var heuristicConfig = config.Heuristics ?? new HeuristicMaintenanceConfig();
28+
var chunkingConfig = CloneChunkingConfig(config.Chunks);
2929
if (heuristicConfig.MinimumChunkOverlap > 0 && chunkingConfig.Overlap < heuristicConfig.MinimumChunkOverlap)
3030
{
3131
chunkingConfig.Overlap = heuristicConfig.MinimumChunkOverlap;
@@ -102,6 +102,22 @@ public static WorkflowDelegate Create()
102102
return builder.ToString();
103103
}
104104

105+
private static ChunkingConfig CloneChunkingConfig(ChunkingConfig source)
106+
{
107+
return new ChunkingConfig
108+
{
109+
Size = source.Size,
110+
Overlap = source.Overlap,
111+
EncodingModel = source.EncodingModel,
112+
Strategy = source.Strategy,
113+
PrependMetadata = source.PrependMetadata,
114+
ChunkSizeIncludesMetadata = source.ChunkSizeIncludesMetadata,
115+
GroupByColumns = source.GroupByColumns is { Count: > 0 }
116+
? new List<string>(source.GroupByColumns)
117+
: new List<string>()
118+
};
119+
}
120+
105121
private static ChunkingConfig CreateEffectiveConfig(ChunkingConfig original, int metadataTokens)
106122
{
107123
if (!original.ChunkSizeIncludesMetadata || metadataTokens == 0)

tests/ManagedCode.GraphRag.Tests/Integration/HeuristicMaintenanceIntegrationTests.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ public async Task HeuristicMaintenanceWorkflow_AppliesBudgetsAndSemanticDeduplic
126126
Assert.Equal(2, merged.DocumentIds.Count);
127127
Assert.Contains("doc-1", merged.DocumentIds, StringComparer.OrdinalIgnoreCase);
128128
Assert.Contains("doc-2", merged.DocumentIds, StringComparer.OrdinalIgnoreCase);
129-
Assert.Equal(35, merged.TokenCount);
129+
Assert.Equal(75, merged.TokenCount);
130130

131131
var survivor = Assert.Single(processed, unit => unit.Id == "b");
132132
Assert.Single(survivor.DocumentIds);

0 commit comments

Comments
 (0)