Skip to content

Commit ecd1849

Browse files
authored
Merge pull request #1386 from zsogitbe/SequenceIDPooling
Implement Sequence ID pooling in BatchedExecutor to prevent native SeqMax overflow and crashes
2 parents 18b5c8d + 702adf1 commit ecd1849

2 files changed

Lines changed: 51 additions & 6 deletions

File tree

LLama/Batched/BatchedExecutor.cs

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,54 @@ namespace LLama.Batched;
1414
public sealed class BatchedExecutor
1515
: IDisposable
1616
{
17-
private int _nextSequenceId;
17+
/// <summary>
18+
/// Tracks the sequence IDs currently in use by active conversations.
19+
/// Because released IDs are reused, allocated IDs stay below the number of concurrently active conversations; the pool itself does not compare them against the native backend's SeqMax.
20+
/// </summary>
21+
private readonly HashSet<int> _activeSequenceIds = new();
22+
23+
/// <summary>
/// Allocates the lowest Sequence ID that is not currently in use and marks it as active.
/// </summary>
/// <remarks>
/// IDs handed back through <see cref="ReleaseSequenceId"/> become available again, so the
/// returned value is always lower than the number of IDs that are active after the call.
/// NOTE(review): nothing in this method compares the ID against the native SeqMax limit;
/// staying below it relies on the caller not keeping more conversations alive than the
/// context was configured for - confirm against the context parameters.
/// </remarks>
/// <returns>The newly allocated sequence ID.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown if every ID in the range [0, <see cref="int.MaxValue"/>) is already in use.
/// </exception>
internal LLamaSeqId GetNextSequenceId()
{
    // Serialise against ReleaseSequenceId and against concurrent allocations:
    // HashSet<T> is not safe for concurrent mutation.
    lock (_activeSequenceIds)
    {
        for (var candidate = 0; candidate < int.MaxValue; candidate++)
        {
            // HashSet<T>.Add returns false when the value is already present, so a single
            // call both tests for availability and claims the ID (no separate Contains lookup).
            if (_activeSequenceIds.Add(candidate))
            {
                return (LLamaSeqId)candidate;
            }
        }
    }

    // Only reachable when every candidate ID was already taken.
    throw new InvalidOperationException("Failed to allocate a Sequence ID.");
}
49+
50+
/// <summary>
/// Marks a Sequence ID as no longer in use so that a later <see cref="GetNextSequenceId"/>
/// call may hand it out again.
/// </summary>
/// <remarks>
/// Intended to be invoked exactly once per conversation, at the point the conversation is disposed.
/// </remarks>
/// <param name="id">The sequence ID being returned to the pool.</param>
internal void ReleaseSequenceId(LLamaSeqId id)
{
    var released = (int)id;

    // Takes the same monitor as GetNextSequenceId so allocation and release never interleave.
    lock (_activeSequenceIds)
    {
        _activeSequenceIds.Remove(released);
    }
}
64+
1865
private readonly List<IBatch> _batchQueue = [];
1966
private string? _mtmdMarker;
2067
private int _batchQueueHead;
@@ -244,11 +291,6 @@ public void Dispose()
244291

245292
Context.Dispose();
246293
}
247-
248-
internal LLamaSeqId GetNextSequenceId()
249-
{
250-
return checked((LLamaSeqId)_nextSequenceId++);
251-
}
252294

253295
/// <summary>
254296
/// Get a reference to a batch that tokens can be added to.

LLama/Batched/Conversation.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,9 @@ public void Dispose()
135135
// Remove this conversation from the KV cache
136136
Executor.Context.NativeHandle.MemorySequenceRemove(ConversationId, -1, -1);
137137

138+
// Release the ID back to the pool to be reused!
139+
Executor.ReleaseSequenceId(ConversationId);
140+
138141
// Prevent finalizer from running
139142
GC.SuppressFinalize(this);
140143
}

0 commit comments

Comments
 (0)