Skip to content

Commit 369a4ed

Browse files
author
Piotr Stachaczynski
committed
f: release notes and cleanup
1 parent 893e921 commit 369a4ed

2 files changed

Lines changed: 5 additions & 5 deletions

File tree

Releases/0.10.9.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# 0.10.9 release
2+
3+
- Expands the model catalog with the latest Claude (Opus 4.7, Sonnet 4.6, Haiku 4.5), OpenAI (GPT-4.1, GPT-5 family, o3, o4-mini, Codex Mini), Gemini (3.x series), Groq (Llama 4, Qwen3, Compound), and Ollama (Llama 4, Gemma 3/4, Qwen3/3.5/3.6/Coder, DeepSeek R1, Phi4, Mistral) models.
4+
- Adds a configurable Ollama base URL (`MaIN__OllamaBaseUrl`) for containerized deployments.
5+
- Adds InferPage Docker images, published to GHCR, with CPU, CUDA, and bundled-Ollama targets.

src/MaIN.Services/Services/LLMService/Memory/Embeddings/LLamaEmbedderMaINClone.cs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -72,17 +72,14 @@ public async Task<IReadOnlyList<float[]>> GetEmbeddings(string input, Cancellati
7272

7373
private async Task<(IReadOnlyList<float[]> Embeddings, int Tokens)> GetEmbeddingsWithTokenCount(string input, CancellationToken cancellationToken = default)
7474
{
75-
// Create a fresh context for each embedding call (0.26.0 pattern)
7675
using var context = _weights.CreateContext(_params, _logger);
77-
//NativeApi.em.llama_set_embeddings(context.NativeHandle, true);
7876

7977
var tokens = context.Tokenize(input, special: true);
8078
if (tokens.Length > context.ContextSize)
8179
throw new ArgumentException($"Embedding prompt is longer than the context window ({tokens.Length} > {context.ContextSize})", nameof(input));
8280

8381
cancellationToken.ThrowIfCancellationRequested();
8482

85-
// Evaluate prompt in batch-size chunks
8683
var n_past = 0;
8784
var batch = new LLamaBatch();
8885
var batchSize = (int)context.Params.BatchSize;
@@ -120,13 +117,11 @@ public async Task<IReadOnlyList<float[]>> GetEmbeddings(string input, Cancellati
120117
}
121118
}
122119

123-
// Extract results
124120
var poolingType = context.NativeHandle.PoolingType;
125121
var resultsCount = poolingType == LLamaPoolingType.None ? tokens.Length : 1;
126122
var results = new List<float[]>(resultsCount);
127123
results.Add(context.NativeHandle.GetEmbeddingsSeq(LLamaSeqId.Zero).ToArray());
128124

129-
// Normalize the embeddings vector
130125
foreach (var embedding in results)
131126
{
132127
embedding.EuclideanNormalization();

0 commit comments

Comments
 (0)