f: release notes and cleanup

Piotr Stachaczynski · Piotr Stachaczynski · commit 369a4ed93a7a · 2026-05-26T13:51:50.000+02:00
diff --git a/Releases/0.10.9.md b/Releases/0.10.9.md
@@ -0,0 +1,5 @@
+# 0.10.9 release
+
+- Expands the model catalog with the latest Claude (Opus 4.7, Sonnet 4.6, Haiku 4.5), OpenAI (GPT-4.1, GPT-5 family, o3, o4-mini, Codex Mini), Gemini (3.x series), Groq (Llama 4, Qwen3, Compound), and Ollama (Llama 4, Gemma 3/4, Qwen3/3.5/3.6/Coder, DeepSeek R1, Phi4, Mistral) models.
+- Adds a configurable Ollama base URL (`MaIN__OllamaBaseUrl`) for containerized deployments.
+- Adds InferPage Docker images published to GHCR with CPU, CUDA, and bundled-Ollama targets.
diff --git a/src/MaIN.Services/Services/LLMService/Memory/Embeddings/LLamaEmbedderMaINClone.cs b/src/MaIN.Services/Services/LLMService/Memory/Embeddings/LLamaEmbedderMaINClone.cs
@@ -72,17 +72,14 @@ public async Task<IReadOnlyList<float[]>> GetEmbeddings(string input, Cancellati
 
     private async Task<(IReadOnlyList<float[]> Embeddings, int Tokens)> GetEmbeddingsWithTokenCount(string input, CancellationToken cancellationToken = default)
     {
-        // Create a fresh context for each embedding call (0.26.0 pattern)
         using var context = _weights.CreateContext(_params, _logger);
-        //NativeApi.em.llama_set_embeddings(context.NativeHandle, true);
 
         var tokens = context.Tokenize(input, special: true);
         if (tokens.Length > context.ContextSize)
             throw new ArgumentException($"Embedding prompt is longer than the context window ({tokens.Length} > {context.ContextSize})", nameof(input));
 
         cancellationToken.ThrowIfCancellationRequested();
 
-        // Evaluate prompt in batch-size chunks
         var n_past = 0;
         var batch = new LLamaBatch();
         var batchSize = (int)context.Params.BatchSize;
@@ -120,13 +117,11 @@ public async Task<IReadOnlyList<float[]>> GetEmbeddings(string input, Cancellati
             }
         }
 
-        // Extract results
         var poolingType = context.NativeHandle.PoolingType;
         var resultsCount = poolingType == LLamaPoolingType.None ? tokens.Length : 1;
         var results = new List<float[]>(resultsCount);
         results.Add(context.NativeHandle.GetEmbeddingsSeq(LLamaSeqId.Zero).ToArray());
 
-        // Normalize the embeddings vector
         foreach (var embedding in results)
         {
             embedding.EuclideanNormalization();