|
| 1 | +using System.Text; |
1 | 2 | using MaIN.Domain.Configuration; |
2 | 3 | using MaIN.Domain.Configuration.BackendInferenceParams; |
3 | 4 | using MaIN.Domain.Entities; |
| 5 | +using MaIN.Domain.Models; |
4 | 6 | using MaIN.Domain.Models.Concrete; |
5 | 7 | using MaIN.Services.Constants; |
6 | 8 | using MaIN.Services.Services.Abstract; |
7 | 9 | using MaIN.Services.Services.LLMService.Auth; |
8 | 10 | using MaIN.Services.Services.LLMService.Memory; |
9 | 11 | using MaIN.Services.Services.Models; |
| 12 | +using MaIN.Services.Utils; |
10 | 13 | using Microsoft.Extensions.Logging; |
11 | 14 |
|
12 | 15 | namespace MaIN.Services.Services.LLMService; |
@@ -97,16 +100,153 @@ protected override void ApplyBackendParams(Dictionary<string, object> requestBod |
97 | 100 | return await base.Send(chat, options, cancellationToken); |
98 | 101 | } |
99 | 102 |
|
100 | | - public new async Task<ChatResult?> AskMemory( |
/// <summary>
/// Answers the last message of <paramref name="chat"/> by handing the memory sources directly
/// to <c>Send</c> rather than delegating to the base memory implementation.
/// Files and streams whose name has a Gemini-native extension (PDFs, images) are attached as
/// raw inline bytes; every other source is turned into text and prepended to the question.
/// </summary>
/// <param name="chat">Conversation whose last message carries the question.</param>
/// <param name="memoryOptions">Text, file, stream and memory entries used as context.</param>
/// <param name="requestOptions">Options forwarded unchanged to <c>Send</c>.</param>
/// <param name="cancellationToken">Token passed to the file/stream reads and to <c>Send</c>.</param>
/// <returns>The result of <c>Send</c>, or <c>null</c> when the chat contains no messages.</returns>
public override async Task<ChatResult?> AskMemory(
    Chat chat,
    ChatMemoryOptions memoryOptions,
    ChatRequestOptions requestOptions,
    CancellationToken cancellationToken = default)
{
    ExtractLocation(chat);

    if (!chat.Messages.Any())
    {
        return null;
    }

    var question = chat.Messages.Last();

    // Snapshot the message state: it is rewritten for the request below and must be
    // put back afterwards, whether the request succeeds or throws.
    var savedContent = question.Content;
    var savedFiles = question.Files;
    var savedImages = question.Images;

    try
    {
        var attachments = new List<byte[]>();
        var documentText = new StringBuilder();

        CollectTextData(memoryOptions, documentText);
        await CollectFilesData(memoryOptions, attachments, documentText, cancellationToken);
        await CollectStreamData(memoryOptions, attachments, documentText, cancellationToken);
        CollectMemoryItems(memoryOptions, documentText);

        // Prompt layout: optional document context first, then the user's original question.
        var prompt = new StringBuilder();
        if (documentText.Length > 0)
        {
            prompt.AppendLine("Use the following document content to answer the question:\n")
                  .Append(documentText)
                  .AppendLine();
        }

        prompt.Append(savedContent);

        var grammar = chat.MemoryParams.Grammar;
        if (grammar is not null)
        {
            var jsonGrammar = new GrammarToJsonConverter().ConvertToJson(grammar);
            prompt.Append(
                $" | For your next response only, please respond using exactly the following JSON format: \n{jsonGrammar}\n. Do not include any explanations, code blocks, or additional content. After this single JSON response, resume your normal conversational style.");
        }

        question.Content = prompt.ToString();
        question.Files = null;

        // Images already on the message come first, followed by the inline file bytes
        // gathered above (PDFs travel as native multimodal content as well).
        var merged = new List<byte[]>();
        if (savedImages is not null)
        {
            merged.AddRange(savedImages);
        }

        merged.AddRange(attachments);
        question.Images = merged.Count > 0 ? merged : null;

        return await Send(chat, requestOptions, cancellationToken);
    }
    finally
    {
        question.Content = savedContent;
        question.Files = savedFiles;
        question.Images = savedImages;
    }
}
| 167 | + |
| 168 | + #region Multimodal File Processing |
| 169 | + |
/// <summary>
/// Extensions of the files that are sent to Gemini as inline bytes (PDF and image formats).
/// The set compares with <see cref="StringComparer.OrdinalIgnoreCase"/>, so callers can pass
/// an extension in any casing without normalising it first.
/// </summary>
private static readonly HashSet<string> GeminiNativeExtensions = new(StringComparer.OrdinalIgnoreCase)
{
    ".pdf", ".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff", ".tif", ".heic", ".heif", ".avif"
};

/// <summary>
/// Tells whether <paramref name="fileName"/> ends with one of the
/// <see cref="GeminiNativeExtensions"/>, ignoring case.
/// </summary>
/// <param name="fileName">File name or path; only its extension is inspected.</param>
/// <returns><c>true</c> when the extension is in the native set; otherwise <c>false</c>.</returns>
private static bool IsGeminiNativeFile(string fileName)
    // The case-insensitive comparer on the set replaces the per-call ToLowerInvariant() copy.
    => GeminiNativeExtensions.Contains(Path.GetExtension(fileName));
| 175 | + |
/// <summary>
/// Appends every entry of <c>options.TextData</c> to <paramref name="textContext"/> as a
/// <c>[Document: name]</c> header line, the entry's content, and a blank separator line.
/// </summary>
/// <param name="options">Memory options whose <c>TextData</c> entries are read.</param>
/// <param name="textContext">Builder that accumulates the textual document context.</param>
private static void CollectTextData(ChatMemoryOptions options, StringBuilder textContext)
{
    foreach (var (title, body) in options.TextData)
    {
        textContext
            .AppendLine($"[Document: {title}]")
            .AppendLine(body)
            .AppendLine();
    }
}
109 | 185 |
|
/// <summary>
/// Walks <c>options.FilesData</c>. Entries whose name has a Gemini-native extension are read
/// from disk and added to <paramref name="inlineBytes"/>; all other entries are run through
/// <c>DocumentProcessor.ProcessDocument</c> and appended to <paramref name="textContext"/>
/// under a <c>[Document: name]</c> header.
/// </summary>
/// <param name="options">Memory options whose <c>FilesData</c> (name, path) entries are read.</param>
/// <param name="inlineBytes">Receives the raw bytes of native files.</param>
/// <param name="textContext">Receives the extracted text of non-native files.</param>
/// <param name="cancellationToken">Token passed to the asynchronous file read.</param>
private static async Task CollectFilesData(
    ChatMemoryOptions options, List<byte[]> inlineBytes, StringBuilder textContext,
    CancellationToken cancellationToken)
{
    foreach (var (fileName, filePath) in options.FilesData)
    {
        if (!IsGeminiNativeFile(fileName))
        {
            // The header is written before the document is processed, as in a plain
            // statement sequence: the receiver call runs before the next argument is evaluated.
            textContext
                .AppendLine($"[Document: {fileName}]")
                .AppendLine(DocumentProcessor.ProcessDocument(filePath))
                .AppendLine();
            continue;
        }

        var payload = await File.ReadAllBytesAsync(filePath, cancellationToken);
        inlineBytes.Add(payload);
    }
}
| 204 | + |
/// <summary>
/// Walks <c>options.StreamData</c>, buffering each stream fully into memory (seekable streams
/// are rewound to position 0 first). Streams whose name has a Gemini-native extension
/// contribute their bytes to <paramref name="inlineBytes"/>; the others are written to a
/// temporary file, processed with <c>DocumentProcessor.ProcessDocument</c>, and appended to
/// <paramref name="textContext"/> under a <c>[Document: name]</c> header.
/// </summary>
/// <param name="options">Memory options whose <c>StreamData</c> (name, stream) entries are read.</param>
/// <param name="inlineBytes">Receives the raw bytes of native streams.</param>
/// <param name="textContext">Receives the extracted text of non-native streams.</param>
/// <param name="cancellationToken">Token passed to the stream copy and the temp-file write.</param>
private static async Task CollectStreamData(
    ChatMemoryOptions options, List<byte[]> inlineBytes, StringBuilder textContext,
    CancellationToken cancellationToken)
{
    foreach (var (streamName, source) in options.StreamData)
    {
        byte[] payload;
        using (var buffer = new MemoryStream())
        {
            if (source.CanSeek)
            {
                source.Position = 0;
            }

            await source.CopyToAsync(buffer, cancellationToken);
            payload = buffer.ToArray();
        }

        if (IsGeminiNativeFile(streamName))
        {
            inlineBytes.Add(payload);
            continue;
        }

        // ProcessDocument is called with a path, so the buffered bytes go through a
        // uniquely named temp file that keeps the original extension.
        var scratchFile = Path.Combine(
            Path.GetTempPath(),
            $"vertex_tmp_{Guid.NewGuid()}{Path.GetExtension(streamName)}");
        try
        {
            await File.WriteAllBytesAsync(scratchFile, payload, cancellationToken);
            textContext
                .AppendLine($"[Document: {streamName}]")
                .AppendLine(DocumentProcessor.ProcessDocument(scratchFile))
                .AppendLine();
        }
        finally
        {
            if (File.Exists(scratchFile))
            {
                File.Delete(scratchFile);
            }
        }
    }
}
| 237 | + |
/// <summary>
/// Appends each entry of <c>options.Memory</c> to <paramref name="textContext"/>, each followed
/// by a blank line. Does nothing when the collection is <c>null</c> or empty.
/// </summary>
/// <param name="options">Memory options whose <c>Memory</c> entries are read.</param>
/// <param name="textContext">Builder that accumulates the textual document context.</param>
private static void CollectMemoryItems(ChatMemoryOptions options, StringBuilder textContext)
{
    var entries = options.Memory;
    if (entries is null || entries.Count <= 0)
    {
        return;
    }

    foreach (var entry in entries)
    {
        textContext.AppendLine(entry).AppendLine();
    }
}
| 247 | + |
| 248 | + #endregion |
| 249 | + |
110 | 250 | private void ExtractLocation(Chat chat) |
111 | 251 | { |
112 | 252 | if (chat.BackendParams is VertexInferenceParams vp) |
|
0 commit comments