Skip to content

Commit edfd1cb

Browse files
committed
middleware: tighten image enrichment loops
1 parent bf2c540 commit edfd1cb

1 file changed

Lines changed: 77 additions & 9 deletions

File tree

src/MarkItDown/Conversion/Middleware/AiImageEnrichmentMiddleware.cs

Lines changed: 77 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
using System;
2+
using System.Buffers;
23
using System.Collections.Generic;
34
using System.Globalization;
4-
using System.Linq;
5+
using System.Runtime.InteropServices;
56
using System.Text;
67
using System.Text.Json;
78
using System.Threading;
@@ -35,13 +36,13 @@ public async Task InvokeAsync(ConversionPipelineContext context, CancellationTok
3536
return;
3637
}
3738

38-
foreach (var image in context.Artifacts.Images)
39+
async Task ProcessImageAsync(ImageArtifact image)
3940
{
4041
cancellationToken.ThrowIfCancellationRequested();
4142

4243
if (image.DetailedDescription is not null)
4344
{
44-
continue;
45+
return;
4546
}
4647

4748
var prompt = BuildPrompt(context.StreamInfo, image);
@@ -61,12 +62,12 @@ public async Task InvokeAsync(ConversionPipelineContext context, CancellationTok
6162
catch (Exception ex)
6263
{
6364
context.Logger?.LogWarning(ex, "Image enrichment failed for {Label}", image.Label ?? image.PageNumber?.ToString(CultureInfo.InvariantCulture));
64-
continue;
65+
return;
6566
}
6667

6768
if (response is null)
6869
{
69-
continue;
70+
return;
7071
}
7172

7273
ImageInsight? insight = null;
@@ -90,7 +91,7 @@ public async Task InvokeAsync(ConversionPipelineContext context, CancellationTok
9091

9192
if (string.IsNullOrWhiteSpace(markdown))
9293
{
93-
continue;
94+
return;
9495
}
9596

9697
image.DetailedDescription = markdown.Trim();
@@ -102,6 +103,21 @@ public async Task InvokeAsync(ConversionPipelineContext context, CancellationTok
102103

103104
image.Metadata["detailedDescription"] = image.DetailedDescription;
104105
}
106+
107+
if (context.Artifacts.Images is List<ImageArtifact> list)
108+
{
109+
for (var i = 0; i < list.Count; i++)
110+
{
111+
await ProcessImageAsync(list[i]).ConfigureAwait(false);
112+
}
113+
}
114+
else
115+
{
116+
foreach (var image in context.Artifacts.Images)
117+
{
118+
await ProcessImageAsync(image).ConfigureAwait(false);
119+
}
120+
}
105121
}
106122

107123
private static string BuildPrompt(StreamInfo streamInfo, ImageArtifact image)
@@ -127,11 +143,34 @@ private static string BuildPrompt(StreamInfo streamInfo, ImageArtifact image)
127143
builder.AppendLine($"- MimeType: {image.ContentType ?? "unknown"}");
128144
builder.AppendLine();
129145

130-
var base64 = Convert.ToBase64String(image.Data);
131146
builder.Append("ImagePayload: data:");
132147
builder.Append(image.ContentType ?? "application/octet-stream");
133148
builder.Append(";base64,");
134-
builder.Append(base64);
149+
150+
var base64Length = checked(((image.Data.Length + 2) / 3) * 4);
151+
char[]? rented = null;
152+
Span<char> buffer = base64Length <= 4096
153+
? stackalloc char[base64Length]
154+
: (rented = ArrayPool<char>.Shared.Rent(base64Length));
155+
156+
try
157+
{
158+
if (Convert.TryToBase64Chars(image.Data, buffer, out var charsWritten))
159+
{
160+
builder.Append(buffer[..charsWritten]);
161+
}
162+
else
163+
{
164+
builder.Append(Convert.ToBase64String(image.Data));
165+
}
166+
}
167+
finally
168+
{
169+
if (rented is not null)
170+
{
171+
ArrayPool<char>.Shared.Return(rented);
172+
}
173+
}
135174

136175
return builder.ToString();
137176
}
@@ -300,10 +339,39 @@ public string ToMarkdown()
300339

301340
if (KeyFindings.Count > 0)
302341
{
342+
var originalLength = builder.Length;
343+
var anyFinding = false;
303344
builder.AppendLine().AppendLine("Key findings:");
304-
foreach (var finding in KeyFindings.Where(static f => !string.IsNullOrWhiteSpace(f)))
345+
346+
void AppendFinding(string? finding)
305347
{
348+
if (string.IsNullOrWhiteSpace(finding))
349+
{
350+
return;
351+
}
352+
306353
builder.Append("- ").AppendLine(finding.Trim());
354+
anyFinding = true;
355+
}
356+
357+
if (KeyFindings is List<string> list)
358+
{
359+
foreach (var finding in CollectionsMarshal.AsSpan(list))
360+
{
361+
AppendFinding(finding);
362+
}
363+
}
364+
else
365+
{
366+
foreach (var finding in KeyFindings)
367+
{
368+
AppendFinding(finding);
369+
}
370+
}
371+
372+
if (!anyFinding)
373+
{
374+
builder.Length = originalLength;
307375
}
308376
}
309377

0 commit comments

Comments
 (0)