Skip to content

Commit 876d654

Browse files
authored
Merge pull request #185 from WolframResearch/feature/optimized-wa-image-extraction
Fetch WolframAlpha images in parallel via URLSubmit
2 parents 17f4043 + 7208667 commit 876d654

3 files changed

Lines changed: 83 additions & 51 deletions

File tree

.cspell.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
"Unparseable",
6868
"USERBASE",
6969
"USERPROFILE",
70+
"webp",
7071
"wolframalpha",
7172
"wolframengine",
7273
"WOLFRAMINIT",

Kernel/StartMCPServer.wl

Lines changed: 68 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,13 @@ Needs[ "Wolfram`Chatbook`" -> "cb`" ];
1212
(* ::**************************************************************************************************************:: *)
1313
(* ::Section::Closed:: *)
1414
(*Configuration*)
15-
$protocolVersion = "2024-11-05";
16-
$toolWarmupDelay = 5; (* seconds *)
17-
$clientName = None;
18-
$clientSupportsUI = False;
19-
$currentMCPServer = None;
20-
$mcpEvaluation = False;
15+
$protocolVersion = "2024-11-05";
16+
$toolWarmupDelay = 5; (* seconds *)
17+
$waImageFetchTimeout = 5; (* seconds, applied to the whole WA image batch via TaskWait *)
18+
$clientName = None;
19+
$clientSupportsUI = False;
20+
$currentMCPServer = None;
21+
$mcpEvaluation = False;
2122

2223
$logTimeStamp := DateString[
2324
{
@@ -726,6 +727,28 @@ graphicsToImageContent[ g_ ] := Enclose[
726727

727728
graphicsToImageContent // endDefinition;
728729

730+
(* ::**************************************************************************************************************:: *)
731+
(* ::Subsubsection::Closed:: *)
732+
(*makeImageContent*)
733+
makeImageContent // beginDefinition;
734+
735+
makeImageContent[
736+
URL[ url_String ],
737+
KeyValuePattern @ {
738+
"StatusCode" -> 200,
739+
"BodyByteArray" -> bytes_ByteArray,
740+
"Headers" -> KeyValuePattern[ "content-type" -> type_String ? (StringStartsQ[ "image/" ]) ]
741+
}
742+
] := {
743+
<| "type" -> "text" , "text" -> "![Image](" <> url <> ")" |>,
744+
<| "type" -> "image", "data" -> BaseEncode @ bytes, "mimeType" -> type |>
745+
};
746+
747+
makeImageContent[ URL[ url_String ], _ ] :=
748+
{ <| "type" -> "text", "text" -> "![Image](" <> url <> ")" |> };
749+
750+
makeImageContent // endDefinition;
751+
729752
(* ::**************************************************************************************************************:: *)
730753
(* ::Subsubsection::Closed:: *)
731754
(*extractWolframAlphaImages*)
@@ -734,7 +757,7 @@ graphicsToImageContent // endDefinition;
734757
(* Matches: public6.wolframalpha.com, www6.wolframalpha.com, etc. *)
735758
$$waImageURLPattern = Shortest[
736759
"![" ~~ Except[ "]" ]... ~~ "](" ~~
737-
url: ("https://" ~~ __ ~~ "wolframalpha.com/files/" ~~ __ ~~ (".gif" | ".png" | ".jpg" | ".jpeg")) ~~
760+
url: ("https://" ~~ __ ~~ "wolframalpha.com/files/" ~~ __ ~~ (".gif"|".png"|".jpg"|".jpeg"|".webp"|".svg")) ~~
738761
")"
739762
];
740763

@@ -744,44 +767,50 @@ extractWolframAlphaImages // beginDefinition;
744767
extractWolframAlphaImages[ str_String ] /; ! $mcpEvaluation := str;
745768

746769
extractWolframAlphaImages[ str_String ] := Enclose[
747-
Catch @ Module[ { parts, hasImages, contentItems },
770+
Catch @ Module[ { parts, urls, fetched, tasks, replaced, contentItems },
771+
772+
(* Split string into text segments and URL[..] tokens *)
773+
parts = StringSplit[ str, $$waImageURLPattern :> URL[ url ] ];
774+
urls = Cases[ parts, _URL ];
775+
776+
(* If no images found, return plain text for backward compatibility *)
777+
If[ urls === { }, Throw @ str ];
778+
779+
(* Pre-fill every URL with a text-only fallback so a timeout still yields the markdown link *)
780+
fetched = AssociationMap[ <| "type" -> "text", "text" -> "![Image](" <> First @ # <> ")" |> &, urls ];
781+
782+
(* Submit all URLs concurrently; each handler overwrites its slot in `fetched` on success.
783+
The outer Function captures the URL in a closure so each handler knows its own key. *)
784+
tasks = Function[ u,
785+
URLSubmit[
786+
u,
787+
HandlerFunctions -> <| "BodyReceived" -> Function[ fetched[ u ] = makeImageContent[ u, # ] ] |>,
788+
HandlerFunctionsKeys -> { "StatusCode", "BodyByteArray", "Headers" }
789+
]
790+
] /@ urls;
748791

749-
(* Split string into text segments and URLs *)
750-
parts = StringSplit[ str, $$waImageURLPattern :> url ];
792+
(* Bound the whole batch, not each request *)
793+
TaskWait[ tasks, TimeConstraint -> $waImageFetchTimeout ];
794+
Quiet[ TaskRemove /@ tasks ];
751795

752-
(* If no images found, return plain text *)
753-
If[ Length @ parts === 1 && StringQ @ First @ parts,
754-
Throw @ str (* Return plain string for backward compatibility *)
796+
replaced = Flatten @ Replace[
797+
parts,
798+
{
799+
"" :> Nothing,
800+
s_String :> <| "type" -> "text", "text" -> s |>,
801+
u_URL :> fetched[ u ]
802+
},
803+
{ 1 }
755804
];
756805

757-
hasImages = False;
758-
contentItems = Flatten @ Map[
759-
Function[ item,
760-
If[ StringQ @ item && ! StringStartsQ[ item, "https://" ],
761-
(* Text segment: create text content *)
762-
If[ StringLength @ item > 0,
763-
{ <| "type" -> "text", "text" -> item |> },
764-
{ }
765-
],
766-
(* URL: import image and create both text + image content *)
767-
hasImages = True;
768-
Module[ { img, imageContent },
769-
img = Quiet @ TimeConstrained[ Import[ item, "Image" ], 5, $Failed ];
770-
imageContent = If[ ImageQ @ img, graphicsToImageContent @ img, $Failed ];
771-
Flatten @ {
772-
(* Always include the markdown link as text *)
773-
<| "type" -> "text", "text" -> "![Image](" <> item <> ")" |>,
774-
(* Add base64 image if import succeeded *)
775-
If[ AssociationQ @ imageContent, imageContent, Nothing ]
776-
}
777-
]
778-
]
779-
],
780-
parts
806+
(* Merge runs of adjacent text items into one *)
807+
contentItems = SequenceReplace[
808+
replaced,
809+
{ as: KeyValuePattern[ "type" -> "text" ].. } :>
810+
<| "type" -> "text", "text" -> StringJoin @ Lookup[ { as }, "text" ] |>
781811
];
782812

783-
(* If we successfully extracted images, return structured content *)
784-
If[ TrueQ @ hasImages && MatchQ[ contentItems, { __Association } ],
813+
If[ MatchQ[ contentItems, { __Association } ],
785814
<| "Content" -> contentItems |>,
786815
str (* Fallback to plain string *)
787816
]

Tests/Graphics.wlt

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -283,14 +283,16 @@ VerificationTest[
283283
]
284284

285285
VerificationTest[
286-
With[
287-
{ result = extractWolframAlphaImages @
288-
"Before ![Image](https://public6.wolframalpha.com/files/test.jpg) After" },
289-
(* Should have at least text content items *)
290-
Length @ result[ "Content" ] >= 2
286+
With[ { result = extractWolframAlphaImages[ "Before ![Image](https://public6.wolframalpha.com/files/test.jpg) After" ] },
287+
And[
288+
AssociationQ @ result,
289+
MatchQ[ result, KeyValuePattern[ "Content" -> { __Association } ] ],
290+
MemberQ[ result[ "Content" ], KeyValuePattern @ { "type" -> "text", "text" -> _? (StringContainsQ[ "Before" ]) } ],
291+
MemberQ[ result[ "Content" ], KeyValuePattern @ { "type" -> "text", "text" -> _? (StringContainsQ[ "After" ]) } ]
292+
]
291293
],
292294
True,
293-
TestID -> "extractWolframAlphaImages-MultipleContentItems@@Tests/Graphics.wlt:285,1-294,2"
295+
TestID -> "extractWolframAlphaImages-MultipleContentItems@@Tests/Graphics.wlt:285,1-296,2"
294296
]
295297

296298
(* ::**************************************************************************************************************:: *)
@@ -306,7 +308,7 @@ VerificationTest[
306308
AssociationQ @ result || StringContainsQ[ result, "wolframalpha.com" ]
307309
],
308310
True,
309-
TestID -> "extractWolframAlphaImages-WWW6Domain@@Tests/Graphics.wlt:301,1-310,2"
311+
TestID -> "extractWolframAlphaImages-WWW6Domain@@Tests/Graphics.wlt:303,1-312,2"
310312
]
311313

312314
VerificationTest[
@@ -317,15 +319,15 @@ VerificationTest[
317319
AssociationQ @ result || StringContainsQ[ result, "wolframalpha.com" ]
318320
],
319321
True,
320-
TestID -> "extractWolframAlphaImages-JpegExtension@@Tests/Graphics.wlt:312,1-321,2"
322+
TestID -> "extractWolframAlphaImages-JpegExtension@@Tests/Graphics.wlt:314,1-323,2"
321323
]
322324

323325
VerificationTest[
324326
extractWolframAlphaImages @
325327
"![Result](https://example.com/files/image.png)",
326328
_String,
327329
SameTest -> MatchQ,
328-
TestID -> "extractWolframAlphaImages-NonWADomain@@Tests/Graphics.wlt:323,1-329,2"
330+
TestID -> "extractWolframAlphaImages-NonWADomain@@Tests/Graphics.wlt:325,1-331,2"
329331
]
330332

331333
(* ::**************************************************************************************************************:: *)
@@ -338,7 +340,7 @@ VerificationTest[
338340
MemberQ[ result[ "Content" ], KeyValuePattern[ { "type" -> "text", "text" -> _? (StringContainsQ[ "Before" ]) } ] ]
339341
],
340342
True,
341-
TestID -> "extractWolframAlphaImages-PreservesTextBefore@@Tests/Graphics.wlt:334,1-342,2"
343+
TestID -> "extractWolframAlphaImages-PreservesTextBefore@@Tests/Graphics.wlt:336,1-344,2"
342344
]
343345

344346
VerificationTest[
@@ -348,7 +350,7 @@ VerificationTest[
348350
MemberQ[ result[ "Content" ], KeyValuePattern[ { "type" -> "text", "text" -> _? (StringContainsQ[ "After" ]) } ] ]
349351
],
350352
True,
351-
TestID -> "extractWolframAlphaImages-PreservesTextAfter@@Tests/Graphics.wlt:344,1-352,2"
353+
TestID -> "extractWolframAlphaImages-PreservesTextAfter@@Tests/Graphics.wlt:346,1-354,2"
352354
]
353355

354356
VerificationTest[
@@ -359,7 +361,7 @@ VerificationTest[
359361
MemberQ[ result[ "Content" ], KeyValuePattern[ { "type" -> "text", "text" -> _? (StringContainsQ[ "wolframalpha.com" ]) } ] ]
360362
],
361363
True,
362-
TestID -> "extractWolframAlphaImages-PreservesURLInText@@Tests/Graphics.wlt:354,1-363,2"
364+
TestID -> "extractWolframAlphaImages-PreservesURLInText@@Tests/Graphics.wlt:356,1-365,2"
363365
]
364366

365367
(* :!CodeAnalysis::EndBlock:: *)

0 commit comments

Comments
 (0)