diff --git a/packages/core/src/common.ts b/packages/core/src/common.ts index b855d7ab9f..addef8f66e 100644 --- a/packages/core/src/common.ts +++ b/packages/core/src/common.ts @@ -561,6 +561,17 @@ export const ifMidsceneLocatorField = (field: any): boolean => { return false; }; +const formatPromptWithImages = ( + promptObj: Exclude<TUserPrompt, string>, +): string => { + let promptString = promptObj.prompt; + if (Array.isArray(promptObj.images) && promptObj.images.length > 0) { + const imageCount = promptObj.images.length; + promptString += ` (with ${imageCount} image${imageCount > 1 ? 's' : ''})`; + } + return promptString; +}; + export const dumpMidsceneLocatorField = (field: any): string => { assert( ifMidsceneLocatorField(field), @@ -580,7 +591,7 @@ export const dumpMidsceneLocatorField = (field: any): string => { } // If prompt is a TUserPrompt object, extract the prompt string if (typeof field.prompt === 'object' && field.prompt.prompt) { - return field.prompt.prompt; // TODO: dump images if necessary + return formatPromptWithImages(field.prompt); } } @@ -648,7 +659,7 @@ export const dumpActionParam = ( fieldValue.prompt.prompt ) { // If prompt is a TUserPrompt object, extract the prompt string - result[fieldName] = fieldValue.prompt.prompt; + result[fieldName] = formatPromptWithImages(fieldValue.prompt); } } } diff --git a/packages/core/tests/unit-test/utils.test.ts b/packages/core/tests/unit-test/utils.test.ts index 8b87f49660..76a4bf26f6 100644 --- a/packages/core/tests/unit-test/utils.test.ts +++ b/packages/core/tests/unit-test/utils.test.ts @@ -713,6 +713,72 @@ describe('dumpActionParam', () => { `); }); + it('should format locator fields that have image arrays', () => { + const schema = z.object({ + locator: getMidsceneLocationSchema(), + }); + + const inputWithImages = { + locator: { + midscene_location_field_flag: true, + prompt: { + prompt: 'find the button', + images: [ + { name: 'button1.png', url: 'data:image/png;base64,xyz' }, + { name: 'button2.png', url: 
'https://example.com/img.png' }, + ], + }, + center: [100, 200], + rect: { left: 50, top: 100, width: 100, height: 50 }, + }, + }; + + const resultWithImages = dumpActionParam(inputWithImages, schema); + expect(resultWithImages).toMatchInlineSnapshot(` + { + "locator": "find the button (with 2 images)", + } + `); + + const inputWithOneImage = { + locator: { + midscene_location_field_flag: true, + prompt: { + prompt: 'find the text', + images: [{ name: 'text.png', url: 'data:image/png;base64,abc' }], + }, + center: [100, 200], + rect: { left: 50, top: 100, width: 100, height: 50 }, + }, + }; + + const resultWithOneImage = dumpActionParam(inputWithOneImage, schema); + expect(resultWithOneImage).toMatchInlineSnapshot(` + { + "locator": "find the text (with 1 image)", + } + `); + + const inputWithEmptyImages = { + locator: { + midscene_location_field_flag: true, + prompt: { + prompt: 'find the link', + images: [], + }, + center: [100, 200], + rect: { left: 50, top: 100, width: 100, height: 50 }, + }, + }; + + const resultWithEmptyImages = dumpActionParam(inputWithEmptyImages, schema); + expect(resultWithEmptyImages).toMatchInlineSnapshot(` + { + "locator": "find the link", + } + `); + }); + it('should handle edge cases and invalid inputs', () => { const schema = z.object({ foo: z.string(),