Skip to content

Commit 40a917c

Browse files
authored
feat(mistral-ocr): added file upload to mistral ocr tool in production (#218)
* added file selector for mistral OCR tool * updated twilio icon
1 parent 79f8f34 commit 40a917c

File tree

5 files changed

+461
-391
lines changed

5 files changed

+461
-391
lines changed

sim/app/w/[id]/components/workflow-block/components/sub-block/components/file-upload.tsx

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -542,25 +542,27 @@ export function FileUpload({
542542

543543
{/* Show upload button if no files and not uploading */}
544544
{!hasFiles && !isUploading && (
545-
<Button
546-
type="button"
547-
variant="outline"
548-
className="w-full justify-center text-center font-normal"
549-
onClick={handleOpenFileDialog}
550-
>
551-
<Upload className="mr-2 h-4 w-4" />
552-
{multiple ? 'Upload Files' : 'Upload File'}
545+
<div className="flex items-center">
546+
<Button
547+
type="button"
548+
variant="outline"
549+
className="w-full justify-center text-center font-normal"
550+
onClick={handleOpenFileDialog}
551+
>
552+
<Upload className="mr-2 h-4 w-4" />
553+
{multiple ? 'Upload Files' : 'Upload File'}
554+
</Button>
553555

554556
<Tooltip>
555-
<TooltipTrigger className="ml-1">
557+
<TooltipTrigger className="ml-2">
556558
<Info className="h-4 w-4 text-muted-foreground" />
557559
</TooltipTrigger>
558560
<TooltipContent>
559561
<p>Max file size: {maxSize}MB</p>
560562
{multiple && <p>You can select multiple files at once</p>}
561563
</TooltipContent>
562564
</Tooltip>
563-
</Button>
565+
</div>
564566
)}
565567
</div>
566568
)

sim/blocks/blocks/mistral-parse.ts

Lines changed: 102 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,73 @@
1-
import { MistralParserOutput } from '@/tools/mistral/parser'
2-
import { BlockConfig } from '../types'
31
import { MistralIcon } from '@/components/icons'
2+
import { MistralParserOutput } from '@/tools/mistral/types'
3+
import { BlockConfig, SubBlockConfig, SubBlockLayout, SubBlockType } from '../types'
4+
5+
const isProduction = process.env.NODE_ENV === 'production'
6+
const isS3Enabled = process.env.USE_S3 === 'true'
7+
const shouldEnableFileUpload = isProduction || isS3Enabled
8+
9+
// Input method selector sub-block; included in subBlocks only when file upload is enabled
10+
const inputMethodBlock: SubBlockConfig = {
11+
id: 'inputMethod',
12+
title: 'Select Input Method',
13+
type: 'dropdown' as SubBlockType,
14+
layout: 'full' as SubBlockLayout,
15+
options: [
16+
{ id: 'url', label: 'PDF Document URL' },
17+
{ id: 'upload', label: 'Upload PDF Document' },
18+
],
19+
}
20+
21+
// File upload sub-block; included in subBlocks only when file upload is enabled
22+
const fileUploadBlock: SubBlockConfig = {
23+
id: 'fileUpload',
24+
title: 'Upload PDF',
25+
type: 'file-upload' as SubBlockType,
26+
layout: 'full' as SubBlockLayout,
27+
acceptedTypes: 'application/pdf',
28+
condition: {
29+
field: 'inputMethod',
30+
value: 'upload',
31+
},
32+
}
433

534
export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
635
type: 'mistral_parse',
736
name: 'Mistral Parser',
837
description: 'Extract text from PDF documents',
938
longDescription:
10-
'Extract text and structure from PDF documents using Mistral\'s OCR API. Enter a URL to a PDF document (.pdf extension required), configure processing options, and get the content in your preferred format. The URL must be publicly accessible and point to a valid PDF file. Note: Google Drive, Dropbox, and other cloud storage links are not supported; use a direct download URL from a web server instead.',
39+
"Extract text and structure from PDF documents using Mistral's OCR API." +
40+
(shouldEnableFileUpload
41+
? ' Either enter a URL to a PDF document or upload a PDF file directly.'
42+
: ' Enter a URL to a PDF document (.pdf extension required).') +
43+
' Configure processing options and get the content in your preferred format. For URLs, they must be publicly accessible and point to a valid PDF file. Note: Google Drive, Dropbox, and other cloud storage links are not supported; use a direct download URL from a web server instead.',
1144
category: 'tools',
1245
bgColor: '#000000',
1346
icon: MistralIcon,
1447
subBlocks: [
48+
// Show input method selection only if file upload is available
49+
...(shouldEnableFileUpload ? [inputMethodBlock] : []),
50+
51+
// URL input - always included; when file upload is enabled it is shown only if inputMethod is 'url'
1552
{
1653
id: 'filePath',
1754
title: 'PDF Document URL',
18-
type: 'short-input',
19-
layout: 'full',
55+
type: 'short-input' as SubBlockType,
56+
layout: 'full' as SubBlockLayout,
2057
placeholder: 'Enter full URL to a PDF document (https://example.com/document.pdf)',
58+
...(shouldEnableFileUpload
59+
? {
60+
condition: {
61+
field: 'inputMethod',
62+
value: 'url',
63+
},
64+
}
65+
: {}),
2166
},
67+
68+
// File upload option - only included when file upload is enabled (production or USE_S3 === 'true')
69+
...(shouldEnableFileUpload ? [fileUploadBlock] : []),
70+
2271
{
2372
id: 'resultType',
2473
title: 'Output Format',
@@ -27,7 +76,7 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
2776
options: [
2877
{ id: 'markdown', label: 'Markdown (Formatted)' },
2978
{ id: 'text', label: 'Plain Text' },
30-
{ id: 'json', label: 'JSON (Raw)' }
79+
{ id: 'json', label: 'JSON (Raw)' },
3180
],
3281
},
3382
{
@@ -65,8 +114,8 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
65114
{
66115
id: 'apiKey',
67116
title: 'API Key',
68-
type: 'short-input',
69-
layout: 'full',
117+
type: 'short-input' as SubBlockType,
118+
layout: 'full' as SubBlockLayout,
70119
placeholder: 'Enter your Mistral API key',
71120
password: true,
72121
},
@@ -78,126 +127,75 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
78127
params: (params) => {
79128
// Basic validation
80129
if (!params || !params.apiKey || params.apiKey.trim() === '') {
81-
throw new Error('Mistral API key is required');
130+
throw new Error('Mistral API key is required')
82131
}
83-
84-
if (!params || !params.filePath || params.filePath.trim() === '') {
85-
throw new Error('PDF Document URL is required');
132+
133+
// Build parameters object - file processing is now handled at the tool level
134+
const parameters: any = {
135+
apiKey: params.apiKey.trim(),
136+
resultType: params.resultType || 'markdown',
86137
}
87-
88-
// Validate URL format
89-
let validatedUrl;
90-
try {
91-
// Try to create a URL object to validate format
92-
validatedUrl = new URL(params.filePath.trim());
93-
94-
// Ensure URL is using HTTP or HTTPS protocol
95-
if (!['http:', 'https:'].includes(validatedUrl.protocol)) {
96-
throw new Error(`URL must use HTTP or HTTPS protocol. Found: ${validatedUrl.protocol}`);
97-
}
98-
99-
// Check for PDF extension and provide specific guidance
100-
const pathname = validatedUrl.pathname.toLowerCase();
101-
if (!pathname.endsWith('.pdf')) {
102-
if (!pathname.includes('pdf')) {
103-
throw new Error(
104-
'The URL does not appear to point to a PDF document. ' +
105-
'Please provide a URL that ends with .pdf extension. ' +
106-
'If your document is not a PDF, please convert it to PDF format first.'
107-
);
108-
} else {
109-
// PDF is in the name but not at the end, so give a warning but proceed
110-
console.warn(
111-
'Warning: URL contains "pdf" but does not end with .pdf extension. ' +
112-
'This might still work if the server returns a valid PDF document.'
113-
);
138+
139+
// Set filePath or fileUpload based on input method (or directly use filePath if no method selector)
140+
if (shouldEnableFileUpload) {
141+
const inputMethod = params.inputMethod || 'url'
142+
if (inputMethod === 'url') {
143+
if (!params.filePath || params.filePath.trim() === '') {
144+
throw new Error('PDF Document URL is required')
145+
}
146+
parameters.filePath = params.filePath.trim()
147+
} else if (inputMethod === 'upload') {
148+
if (!params.fileUpload) {
149+
throw new Error('Please upload a PDF document')
114150
}
151+
// Pass the entire fileUpload object to the tool
152+
parameters.fileUpload = params.fileUpload
153+
}
154+
} else {
155+
// File upload is disabled (non-production and S3 off), so only URL input is available
156+
if (!params.filePath || params.filePath.trim() === '') {
157+
throw new Error('PDF Document URL is required')
115158
}
116-
} catch (error) {
117-
const errorMessage = error instanceof Error ? error.message : String(error);
118-
throw new Error(`Invalid URL format: ${errorMessage}`);
159+
parameters.filePath = params.filePath.trim()
119160
}
120-
121-
// Process pages input (convert from comma-separated string to array of numbers)
122-
let pagesArray: number[] | undefined = undefined;
161+
162+
// Convert pages input from string to array of numbers if provided
163+
let pagesArray: number[] | undefined = undefined
123164
if (params.pages && params.pages.trim() !== '') {
124165
try {
125166
pagesArray = params.pages
126167
.split(',')
127168
.map((p: string) => p.trim())
128169
.filter((p: string) => p.length > 0)
129170
.map((p: string) => {
130-
const num = parseInt(p, 10);
171+
const num = parseInt(p, 10)
131172
if (isNaN(num) || num < 0) {
132-
throw new Error(`Invalid page number: ${p}`);
173+
throw new Error(`Invalid page number: ${p}`)
133174
}
134-
return num;
135-
});
136-
175+
return num
176+
})
177+
137178
if (pagesArray && pagesArray.length === 0) {
138-
pagesArray = undefined;
179+
pagesArray = undefined
139180
}
140181
} catch (error: any) {
141-
throw new Error(`Page number format error: ${error.message}`);
142-
}
143-
}
144-
145-
// Process numeric inputs
146-
let imageLimit: number | undefined = undefined;
147-
if (params.imageLimit && params.imageLimit.trim() !== '') {
148-
const limit = parseInt(params.imageLimit, 10);
149-
if (!isNaN(limit) && limit > 0) {
150-
imageLimit = limit;
151-
} else {
152-
throw new Error('Image limit must be a positive number');
153-
}
154-
}
155-
156-
let imageMinSize: number | undefined = undefined;
157-
if (params.imageMinSize && params.imageMinSize.trim() !== '') {
158-
const size = parseInt(params.imageMinSize, 10);
159-
if (!isNaN(size) && size > 0) {
160-
imageMinSize = size;
161-
} else {
162-
throw new Error('Minimum image size must be a positive number');
182+
throw new Error(`Page number format error: ${error.message}`)
163183
}
164184
}
165-
166-
// Return structured parameters for the tool
167-
const parameters: any = {
168-
filePath: validatedUrl.toString(),
169-
apiKey: params.apiKey.trim(),
170-
resultType: params.resultType || 'markdown',
171-
};
172-
173-
// Add optional parameters if they're defined
185+
186+
// Add optional parameters
174187
if (pagesArray && pagesArray.length > 0) {
175-
parameters.pages = pagesArray;
188+
parameters.pages = pagesArray
176189
}
177-
178-
/*
179-
* Image-related parameters - temporarily disabled
180-
* Uncomment if PDF image extraction is needed
181-
*
182-
if (typeof params.includeImageBase64 === 'boolean') {
183-
parameters.includeImageBase64 = params.includeImageBase64;
184-
}
185-
186-
if (imageLimit !== undefined) {
187-
parameters.imageLimit = imageLimit;
188-
}
189-
190-
if (imageMinSize !== undefined) {
191-
parameters.imageMinSize = imageMinSize;
192-
}
193-
*/
194-
195-
return parameters;
190+
191+
return parameters
196192
},
197193
},
198194
},
199195
inputs: {
200-
filePath: { type: 'string', required: true },
196+
inputMethod: { type: 'string', required: false },
197+
filePath: { type: 'string', required: !shouldEnableFileUpload },
198+
fileUpload: { type: 'json', required: false },
201199
apiKey: { type: 'string', required: true },
202200
resultType: { type: 'string', required: false },
203201
pages: { type: 'string', required: false },
@@ -214,4 +212,4 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
214212
},
215213
},
216214
},
217-
}
215+
}

sim/components/icons.tsx

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1736,20 +1736,11 @@ export function ConfluenceIcon(props: SVGProps<SVGSVGElement>) {
17361736

17371737
export function TwilioIcon(props: SVGProps<SVGSVGElement>) {
17381738
return (
1739-
<svg
1740-
{...props}
1741-
xmlns="http://www.w3.org/2000/svg"
1742-
width="24"
1743-
height="24"
1744-
viewBox="0 0 256 256"
1745-
fill="none"
1746-
aria-hidden="true"
1747-
>
1748-
<circle cx="128" cy="128" r="128" fill="none" stroke="white" strokeWidth="21" />
1749-
<circle cx="85" cy="85" r="21" fill="white" />
1750-
<circle cx="171" cy="85" r="21" fill="white" />
1751-
<circle cx="85" cy="171" r="21" fill="white" />
1752-
<circle cx="171" cy="171" r="21" fill="white" />
1739+
<svg {...props} xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 256">
1740+
<path
1741+
fill="currentColor"
1742+
d="M128 0c70.656 0 128 57.344 128 128s-57.344 128-128 128S0 198.656 0 128 57.344 0 128 0zm0 33.792c-52.224 0-94.208 41.984-94.208 94.208S75.776 222.208 128 222.208s94.208-41.984 94.208-94.208S180.224 33.792 128 33.792zm31.744 99.328c14.704 0 26.624 11.92 26.624 26.624 0 14.704-11.92 26.624-26.624 26.624-14.704 0-26.624-11.92-26.624-26.624 0-14.704 11.92-26.624 26.624-26.624zm-63.488 0c14.704 0 26.624 11.92 26.624 26.624 0 14.704-11.92 26.624-26.624 26.624-14.704 0-26.624-11.92-26.624-26.624 0-14.704 11.92-26.624 26.624-26.624zm63.488-63.488c14.704 0 26.624 11.92 26.624 26.624 0 14.704-11.92 26.624-26.624 26.624-14.704 0-26.624-11.92-26.624-26.624 0-14.704 11.92-26.624 26.624-26.624zm-63.488 0c14.704 0 26.624 11.92 26.624 26.624 0 14.704-11.92 26.624-26.624 26.624-14.704 0-26.624-11.92-26.624-26.624 0-14.704 11.92-26.624 26.624-26.624z"
1743+
/>
17531744
</svg>
17541745
)
17551746
}

0 commit comments

Comments
 (0)