@@ -4,33 +4,51 @@ import type { OpenAI } from "openai";
44import { TEXT_MIME_ALLOWLIST } from "$lib/constants/mime" ;
55import type { makeImageProcessor } from "$lib/server/endpoints/images" ;
66
7+ /** MIME types that OpenAI handles natively as file content parts (currently only PDF); files are sent as `type: "file"` parts only if their MIME type matches an entry here. */
8+ const NATIVE_FILE_MIMETYPES = [ "application/pdf" ] as const ;
9+
/**
 * Check whether a MIME type matches at least one entry of an allowlist.
 *
 * Allowlist entries are `type/subtype` patterns in which either half may be the
 * wildcard `*` (for example `text/*`). Matching is case-insensitive.
 * Parameters following `;` (such as `; charset=utf-8`) and surrounding
 * whitespace are ignored on both the MIME type and the allowlist entries, so
 * `text/plain; charset=utf-8` matches an exact `text/plain` entry.
 *
 * @param mime - MIME type to test. A missing value is treated as an empty string.
 * @param allowlist - Patterns to match against.
 * @returns `true` when any pattern matches both the type and the subtype.
 */
function matchesMimeAllowlist(mime: string, allowlist: readonly string[]): boolean {
	// Reduce a media type to its lower-cased [type, subtype] pair, dropping any
	// parameters so they cannot leak into the subtype comparison.
	const toTypeAndSubtype = (value: string): string[] =>
		(value.split(";")[0] ?? "").trim().toLowerCase().split("/");

	// `mime` is typed as string, but callers may pass data that was never validated.
	const [fileType, fileSubtype] = toTypeAndSubtype(mime ?? "");

	return allowlist.some((allowed) => {
		const [type, subtype] = toTypeAndSubtype(allowed);
		const typeOk = type === "*" || type === fileType;
		const subOk = subtype === "*" || subtype === fileSubtype;
		return typeOk && subOk;
	});
}
20+
721/**
822 * Prepare chat messages for OpenAI-compatible multimodal payloads.
923 * - Processes images via the provided imageProcessor (resize/convert) when multimodal is enabled.
24+ * - Sends PDFs as native file content parts when the model accepts them.
1025 * - Injects text-file content into the user message text.
1126 * - Leaves messages untouched when no files or multimodal disabled.
1227 */
1328export async function prepareMessagesWithFiles (
1429 messages : EndpointMessage [ ] ,
1530 imageProcessor : ReturnType < typeof makeImageProcessor > ,
16- isMultimodal : boolean
31+ isMultimodal : boolean ,
32+ acceptedFileMimetypes ?: string [ ]
1733) : Promise < OpenAI . Chat . Completions . ChatCompletionMessageParam [ ] > {
1834 return Promise . all (
1935 messages . map ( async ( message ) => {
2036 if ( message . from === "user" && message . files && message . files . length > 0 ) {
21- const { imageParts, textContent } = await prepareFiles (
37+ const { imageParts, fileParts , textContent } = await prepareFiles (
2238 imageProcessor ,
2339 message . files ,
24- isMultimodal
40+ isMultimodal ,
41+ acceptedFileMimetypes
2542 ) ;
2643
2744 let messageText = message . content ;
2845 if ( textContent . length > 0 ) {
2946 messageText = textContent + "\n\n" + message . content ;
3047 }
3148
32- if ( imageParts . length > 0 && isMultimodal ) {
33- const parts = [ { type : "text" as const , text : messageText } , ...imageParts ] ;
49+ const multimodalParts = [ ...imageParts , ...fileParts ] ;
50+ if ( multimodalParts . length > 0 ) {
51+ const parts = [ { type : "text" as const , text : messageText } , ...multimodalParts ] ;
3452 return { role : message . from , content : parts } ;
3553 }
3654
@@ -44,22 +62,25 @@ export async function prepareMessagesWithFiles(
4462async function prepareFiles (
4563 imageProcessor : ReturnType < typeof makeImageProcessor > ,
4664 files : MessageFile [ ] ,
47- isMultimodal : boolean
65+ isMultimodal : boolean ,
66+ acceptedFileMimetypes ?: string [ ]
4867) : Promise < {
4968 imageParts : OpenAI . Chat . Completions . ChatCompletionContentPartImage [ ] ;
69+ fileParts : OpenAI . Chat . Completions . ChatCompletionContentPart . File [ ] ;
5070 textContent : string ;
5171} > {
5272 const imageFiles = files . filter ( ( file ) => file . mime . startsWith ( "image/" ) ) ;
53- const textFiles = files . filter ( ( file ) => {
54- const mime = ( file . mime || "" ) . toLowerCase ( ) ;
55- const [ fileType , fileSubtype ] = mime . split ( "/" ) ;
56- return TEXT_MIME_ALLOWLIST . some ( ( allowed ) => {
57- const [ type , subtype ] = allowed . toLowerCase ( ) . split ( "/" ) ;
58- const typeOk = type === "*" || type === fileType ;
59- const subOk = subtype === "*" || subtype === fileSubtype ;
60- return typeOk && subOk ;
61- } ) ;
62- } ) ;
73+ const textFiles = files . filter ( ( file ) => matchesMimeAllowlist ( file . mime , TEXT_MIME_ALLOWLIST ) ) ;
74+
75+ // Files that the model accepts natively (e.g. PDFs via OpenAI's file content part)
76+ const nativeFiles = files . filter (
77+ ( file ) =>
78+ ! file . mime . startsWith ( "image/" ) &&
79+ ! matchesMimeAllowlist ( file . mime , TEXT_MIME_ALLOWLIST ) &&
80+ acceptedFileMimetypes &&
81+ matchesMimeAllowlist ( file . mime , acceptedFileMimetypes ) &&
82+ matchesMimeAllowlist ( file . mime , NATIVE_FILE_MIMETYPES )
83+ ) ;
6384
6485 let imageParts : OpenAI . Chat . Completions . ChatCompletionContentPartImage [ ] = [ ] ;
6586 if ( isMultimodal && imageFiles . length > 0 ) {
@@ -73,6 +94,17 @@ async function prepareFiles(
7394 } ) ) ;
7495 }
7596
97+ // Send natively-supported files as OpenAI file content parts
98+ const fileParts : OpenAI . Chat . Completions . ChatCompletionContentPart . File [ ] = nativeFiles . map (
99+ ( file ) => ( {
100+ type : "file" as const ,
101+ file : {
102+ filename : file . name ,
103+ file_data : `data:${ file . mime } ;base64,${ file . value } ` ,
104+ } ,
105+ } )
106+ ) ;
107+
76108 let textContent = "" ;
77109 if ( textFiles . length > 0 ) {
78110 const textParts = await Promise . all (
@@ -84,5 +116,5 @@ async function prepareFiles(
84116 textContent = textParts . join ( "\n\n" ) ;
85117 }
86118
87- return { imageParts, textContent } ;
119+ return { imageParts, fileParts , textContent } ;
88120}
0 commit comments