Skip to content

Commit 16d224b

Browse files
authored
feat(files): use multipart/form-data for /api/files/create (#9521)
Switches the HTTP upload path from base64-in-JSON to multipart/form-data, eliminating the 33% size overhead and pairing naturally with File/Blob on the frontend. The WASM/Pyodide bridge keeps its JSON+base64 wire format since the JS<->Py RPC boundary cannot carry multipart; both transports share a single client-side FileCreateInput interface.

Also adds:
- path-traversal hardening in OSFileSystem.create_file_or_directory
- parse_multipart_request helper in marimo/_server/api/utils.py

Two followups:
- streaming so the full file doesn't need to be loaded into memory on the server
- batching uploads on the frontend
1 parent 8cb7deb commit 16d224b

18 files changed

Lines changed: 362 additions & 39 deletions

File tree

frontend/src/components/editor/file-tree/upload.tsx

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import { type DropzoneOptions, useDropzone } from "react-dropzone";
44
import { toast } from "@/components/ui/use-toast";
55
import { useRequestClient } from "@/core/network/requests";
6-
import { serializeBlob } from "@/utils/blob";
76
import { withLoadingToast } from "@/utils/download";
87
import { Logger } from "@/utils/Logger";
98
import { type FilePath, PathBuilder } from "@/utils/paths";
@@ -69,17 +68,11 @@ export function useFileExplorerUpload(options: DropzoneOptions = {}) {
6968
PathBuilder.guessDeliminator(filePath).dirname(filePath);
7069
}
7170

72-
// File contents are sent base64-encoded to support arbitrary
73-
// bytes data
74-
//
75-
// get the raw base64-encoded data from a string starting with
76-
// data:*/*;base64,
77-
const base64 = (await serializeBlob(file)).split(",")[1];
7871
await sendCreateFileOrFolder({
7972
path: directoryPath,
8073
type: "file",
8174
name: file.name,
82-
contents: base64,
75+
file,
8376
});
8477
progress.increment(1);
8578
}

frontend/src/core/codemirror/markdown/__tests__/commands.test.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ describe("insertImage", () => {
246246
path: "public",
247247
type: "file",
248248
name: "hello.png",
249-
contents: "AQID",
249+
file: expect.any(File),
250250
});
251251

252252
expect(view.state.doc.toString()).toMatchInlineSnapshot(
@@ -291,7 +291,7 @@ describe("insertImage", () => {
291291
path: "nested/public", // store in public folder of notebook directory
292292
type: "file",
293293
name: "hello.png",
294-
contents: "AQID",
294+
file: expect.any(File),
295295
});
296296

297297
expect(view.state.doc.toString()).toMatchInlineSnapshot(
@@ -337,7 +337,7 @@ describe("insertImage", () => {
337337
path: "/Users/user/Development/project/public",
338338
type: "file",
339339
name: "hello.png",
340-
contents: "AQID",
340+
file: expect.any(File),
341341
});
342342

343343
// Should convert absolute path to relative path

frontend/src/core/codemirror/markdown/commands.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,6 @@ export async function insertImage(view: EditorView, file: File) {
313313
// If the file is base64 encoded, we can save it locally to prevent large file strings
314314
try {
315315
if (dataUrl.startsWith("data:")) {
316-
const base64 = dataUrl.split(",")[1];
317316
let inputFilename = prompt(
318317
"We can save your image as a file. Enter a filename.",
319318
file.name,
@@ -348,7 +347,7 @@ export async function insertImage(view: EditorView, file: File) {
348347
path: publicFolderPath as FilePath,
349348
type: "file",
350349
name: inputFilename,
351-
contents: base64,
350+
file,
352351
});
353352

354353
if (createFileRes.success) {

frontend/src/core/network/requests-network.ts

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,19 @@ import { API, createClientWithRuntimeManager } from "./api";
66
import { waitForConnectionOpen } from "./connection";
77
import type { EditRequests, RunRequests } from "./types";
88

9+
/**
10+
* Options for POSTing FormData via openapi-fetch. openapi-fetch types
11+
* request bodies from the JSON schema, so we bypass the body type and
12+
* override the serializer to pass the FormData through unchanged; the
13+
* browser then sets the multipart Content-Type with boundary.
14+
*/
15+
function multipartInit(formData: FormData) {
16+
return {
17+
body: formData as never,
18+
bodySerializer: (body: unknown) => body as never,
19+
};
20+
}
21+
922
const { handleResponse, handleResponseReturnNull } = API;
1023

1124
export function createNetworkRequests(): EditRequests & RunRequests {
@@ -298,10 +311,15 @@ export function createNetworkRequests(): EditRequests & RunRequests {
298311
},
299312
sendCreateFileOrFolder: async (request) => {
300313
await waitForConnectionOpen();
314+
const formData = new FormData();
315+
formData.append("path", request.path);
316+
formData.append("type", request.type);
317+
formData.append("name", request.name);
318+
if (request.file) {
319+
formData.append("file", request.file, request.name);
320+
}
301321
return getClient()
302-
.POST("/api/files/create", {
303-
body: request,
304-
})
322+
.POST("/api/files/create", multipartInit(formData))
305323
.then(handleResponse);
306324
},
307325
sendDeleteFileOrFolder: async (request) => {

frontend/src/core/network/types.ts

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,17 @@ export type SaveUserConfigurationRequest =
8080
export interface SetCellConfigRequest {
8181
configs: Record<CellId, Partial<CellConfig>>;
8282
}
83+
/**
84+
* Client-side shape for creating a file/directory/notebook. The HTTP
85+
* transport sends this as multipart/form-data; the WASM bridge base64-encodes
86+
* `file` internally and crosses the JS<->Py boundary as JSON.
87+
*/
88+
export interface FileCreateInput {
89+
path: string;
90+
type: "file" | "directory" | "notebook";
91+
name: string;
92+
file?: Blob;
93+
}
8394
export type UpdateUIElementRequest = schemas["UpdateUIElementRequest"];
8495
export type ModelRequest = schemas["ModelRequest"];
8596
export type NotebookDocumentTransactionRequest =
@@ -165,7 +176,7 @@ export interface EditRequests {
165176
sendListFiles: (request: FileListRequest) => Promise<FileListResponse>;
166177
sendSearchFiles: (request: FileSearchRequest) => Promise<FileSearchResponse>;
167178
sendCreateFileOrFolder: (
168-
request: FileCreateRequest,
179+
request: FileCreateInput,
169180
) => Promise<FileCreateResponse>;
170181
sendDeleteFileOrFolder: (
171182
request: FileDeleteRequest,

frontend/src/core/wasm/bridge.ts

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import { toast } from "@/components/ui/use-toast";
55
import { userConfigAtom } from "@/core/config/config";
6+
import { serializeBlob } from "@/utils/blob";
67
import { Deferred } from "@/utils/Deferred";
78
import { throwNotImplemented } from "@/utils/functions";
89
import { Logger } from "@/utils/Logger";
@@ -431,9 +432,21 @@ export class PyodideBridge implements RunRequests, EditRequests {
431432
sendCreateFileOrFolder: EditRequests["sendCreateFileOrFolder"] = async (
432433
request,
433434
) => {
435+
// The WASM RPC boundary can only carry JSON, so we base64-encode the
436+
// file bytes here. The HTTP transport uses multipart/form-data instead.
437+
let contents: string | null = null;
438+
if (request.file) {
439+
const dataUrl = await serializeBlob(request.file);
440+
contents = dataUrl.split(",")[1] ?? "";
441+
}
434442
const response = await this.rpc.proxy.request.bridge({
435443
functionName: "create_file_or_directory",
436-
payload: request,
444+
payload: {
445+
path: request.path,
446+
type: request.type,
447+
name: request.name,
448+
contents,
449+
},
437450
});
438451
return response as FileCreateResponse;
439452
};

marimo/_cli/development/commands.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,7 @@ def _generate_server_api_schema() -> dict[str, Any]:
355355
export.ExportAsIPYNBRequest,
356356
export.ExportAsPDFRequest,
357357
export.UpdateCellOutputsRequest,
358+
files.FileCreateMultipartRequest,
358359
files.FileCreateRequest,
359360
files.FileCreateResponse,
360361
files.FileDeleteRequest,

marimo/_server/api/endpoints/file_explorer.py

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,18 @@
11
# Copyright 2026 Marimo. All rights reserved.
22
from __future__ import annotations
33

4-
import base64
54
from typing import TYPE_CHECKING
65

76
from starlette.authentication import requires
87

98
from marimo import _loggers
109
from marimo._server.api.deps import AppState
11-
from marimo._server.api.utils import parse_request
10+
from marimo._server.api.utils import parse_multipart_request, parse_request
1211
from marimo._server.files.os_file_system import OSFileSystem
1312
from marimo._server.models.files import (
1413
FileCopyRequest,
1514
FileCopyResponse,
16-
FileCreateRequest,
15+
FileCreateMultipartRequest,
1716
FileCreateResponse,
1817
FileDeleteRequest,
1918
FileDeleteResponse,
@@ -109,9 +108,9 @@ async def create_file_or_directory(
109108
"""
110109
requestBody:
111110
content:
112-
application/json:
111+
multipart/form-data:
113112
schema:
114-
$ref: "#/components/schemas/FileCreateRequest"
113+
$ref: "#/components/schemas/FileCreateMultipartRequest"
115114
responses:
116115
200:
117116
description: Create a new file or directory
@@ -120,16 +119,15 @@ async def create_file_or_directory(
120119
schema:
121120
$ref: "#/components/schemas/FileCreateResponse"
122121
"""
123-
body = await parse_request(request, cls=FileCreateRequest)
124122
try:
125-
decoded_contents = (
126-
base64.b64decode(body.contents)
127-
if body.contents is not None
128-
else None
123+
parsed = await parse_multipart_request(
124+
request, FileCreateMultipartRequest
129125
)
130-
131126
info = file_system.create_file_or_directory(
132-
body.path, body.type, body.name, decoded_contents
127+
parsed.body.path,
128+
parsed.body.type,
129+
parsed.body.name,
130+
parsed.files.get("file"),
133131
)
134132
return FileCreateResponse(success=True, info=info)
135133
except Exception as e:

marimo/_server/api/utils.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,20 @@
55
import subprocess
66
import sys
77
import webbrowser
8+
from dataclasses import dataclass
89
from pathlib import Path
910
from shutil import which
1011
from typing import (
1112
TYPE_CHECKING,
13+
Any,
14+
Generic,
1215
Protocol,
1316
TypeVar,
1417
runtime_checkable,
1518
)
1619

20+
import msgspec
21+
1722
from marimo._runtime.commands import CommandMessage
1823
from marimo._server.models.models import SuccessResponse
1924
from marimo._types.ids import ConsumerId
@@ -34,6 +39,47 @@ async def parse_request(
3439
)
3540

3641

42+
S = TypeVar("S", bound=msgspec.Struct)
43+
44+
45+
@dataclass
46+
class MultipartRequest(Generic[S]):
47+
"""Result of parsing a multipart/form-data request body."""
48+
49+
body: S
50+
files: dict[str, bytes]
51+
52+
53+
async def parse_multipart_request(
54+
request: Request, cls: type[S]
55+
) -> MultipartRequest[S]:
56+
"""Parse a multipart/form-data body into a msgspec.Struct + file bytes.
57+
58+
String form fields are validated against `cls`. File upload parts are
59+
read fully into memory and returned in `files`, keyed by form-field
60+
name (callers look them up explicitly rather than via the struct).
61+
62+
Raises msgspec.ValidationError if required string fields are missing
63+
or invalid.
64+
"""
65+
# Imported lazily so this module stays import-safe in environments
66+
# without starlette (e.g. pyodide).
67+
from starlette.datastructures import UploadFile
68+
69+
# Use as an async context manager so any spooled temp files backing
70+
# UploadFile parts are closed after parsing.
71+
async with request.form() as form:
72+
string_payload: dict[str, Any] = {}
73+
files: dict[str, bytes] = {}
74+
for key, value in form.multi_items():
75+
if isinstance(value, UploadFile):
76+
files[key] = await value.read()
77+
elif isinstance(value, str):
78+
string_payload[key] = value
79+
body = msgspec.convert(string_payload, cls, strict=False)
80+
return MultipartRequest(body=body, files=files)
81+
82+
3783
@runtime_checkable
3884
class RequestAsCommand(Protocol):
3985
"""Protocol for requests that can be converted to commands."""

marimo/_server/files/os_file_system.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,19 @@ def create_file_or_directory(
146146
)
147147
if name.strip() == "":
148148
raise ValueError("Cannot create file or directory with empty name")
149+
# Names that traverse out of `path` or escape via separators are
150+
# rejected. Validation belongs here (not in the endpoint) so every
151+
# caller of OSFileSystem — HTTP, WASM bridge, scripts — is covered.
152+
if (
153+
"/" in name
154+
or "\\" in name
155+
or "\x00" in name
156+
or name in (".", "..")
157+
):
158+
raise ValueError(
159+
f"Invalid name {name!r}: must not contain path separators "
160+
"or refer to a parent directory"
161+
)
149162

150163
full_path = Path(path) / name
151164
full_path = _generate_unique_path(full_path)

0 commit comments

Comments
 (0)