diff --git a/frontend/src/components/editor/file-tree/upload.tsx b/frontend/src/components/editor/file-tree/upload.tsx index f1b10271afa..57f9a240d93 100644 --- a/frontend/src/components/editor/file-tree/upload.tsx +++ b/frontend/src/components/editor/file-tree/upload.tsx @@ -3,7 +3,6 @@ import { type DropzoneOptions, useDropzone } from "react-dropzone"; import { toast } from "@/components/ui/use-toast"; import { useRequestClient } from "@/core/network/requests"; -import { serializeBlob } from "@/utils/blob"; import { withLoadingToast } from "@/utils/download"; import { Logger } from "@/utils/Logger"; import { type FilePath, PathBuilder } from "@/utils/paths"; @@ -69,17 +68,11 @@ export function useFileExplorerUpload(options: DropzoneOptions = {}) { PathBuilder.guessDeliminator(filePath).dirname(filePath); } - // File contents are sent base64-encoded to support arbitrary - // bytes data - // - // get the raw base64-encoded data from a string starting with - // data:*/*;base64, - const base64 = (await serializeBlob(file)).split(",")[1]; await sendCreateFileOrFolder({ path: directoryPath, type: "file", name: file.name, - contents: base64, + file, }); progress.increment(1); } diff --git a/frontend/src/core/codemirror/markdown/__tests__/commands.test.ts b/frontend/src/core/codemirror/markdown/__tests__/commands.test.ts index 73dd8fbd823..b16ce889b7a 100644 --- a/frontend/src/core/codemirror/markdown/__tests__/commands.test.ts +++ b/frontend/src/core/codemirror/markdown/__tests__/commands.test.ts @@ -246,7 +246,7 @@ describe("insertImage", () => { path: "public", type: "file", name: "hello.png", - contents: "AQID", + file: expect.any(File), }); expect(view.state.doc.toString()).toMatchInlineSnapshot( @@ -291,7 +291,7 @@ describe("insertImage", () => { path: "nested/public", // store in public folder of notebook directory type: "file", name: "hello.png", - contents: "AQID", + file: expect.any(File), }); expect(view.state.doc.toString()).toMatchInlineSnapshot( @@ -337,7 +337,7 @@ describe("insertImage", () => { path: "/Users/user/Development/project/public", type: "file", name: "hello.png", - contents: "AQID", + file: expect.any(File), }); // Should convert absolute path to relative path diff --git a/frontend/src/core/codemirror/markdown/commands.ts b/frontend/src/core/codemirror/markdown/commands.ts index c66ccf73a4d..4c7ccf5e1f3 100644 --- a/frontend/src/core/codemirror/markdown/commands.ts +++ b/frontend/src/core/codemirror/markdown/commands.ts @@ -313,7 +313,6 @@ export async function insertImage(view: EditorView, file: File) { // If the file is base64 encoded, we can save it locally to prevent large file strings try { if (dataUrl.startsWith("data:")) { - const base64 = dataUrl.split(",")[1]; let inputFilename = prompt( "We can save your image as a file. Enter a filename.", file.name, @@ -348,7 +347,7 @@ export async function insertImage(view: EditorView, file: File) { path: publicFolderPath as FilePath, type: "file", name: inputFilename, - contents: base64, + file, }); if (createFileRes.success) { diff --git a/frontend/src/core/network/requests-network.ts b/frontend/src/core/network/requests-network.ts index e53ae139d91..6a394a54a79 100644 --- a/frontend/src/core/network/requests-network.ts +++ b/frontend/src/core/network/requests-network.ts @@ -6,6 +6,19 @@ import { API, createClientWithRuntimeManager } from "./api"; import { waitForConnectionOpen } from "./connection"; import type { EditRequests, RunRequests } from "./types"; +/** + * Options for POSTing FormData via openapi-fetch. openapi-fetch types + * request bodies from the JSON schema, so we bypass the body type and + * override the serializer to pass the FormData through unchanged; the + * browser then sets the multipart Content-Type with boundary. + */ +function multipartInit(formData: FormData) { + return { + body: formData as never, + bodySerializer: (body: unknown) => body as never, + }; +} + const { handleResponse, handleResponseReturnNull } = API; export function createNetworkRequests(): EditRequests & RunRequests { @@ -298,10 +311,15 @@ export function createNetworkRequests(): EditRequests & RunRequests { }, sendCreateFileOrFolder: async (request) => { await waitForConnectionOpen(); + const formData = new FormData(); + formData.append("path", request.path); + formData.append("type", request.type); + formData.append("name", request.name); + if (request.file) { + formData.append("file", request.file, request.name); + } return getClient() - .POST("/api/files/create", { - body: request, - }) + .POST("/api/files/create", multipartInit(formData)) .then(handleResponse); }, sendDeleteFileOrFolder: async (request) => { diff --git a/frontend/src/core/network/types.ts b/frontend/src/core/network/types.ts index 2a5b6c1e476..c1ade525fb8 100644 --- a/frontend/src/core/network/types.ts +++ b/frontend/src/core/network/types.ts @@ -80,6 +80,17 @@ export type SaveUserConfigurationRequest = export interface SetCellConfigRequest { configs: Record>; } +/** + * Client-side shape for creating a file/directory/notebook. The HTTP + * transport sends this as multipart/form-data; the WASM bridge base64-encodes + * `file` internally and crosses the JS<->Py boundary as JSON. + */ +export interface FileCreateInput { + path: string; + type: "file" | "directory" | "notebook"; + name: string; + file?: Blob; +} export type UpdateUIElementRequest = schemas["UpdateUIElementRequest"]; export type ModelRequest = schemas["ModelRequest"]; export type NotebookDocumentTransactionRequest = @@ -165,7 +176,7 @@ export interface EditRequests { sendListFiles: (request: FileListRequest) => Promise; sendSearchFiles: (request: FileSearchRequest) => Promise; sendCreateFileOrFolder: ( - request: FileCreateRequest, + request: FileCreateInput, ) => Promise; sendDeleteFileOrFolder: ( request: FileDeleteRequest, diff --git a/frontend/src/core/wasm/bridge.ts b/frontend/src/core/wasm/bridge.ts index f82fa0fc5ba..5cc30aa5b68 100644 --- a/frontend/src/core/wasm/bridge.ts +++ b/frontend/src/core/wasm/bridge.ts @@ -3,6 +3,7 @@ import { toast } from "@/components/ui/use-toast"; import { userConfigAtom } from "@/core/config/config"; +import { serializeBlob } from "@/utils/blob"; import { Deferred } from "@/utils/Deferred"; import { throwNotImplemented } from "@/utils/functions"; import { Logger } from "@/utils/Logger"; @@ -431,9 +432,21 @@ export class PyodideBridge implements RunRequests, EditRequests { sendCreateFileOrFolder: EditRequests["sendCreateFileOrFolder"] = async ( request, ) => { + // The WASM RPC boundary can only carry JSON, so we base64-encode the + // file bytes here. The HTTP transport uses multipart/form-data instead. + let contents: string | null = null; + if (request.file) { + const dataUrl = await serializeBlob(request.file); + contents = dataUrl.split(",")[1] ?? ""; + } const response = await this.rpc.proxy.request.bridge({ functionName: "create_file_or_directory", - payload: request, + payload: { + path: request.path, + type: request.type, + name: request.name, + contents, + }, }); return response as FileCreateResponse; }; diff --git a/marimo/_cli/development/commands.py b/marimo/_cli/development/commands.py index 44e56838bad..8a06b019221 100644 --- a/marimo/_cli/development/commands.py +++ b/marimo/_cli/development/commands.py @@ -355,6 +355,7 @@ def _generate_server_api_schema() -> dict[str, Any]: export.ExportAsIPYNBRequest, export.ExportAsPDFRequest, export.UpdateCellOutputsRequest, + files.FileCreateMultipartRequest, files.FileCreateRequest, files.FileCreateResponse, files.FileDeleteRequest, diff --git a/marimo/_server/api/endpoints/file_explorer.py b/marimo/_server/api/endpoints/file_explorer.py index f31ec6c55bb..391779c9194 100644 --- a/marimo/_server/api/endpoints/file_explorer.py +++ b/marimo/_server/api/endpoints/file_explorer.py @@ -1,19 +1,18 @@ # Copyright 2026 Marimo. All rights reserved. from __future__ import annotations -import base64 from typing import TYPE_CHECKING from starlette.authentication import requires from marimo import _loggers from marimo._server.api.deps import AppState -from marimo._server.api.utils import parse_request +from marimo._server.api.utils import parse_multipart_request, parse_request from marimo._server.files.os_file_system import OSFileSystem from marimo._server.models.files import ( FileCopyRequest, FileCopyResponse, - FileCreateRequest, + FileCreateMultipartRequest, FileCreateResponse, FileDeleteRequest, FileDeleteResponse, @@ -109,9 +108,9 @@ async def create_file_or_directory( """ requestBody: content: - application/json: + multipart/form-data: schema: - $ref: "#/components/schemas/FileCreateRequest" + $ref: "#/components/schemas/FileCreateMultipartRequest" responses: 200: description: Create a new file or directory @@ -120,16 +119,15 @@ async def create_file_or_directory( schema: $ref: "#/components/schemas/FileCreateResponse" """ - body = await parse_request(request, cls=FileCreateRequest) try: - decoded_contents = ( - base64.b64decode(body.contents) - if body.contents is not None - else None + parsed = await parse_multipart_request( + request, FileCreateMultipartRequest ) - info = file_system.create_file_or_directory( - body.path, body.type, body.name, decoded_contents + parsed.body.path, + parsed.body.type, + parsed.body.name, + parsed.files.get("file"), ) return FileCreateResponse(success=True, info=info) except Exception as e: diff --git a/marimo/_server/api/utils.py b/marimo/_server/api/utils.py index b3327db9d57..57511497187 100644 --- a/marimo/_server/api/utils.py +++ b/marimo/_server/api/utils.py @@ -5,15 +5,20 @@ import subprocess import sys import webbrowser +from dataclasses import dataclass from pathlib import Path from shutil import which from typing import ( TYPE_CHECKING, + Any, + Generic, Protocol, TypeVar, runtime_checkable, ) +import msgspec + from marimo._runtime.commands import CommandMessage from marimo._server.models.models import SuccessResponse from marimo._types.ids import ConsumerId @@ -34,6 +39,47 @@ async def parse_request( ) +S = TypeVar("S", bound=msgspec.Struct) + + +@dataclass +class MultipartRequest(Generic[S]): + """Result of parsing a multipart/form-data request body.""" + + body: S + files: dict[str, bytes] + + +async def parse_multipart_request( + request: Request, cls: type[S] +) -> MultipartRequest[S]: + """Parse a multipart/form-data body into a msgspec.Struct + file bytes. + + String form fields are validated against `cls`. File upload parts are + read fully into memory and returned in `files`, keyed by form-field + name (callers look them up explicitly rather than via the struct). + + Raises msgspec.ValidationError if required string fields are missing + or invalid. + """ + # Imported lazily so this module stays import-safe in environments + # without starlette (e.g. pyodide). + from starlette.datastructures import UploadFile + + # Use as an async context manager so any spooled temp files backing + # UploadFile parts are closed after parsing. + async with request.form() as form: + string_payload: dict[str, Any] = {} + files: dict[str, bytes] = {} + for key, value in form.multi_items(): + if isinstance(value, UploadFile): + files[key] = await value.read() + elif isinstance(value, str): + string_payload[key] = value + body = msgspec.convert(string_payload, cls, strict=False) + return MultipartRequest(body=body, files=files) + + @runtime_checkable class RequestAsCommand(Protocol): """Protocol for requests that can be converted to commands.""" diff --git a/marimo/_server/files/os_file_system.py b/marimo/_server/files/os_file_system.py index 7be1e5088cd..e835d6e0879 100644 --- a/marimo/_server/files/os_file_system.py +++ b/marimo/_server/files/os_file_system.py @@ -146,6 +146,19 @@ def create_file_or_directory( ) if name.strip() == "": raise ValueError("Cannot create file or directory with empty name") + # Names that traverse out of `path` or escape via separators are + # rejected. Validation belongs here (not in the endpoint) so every + # caller of OSFileSystem — HTTP, WASM bridge, scripts — is covered. + if ( + "/" in name + or "\\" in name + or "\x00" in name + or name in (".", "..") + ): + raise ValueError( + f"Invalid name {name!r}: must not contain path separators " + "or refer to a parent directory" + ) full_path = Path(path) / name full_path = _generate_unique_path(full_path) diff --git a/marimo/_server/models/files.py b/marimo/_server/models/files.py index 9096e3b10d7..d4cd57b26af 100644 --- a/marimo/_server/models/files.py +++ b/marimo/_server/models/files.py @@ -1,13 +1,15 @@ # Copyright 2026 Marimo. All rights reserved. from __future__ import annotations -from typing import Literal +from typing import Annotated, Literal import msgspec from marimo._metadata.opengraph import OpenGraphMetadata from marimo._server.models.models import BaseResponse +FileCreateType = Literal["file", "directory", "notebook"] + class FileInfo(msgspec.Struct, rename="camel"): id: str @@ -46,13 +48,31 @@ class FileCreateRequest(msgspec.Struct, rename="camel"): # The path where to create the file or directory path: str # 'file', 'directory', or 'notebook' - type: Literal["file", "directory", "notebook"] + type: FileCreateType # The name of the file or directory name: str - # The contents of the file, base64-encoded + # The contents of the file, base64-encoded. Used by the WASM/Pyodide RPC + # bridge, which cannot send multipart over the JS<->Py JSON boundary. + # The HTTP endpoint instead receives the raw file bytes via multipart/form-data. contents: str | None = None +class FileCreateMultipartRequest(msgspec.Struct, rename="camel"): + """multipart/form-data body for POST /api/files/create.""" + + # The path where to create the file or directory + path: str + # 'file', 'directory', or 'notebook' + type: FileCreateType + # The name of the file or directory + name: str + # The raw file bytes (optional). When omitted, an empty file is created + # (or, for 'notebook' type, a default notebook template). + file: Annotated[ + str | None, msgspec.Meta(extra_json_schema={"format": "binary"}) + ] = None + + class FileSearchRequest(msgspec.Struct, rename="camel"): # The search query string query: str diff --git a/packages/openapi/api.yaml b/packages/openapi/api.yaml index e49369e8063..e45073164dd 100644 --- a/packages/openapi/api.yaml +++ b/packages/openapi/api.yaml @@ -1664,6 +1664,30 @@ components: - success title: FileCopyResponse type: object + FileCreateMultipartRequest: + description: multipart/form-data body for POST /api/files/create. + properties: + file: + anyOf: + - type: string + - type: 'null' + default: null + format: binary + name: + type: string + path: + type: string + type: + enum: + - directory + - file + - notebook + required: + - path + - type + - name + title: FileCreateMultipartRequest + type: object FileCreateRequest: properties: contents: @@ -6077,9 +6101,9 @@ paths: post: requestBody: content: - application/json: + multipart/form-data: schema: - $ref: '#/components/schemas/FileCreateRequest' + $ref: '#/components/schemas/FileCreateMultipartRequest' responses: 200: content: diff --git a/packages/openapi/src/api.ts b/packages/openapi/src/api.ts index d1218dd6fc4..7fc0cabfa0e 100644 --- a/packages/openapi/src/api.ts +++ b/packages/openapi/src/api.ts @@ -1145,7 +1145,7 @@ export interface paths { }; requestBody?: { content: { - "application/json": components["schemas"]["FileCreateRequest"]; + "multipart/form-data": components["schemas"]["FileCreateMultipartRequest"]; }; }; responses: { @@ -4356,6 +4356,21 @@ export interface components { message?: string | null; success: boolean; }; + /** + * FileCreateMultipartRequest + * @description multipart/form-data body for POST /api/files/create. + */ + FileCreateMultipartRequest: { + /** + * Format: binary + * @default null + */ + file?: string | null; + name: string; + path: string; + /** @enum {unknown} */ + type: "directory" | "file" | "notebook"; + }; /** FileCreateRequest */ FileCreateRequest: { /** @default null */ diff --git a/pyproject.toml b/pyproject.toml index 7258e2bfd4e..1dd5df1eea5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,8 @@ dependencies = [ # - starlette 0.36.0 introduced a bug # - starlette 0.37.2 fixed httpx 0.28 compatibility "starlette>=0.37.2", + # multipart form parsing for file uploads (Starlette's request.form() requires it) + "python-multipart>=0.0.18", # websockets for use with starlette, and for lsp "websockets >= 14.2.0", # loro for collaborative editing diff --git a/tests/_server/api/endpoints/test_file_explorer.py b/tests/_server/api/endpoints/test_file_explorer.py index 8e954b0ddcc..e64b4b78ff8 100644 --- a/tests/_server/api/endpoints/test_file_explorer.py +++ b/tests/_server/api/endpoints/test_file_explorer.py @@ -71,15 +71,14 @@ def test_file_details_binary(client: TestClient) -> None: def test_create_and_delete_file_or_directory(client: TestClient) -> None: - # Create a file + # Create a file with no contents (empty file) response = client.post( "/api/files/create", headers=HEADERS, - json={ + data={ "path": test_dir, "type": "file", "name": "new_file.txt", - "contents": "", }, ) assert response.status_code == 200, response.text @@ -97,6 +96,89 @@ def test_create_and_delete_file_or_directory(client: TestClient) -> None: assert response.json()["success"] is True +def test_create_file_with_binary_contents(client: TestClient) -> None: + raw_bytes = b"\xff\xfe\xfd\x00\x80marimo" + response = client.post( + "/api/files/create", + headers=HEADERS, + data={ + "path": test_dir, + "type": "file", + "name": "binary_upload.bin", + }, + files={"file": ("binary_upload.bin", raw_bytes)}, + ) + assert response.status_code == 200, response.text + assert response.json()["success"] is True + + created_path = os.path.join(test_dir, "binary_upload.bin") + with open(created_path, "rb") as f: + assert f.read() == raw_bytes + os.remove(created_path) + + +def test_create_directory(client: TestClient) -> None: + response = client.post( + "/api/files/create", + headers=HEADERS, + data={ + "path": test_dir, + "type": "directory", + "name": "new_subdir", + }, + ) + assert response.status_code == 200, response.text + assert response.json()["success"] is True + new_dir = os.path.join(test_dir, "new_subdir") + assert os.path.isdir(new_dir) + os.rmdir(new_dir) + + +def test_create_rejects_invalid_type(client: TestClient) -> None: + response = client.post( + "/api/files/create", + headers=HEADERS, + data={ + "path": test_dir, + "type": "not_a_real_type", + "name": "x.txt", + }, + ) + assert response.status_code == 200, response.text + body = response.json() + assert body["success"] is False + assert "type" in body["message"].lower() + + +def test_create_rejects_missing_fields(client: TestClient) -> None: + response = client.post( + "/api/files/create", + headers=HEADERS, + data={"type": "file"}, + ) + assert response.status_code == 200, response.text + body = response.json() + assert body["success"] is False + + +def test_create_rejects_path_traversal_name(client: TestClient) -> None: + response = client.post( + "/api/files/create", + headers=HEADERS, + data={ + "path": test_dir, + "type": "file", + "name": "../escaped.txt", + }, + ) + assert response.status_code == 200, response.text + body = response.json() + assert body["success"] is False + # Confirm nothing was written outside test_dir + parent_path = os.path.join(os.path.dirname(test_dir), "escaped.txt") + assert not os.path.exists(parent_path) + + def test_update_file(client: TestClient) -> None: response = client.post( "/api/files/update", diff --git a/tests/_server/api/test_api_utils.py b/tests/_server/api/test_api_utils.py new file mode 100644 index 00000000000..5f4a09a4e60 --- /dev/null +++ b/tests/_server/api/test_api_utils.py @@ -0,0 +1,66 @@ +# Copyright 2026 Marimo. All rights reserved. +from __future__ import annotations + +from typing import TYPE_CHECKING + +import msgspec +import pytest +from starlette.applications import Starlette +from starlette.responses import JSONResponse +from starlette.routing import Route +from starlette.testclient import TestClient + +from marimo._server.api.utils import parse_multipart_request + +if TYPE_CHECKING: + from starlette.requests import Request + + +class _SampleForm(msgspec.Struct): + name: str + count: int + + +def _build_app(captured: dict[str, object]) -> TestClient: + async def endpoint(request: Request) -> JSONResponse: + parsed = await parse_multipart_request(request, _SampleForm) + captured["body"] = parsed.body + captured["files"] = parsed.files + return JSONResponse({"ok": True}) + + app = Starlette(routes=[Route("/test", endpoint, methods=["POST"])]) + return TestClient(app) + + +def test_parse_multipart_request_strings_and_file_bytes() -> None: + captured: dict[str, object] = {} + client = _build_app(captured) + response = client.post( + "/test", + data={"name": "marimo", "count": "42"}, + files={"upload": ("hello.bin", b"\x00\x01\x02\xff")}, + ) + assert response.status_code == 200 + body = captured["body"] + assert isinstance(body, _SampleForm) + assert body.name == "marimo" + assert body.count == 42 + assert captured["files"] == {"upload": b"\x00\x01\x02\xff"} + + +def test_parse_multipart_request_omitted_file_yields_empty_dict() -> None: + captured: dict[str, object] = {} + client = _build_app(captured) + response = client.post( + "/test", + data={"name": "marimo", "count": "1"}, + ) + assert response.status_code == 200 + assert captured["files"] == {} + + +def test_parse_multipart_request_raises_on_missing_field() -> None: + captured: dict[str, object] = {} + client = _build_app(captured) + with pytest.raises(msgspec.ValidationError): + client.post("/test", data={"name": "marimo"}) diff --git a/tests/_server/files/test_os_file_system.py b/tests/_server/files/test_os_file_system.py index 55f57293cf8..9b7f0e508d3 100644 --- a/tests/_server/files/test_os_file_system.py +++ b/tests/_server/files/test_os_file_system.py @@ -112,6 +112,27 @@ def test_create_with_disallowed_name(test_dir: Path, fs: OSFileSystem) -> None: fs.create_file_or_directory(str(test_dir), "file", ".", None) +@pytest.mark.parametrize( + "name", + [ + "../escaped.txt", + "../../escaped.txt", + "subdir/nested.txt", + "..", + ".", + "with\\backslash.txt", + "embed\x00null.txt", + ], +) +def test_create_rejects_path_traversal( + test_dir: Path, fs: OSFileSystem, name: str +) -> None: + with pytest.raises(ValueError): + fs.create_file_or_directory(str(test_dir), "file", name, b"data") + # No file should have been written anywhere in or above test_dir + assert not (test_dir.parent / "escaped.txt").exists() + + def test_list_files(test_dir: Path, fs: OSFileSystem) -> None: test_create_file(test_dir, fs) test_create_directory(test_dir, fs) diff --git a/tests/snapshots/dependencies.txt b/tests/snapshots/dependencies.txt index 93eea7ae2af..80ca4c96bd4 100644 --- a/tests/snapshots/dependencies.txt +++ b/tests/snapshots/dependencies.txt @@ -10,6 +10,7 @@ packaging psutil>=5.0 pygments>=2.19,<3 pymdown-extensions>=10.21.2,<11 +python-multipart>=0.0.18 pyyaml>=6.0.1 pyzmq>=27.1.0; python_version < '3.15' starlette>=0.37.2