From 48921812dda75df4c819eaf2dbb1a94923d9fb72 Mon Sep 17 00:00:00 2001 From: Joohan Lee Date: Tue, 12 May 2026 23:16:54 -0400 Subject: [PATCH] fix: recognize kind field for skill priming, add storage_session_id LibreChat's batchUploadCodeEnvFiles sends kind/id/version form fields (not entity_id) and expects storage_session_id in the response. Two bugs in upload_files_batch caused skill priming to always fail: - is_agent_file checked only entity_id, so kind=skill uploads were treated as user files and .xsd schemas were rejected by the whitelist - response returned session_id only; LibreChat throws if storage_session_id is absent Add kind=skill/agent recognition alongside the existing entity_id path. Add storage_session_id as an alias for session_id in the response. Both changes are backward compatible. --- src/api/files.py | 9 ++++- tests/integration/test_librechat_compat.py | 40 ++++++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/src/api/files.py b/src/api/files.py index 59f85fa..7d5a9d4 100644 --- a/src/api/files.py +++ b/src/api/files.py @@ -249,7 +249,13 @@ async def upload_files_batch( entity_id: Optional[str] = ( entity_id_raw if isinstance(entity_id_raw, str) and entity_id_raw else None ) - is_agent_file = entity_id is not None + # LibreChat sends kind=skill/agent (not entity_id) for skill-priming uploads. + # Treat these as agent files so skill bundles bypass the user-facing extension + # whitelist and are correctly tagged read-only in the sandbox. + kind_raw = form.get("kind") + is_agent_file = entity_id is not None or ( + isinstance(kind_raw, str) and kind_raw in ("skill", "agent") + ) read_only_raw = form.get("read_only") is_read_only = isinstance(read_only_raw, str) and read_only_raw.lower() in ( @@ -340,6 +346,7 @@ async def upload_files_batch( return { "message": message, "session_id": session_id, + "storage_session_id": session_id, # LibreChat alias for session_id "files": results, "succeeded": succeeded, "failed": failed, diff --git a/tests/integration/test_librechat_compat.py b/tests/integration/test_librechat_compat.py index 093e96e..d27377a 100644 --- a/tests/integration/test_librechat_compat.py +++ b/tests/integration/test_librechat_compat.py @@ -2038,6 +2038,46 @@ def test_nested_filename_preserved_in_response( # The stored filename also preserves the path so S3/sandbox round-trip works. assert "skills/weather_lookup/SKILL.md" in setup_mocks["stored"] + def test_kind_skill_marks_files_as_agent(self, client, auth_headers, setup_mocks): + """LibreChat sends kind=skill (not entity_id) for skill-priming uploads. + + appendCodeEnvFileIdentity() in LibreChat appends kind/id/version fields + to the multipart form — entity_id is never sent. The endpoint must + recognise kind=skill as an agent-file upload so that skill bundle files + with non-standard extensions (.xsd schemas, .toml configs, etc.) bypass + the user-facing extension whitelist and are tagged read-only in the sandbox. + """ + files = [("file", ("schema.xsd", io.BytesIO(b""), "application/xml"))] + data = {"kind": "skill", "id": "skill_abc123", "version": "3"} + response = client.post( + "/upload/batch", files=files, data=data, headers=auth_headers + ) + + assert response.status_code == 200 + store = setup_mocks["file_service"].store_uploaded_file + assert store.await_count == 1 + kwargs = store.await_args.kwargs + assert kwargs["is_agent_file"] is True + + def test_batch_response_includes_storage_session_id( + self, client, auth_headers, setup_mocks + ): + """LibreChat's batchUploadCodeEnvFiles validates storage_session_id in the response. + + crud.js throws if the field is absent: + if (!result.storage_session_id || !Array.isArray(result.files)) { + throw new Error(`Unexpected batch upload response: ...`) + } + The field must equal session_id (same underlying value, different name). + """ + files = [("file", ("data.csv", io.BytesIO(b"a,b"), "text/csv"))] + response = client.post("/upload/batch", files=files, headers=auth_headers) + + assert response.status_code == 200 + body = response.json() + assert "storage_session_id" in body + assert body["storage_session_id"] == body["session_id"] + # ============================================================================= # GET /sessions/{session_id}/objects/{file_id} — liveness probe