Skip to content

Commit b4aab21

Browse files
improvement(tables): mount snapshots by presigned URL so the sandbox fetches directly (raise cap to 500MB)
1 parent c340659 commit b4aab21

6 files changed

Lines changed: 280 additions & 74 deletions

File tree

apps/sim/lib/copilot/tools/handlers/function-execute.test.ts

Lines changed: 65 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,8 @@ const {
1010
mockQueryRows,
1111
mockGetOrCreateTableSnapshot,
1212
mockDownloadFile,
13+
mockGeneratePresignedDownloadUrl,
14+
mockHasCloudStorage,
1315
mockExecuteTool,
1416
} = vi.hoisted(() => ({
1517
mockIsFeatureEnabled: vi.fn(),
@@ -18,6 +20,8 @@ const {
1820
mockQueryRows: vi.fn(),
1921
mockGetOrCreateTableSnapshot: vi.fn(),
2022
mockDownloadFile: vi.fn(),
23+
mockGeneratePresignedDownloadUrl: vi.fn(),
24+
mockHasCloudStorage: vi.fn(),
2125
mockExecuteTool: vi.fn(),
2226
}))
2327

@@ -29,8 +33,13 @@ vi.mock('@/lib/table/service', () => ({
2933
vi.mock('@/lib/table/rows/service', () => ({ queryRows: mockQueryRows }))
3034
vi.mock('@/lib/table/snapshot-cache', () => ({
3135
getOrCreateTableSnapshot: mockGetOrCreateTableSnapshot,
36+
SNAPSHOT_MAX_BYTES: 500 * 1024 * 1024,
37+
}))
38+
vi.mock('@/lib/uploads/core/storage-service', () => ({
39+
downloadFile: mockDownloadFile,
40+
generatePresignedDownloadUrl: mockGeneratePresignedDownloadUrl,
41+
hasCloudStorage: mockHasCloudStorage,
3242
}))
33-
vi.mock('@/lib/uploads/core/storage-service', () => ({ downloadFile: mockDownloadFile }))
3443
vi.mock('@/tools', () => ({ executeTool: mockExecuteTool }))
3544
// Workspace-file + VFS surfaces are unused on the tables-only path; stub to avoid heavy loads.
3645
vi.mock('@/lib/uploads/contexts/workspace/workspace-file-manager', () => ({
@@ -67,18 +76,22 @@ const context = { workspaceId: 'ws_1', userId: 'u1' }
6776

6877
function mountedFiles() {
6978
const params = mockExecuteTool.mock.calls[0][1] as {
70-
_sandboxFiles?: Array<{ path: string; content: string }>
79+
_sandboxFiles?: Array<{ path: string; type?: string; content?: string; url?: string }>
7180
}
7281
return params._sandboxFiles ?? []
7382
}
7483

84+
const snapshotCacheOn = (flag: string) => Promise.resolve(flag === 'table-snapshot-cache')
85+
7586
describe('executeFunctionExecute table mounts', () => {
7687
beforeEach(() => {
7788
vi.clearAllMocks()
7889
mockExecuteTool.mockResolvedValue({ success: true })
7990
mockGetTableById.mockResolvedValue(table)
8091
mockIsFeatureEnabled.mockResolvedValue(false)
8192
mockQueryRows.mockResolvedValue({ rows: [{ data: { name: 'Ada' } }] })
93+
mockHasCloudStorage.mockReturnValue(true)
94+
mockGeneratePresignedDownloadUrl.mockResolvedValue('https://s3.example/presigned?sig=abc')
8295
})
8396

8497
it('flag OFF: drains the table inline via queryRows (existing path)', async () => {
@@ -91,33 +104,55 @@ describe('executeFunctionExecute table mounts', () => {
91104
expect(files[0].content).toBe('name\nAda')
92105
})
93106

94-
it('flag ON + large table: mounts by reference from the snapshot, no row drain', async () => {
95-
mockIsFeatureEnabled.mockImplementation((flag: string) =>
96-
Promise.resolve(flag === 'table-snapshot-cache')
97-
)
107+
it('flag ON + cloud storage: mounts by presigned URL, no bytes through web', async () => {
108+
mockIsFeatureEnabled.mockImplementation(snapshotCacheOn)
98109
mockGetOrCreateTableSnapshot.mockResolvedValue({
99110
key: 'table-snapshots/ws_1/tbl_1/v5.csv',
100111
size: 9,
101112
version: 5,
102113
})
103-
mockDownloadFile.mockResolvedValue(Buffer.from('name\nAda\n'))
104114

105115
await executeFunctionExecute({ inputTables: ['tbl_1'] }, context as never)
106116

107117
expect(mockGetOrCreateTableSnapshot).toHaveBeenCalledTimes(1)
108118
expect(mockQueryRows).not.toHaveBeenCalled()
119+
expect(mockDownloadFile).not.toHaveBeenCalled()
120+
expect(mockGeneratePresignedDownloadUrl).toHaveBeenCalledWith(
121+
'table-snapshots/ws_1/tbl_1/v5.csv',
122+
'execution',
123+
expect.any(Number)
124+
)
125+
expect(mountedFiles()[0]).toEqual({
126+
type: 'url',
127+
path: '/home/user/tables/tbl_1.csv',
128+
url: 'https://s3.example/presigned?sig=abc',
129+
})
130+
})
131+
132+
it('flag ON + local storage: falls back to a buffered content mount', async () => {
133+
mockIsFeatureEnabled.mockImplementation(snapshotCacheOn)
134+
mockHasCloudStorage.mockReturnValue(false)
135+
mockGetOrCreateTableSnapshot.mockResolvedValue({
136+
key: 'table-snapshots/ws_1/tbl_1/v5.csv',
137+
size: 9,
138+
version: 5,
139+
})
140+
mockDownloadFile.mockResolvedValue(Buffer.from('name\nAda\n'))
141+
142+
await executeFunctionExecute({ inputTables: ['tbl_1'] }, context as never)
143+
144+
expect(mockGeneratePresignedDownloadUrl).not.toHaveBeenCalled()
109145
expect(mockDownloadFile).toHaveBeenCalledWith(
110146
expect.objectContaining({ key: 'table-snapshots/ws_1/tbl_1/v5.csv', context: 'execution' })
111147
)
112-
const files = mountedFiles()
113-
expect(files[0].path).toBe('/home/user/tables/tbl_1.csv')
114-
expect(files[0].content).toBe('name\nAda\n')
148+
const file = mountedFiles()[0]
149+
expect(file.path).toBe('/home/user/tables/tbl_1.csv')
150+
expect(file.content).toBe('name\nAda\n')
151+
expect(file.type).toBeUndefined()
115152
})
116153

117154
it('flag ON but small table stays on the inline path', async () => {
118-
mockIsFeatureEnabled.mockImplementation((flag: string) =>
119-
Promise.resolve(flag === 'table-snapshot-cache')
120-
)
155+
mockIsFeatureEnabled.mockImplementation(snapshotCacheOn)
121156
mockGetTableById.mockResolvedValue({ ...table, rowCount: 10 })
122157

123158
await executeFunctionExecute({ inputTables: ['tbl_1'] }, context as never)
@@ -126,10 +161,23 @@ describe('executeFunctionExecute table mounts', () => {
126161
expect(mockQueryRows).toHaveBeenCalledTimes(1)
127162
})
128163

129-
it('flag ON: throws when the snapshot exceeds the per-file mount limit', async () => {
130-
mockIsFeatureEnabled.mockImplementation((flag: string) =>
131-
Promise.resolve(flag === 'table-snapshot-cache')
132-
)
164+
it('flag ON + cloud: throws when the snapshot exceeds the table mount limit', async () => {
165+
mockIsFeatureEnabled.mockImplementation(snapshotCacheOn)
166+
mockGetOrCreateTableSnapshot.mockResolvedValue({
167+
key: 'table-snapshots/ws_1/tbl_1/v5.csv',
168+
size: 600 * 1024 * 1024,
169+
version: 5,
170+
})
171+
172+
await expect(
173+
executeFunctionExecute({ inputTables: ['tbl_1'] }, context as never)
174+
).rejects.toThrow(/table mount limit/)
175+
expect(mockGeneratePresignedDownloadUrl).not.toHaveBeenCalled()
176+
})
177+
178+
it('flag ON + local: throws when the snapshot exceeds the per-file mount limit', async () => {
179+
mockIsFeatureEnabled.mockImplementation(snapshotCacheOn)
180+
mockHasCloudStorage.mockReturnValue(false)
133181
mockGetOrCreateTableSnapshot.mockResolvedValue({
134182
key: 'table-snapshots/ws_1/tbl_1/v5.csv',
135183
size: 20 * 1024 * 1024,

apps/sim/lib/copilot/tools/handlers/function-execute.ts

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,19 @@ import { isPlanAliasPath, workflowAliasSandboxPath } from '@/lib/copilot/vfs/wor
55
import { isFeatureEnabled } from '@/lib/core/config/feature-flags'
66
import { queryRows } from '@/lib/table/rows/service'
77
import { getTableById, listTables } from '@/lib/table/service'
8-
import { getOrCreateTableSnapshot } from '@/lib/table/snapshot-cache'
8+
import { getOrCreateTableSnapshot, SNAPSHOT_MAX_BYTES } from '@/lib/table/snapshot-cache'
99
import { listWorkspaceFileFolders } from '@/lib/uploads/contexts/workspace/workspace-file-folder-manager'
1010
import {
1111
fetchWorkspaceFileBuffer,
1212
findWorkspaceFileRecord,
1313
getSandboxWorkspaceFilePath,
1414
listWorkspaceFiles,
1515
} from '@/lib/uploads/contexts/workspace/workspace-file-manager'
16-
import { downloadFile } from '@/lib/uploads/core/storage-service'
16+
import {
17+
downloadFile,
18+
generatePresignedDownloadUrl,
19+
hasCloudStorage,
20+
} from '@/lib/uploads/core/storage-service'
1721
import { executeTool as executeAppTool } from '@/tools'
1822
import type { ToolExecutionContext, ToolExecutionResult } from '../../tool-executor/types'
1923

@@ -30,11 +34,15 @@ const MAX_MOUNTED_FILES = 500
3034
*/
3135
const SNAPSHOT_MIN_ROWS = 500
3236

33-
interface SandboxFile {
34-
path: string
35-
content: string
36-
encoding?: 'base64'
37-
}
37+
/**
38+
* Lifetime of the presigned URL handed to the sandbox to fetch a snapshot. Long enough to download
39+
* a large file at sandbox startup; the URL grants read to only that one version-pinned object.
40+
*/
41+
const SNAPSHOT_URL_TTL_SECONDS = 600
42+
43+
type SandboxFile =
44+
| { type?: 'content'; path: string; content: string; encoding?: 'base64' }
45+
| { type: 'url'; path: string; url: string }
3846

3947
interface CanonicalFileInput {
4048
path: string
@@ -279,10 +287,30 @@ async function resolveInputFiles(
279287
: undefined
280288
const mountPath = sandboxPath || `/home/user/tables/${table.id}.csv`
281289

282-
// Large/hot tables mount by reference from a version-keyed CSV snapshot in object storage —
283-
// size is known before bytes are pulled (like workspace files), bounding web-process memory.
290+
// Large/hot tables mount by reference from a version-keyed CSV snapshot in object storage.
284291
if (snapshotCacheEnabled && table.rowCount >= SNAPSHOT_MIN_ROWS) {
285292
const snapshot = await getOrCreateTableSnapshot(table, 'copilot-fn-exec')
293+
294+
if (hasCloudStorage()) {
295+
// Mount by reference: the sandbox fetches the snapshot straight from storage via a
296+
// presigned URL, so the bytes never pass through the web process — the only ceiling is
297+
// sandbox disk (enforced at materialization by SNAPSHOT_MAX_BYTES).
298+
if (snapshot.size > SNAPSHOT_MAX_BYTES) {
299+
throw new Error(
300+
`Input table "${tableId}" is ${Math.round(snapshot.size / 1024 / 1024)}MB, over the ${SNAPSHOT_MAX_BYTES / 1024 / 1024}MB table mount limit.`
301+
)
302+
}
303+
const url = await generatePresignedDownloadUrl(
304+
snapshot.key,
305+
'execution',
306+
SNAPSHOT_URL_TTL_SECONDS
307+
)
308+
sandboxFiles.push({ type: 'url', path: mountPath, url })
309+
continue
310+
}
311+
312+
// Local storage: a presigned URL is an app-internal serve path a remote sandbox can't
313+
// reach, so fall back to buffering the bytes through the web process (file-mount guards).
286314
if (snapshot.size > MAX_FILE_SIZE) {
287315
throw new Error(
288316
`Input table "${tableId}" is ${Math.round(snapshot.size / 1024 / 1024)}MB, over the ${MAX_FILE_SIZE / 1024 / 1024}MB per-file mount limit.`

apps/sim/lib/execution/e2b.test.ts

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
/**
2+
* @vitest-environment node
3+
*/
4+
import { beforeEach, describe, expect, it, vi } from 'vitest'
5+
import { CodeLanguage } from '@/lib/execution/languages'
6+
7+
const { mockCreate, mockRunCode, mockCommandsRun, mockFilesWrite, mockKill } = vi.hoisted(() => ({
8+
mockCreate: vi.fn(),
9+
mockRunCode: vi.fn(),
10+
mockCommandsRun: vi.fn(),
11+
mockFilesWrite: vi.fn(),
12+
mockKill: vi.fn(),
13+
}))
14+
15+
vi.mock('@e2b/code-interpreter', () => ({ Sandbox: { create: mockCreate } }))
16+
vi.mock('@/lib/core/config/env', () => ({ env: { E2B_API_KEY: 'test-key' } }))
17+
18+
import { executeInE2B, executeShellInE2B } from '@/lib/execution/e2b'
19+
20+
describe('e2b sandbox inputs', () => {
21+
beforeEach(() => {
22+
vi.clearAllMocks()
23+
mockCreate.mockResolvedValue({
24+
sandboxId: 'sb_1',
25+
files: { write: mockFilesWrite },
26+
commands: { run: mockCommandsRun },
27+
runCode: mockRunCode,
28+
kill: mockKill,
29+
})
30+
mockRunCode.mockResolvedValue({
31+
error: null,
32+
text: '',
33+
logs: { stdout: [], stderr: [] },
34+
results: [],
35+
})
36+
// Default: shell code run + any fetch succeed.
37+
mockCommandsRun.mockResolvedValue({ stdout: '', stderr: '', exitCode: 0 })
38+
})
39+
40+
it('fetches a url entry via curl with URL/DST/DIR passed as envs (no inline write)', async () => {
41+
await executeInE2B({
42+
code: 'x',
43+
language: CodeLanguage.JavaScript,
44+
timeoutMs: 1000,
45+
sandboxFiles: [
46+
{ type: 'url', path: '/home/user/tables/t.csv', url: 'https://s3.example/p?a=1&b=2' },
47+
],
48+
})
49+
50+
expect(mockCommandsRun).toHaveBeenCalledTimes(1)
51+
const [cmd, opts] = mockCommandsRun.mock.calls[0]
52+
expect(cmd).toContain('curl')
53+
expect(cmd).toContain('mkdir -p')
54+
// URL/path go through envs, never interpolated into the command string.
55+
expect(cmd).not.toContain('https://s3.example')
56+
expect(opts.envs).toEqual({
57+
URL: 'https://s3.example/p?a=1&b=2',
58+
DST: '/home/user/tables/t.csv',
59+
DIR: '/home/user/tables',
60+
})
61+
expect(opts.user).toBeUndefined() // code sandbox runs as default user
62+
expect(mockFilesWrite).not.toHaveBeenCalled()
63+
})
64+
65+
it('writes a content entry inline (no fetch)', async () => {
66+
await executeInE2B({
67+
code: 'x',
68+
language: CodeLanguage.JavaScript,
69+
timeoutMs: 1000,
70+
sandboxFiles: [{ path: '/home/user/f.txt', content: 'hi' }],
71+
})
72+
73+
expect(mockFilesWrite).toHaveBeenCalledWith('/home/user/f.txt', 'hi')
74+
expect(mockCommandsRun).not.toHaveBeenCalled()
75+
})
76+
77+
it('fetches as root in the shell sandbox', async () => {
78+
await executeShellInE2B({
79+
code: 'echo hi',
80+
envs: {},
81+
timeoutMs: 1000,
82+
sandboxFiles: [{ type: 'url', path: '/home/user/tables/t.csv', url: 'https://s3.example/p' }],
83+
})
84+
85+
const fetchCall = mockCommandsRun.mock.calls.find((c) => c[1]?.envs?.URL)
86+
expect(fetchCall).toBeDefined()
87+
expect(fetchCall?.[0]).toContain('curl')
88+
expect(fetchCall?.[1].user).toBe('root')
89+
})
90+
91+
it('throws a clear error and kills the sandbox when the fetch fails', async () => {
92+
mockCommandsRun.mockRejectedValueOnce(new Error('curl: (22) 403'))
93+
94+
await expect(
95+
executeInE2B({
96+
code: 'x',
97+
language: CodeLanguage.JavaScript,
98+
timeoutMs: 1000,
99+
sandboxFiles: [
100+
{ type: 'url', path: '/home/user/tables/t.csv', url: 'https://s3.example/p' },
101+
],
102+
})
103+
).rejects.toThrow(/Failed to fetch mounted file into sandbox/)
104+
105+
expect(mockKill).toHaveBeenCalled()
106+
expect(mockRunCode).not.toHaveBeenCalled()
107+
})
108+
})

0 commit comments

Comments
 (0)