Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions apps/sim/background/table-export.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ import { runTableExport, type TableExportPayload } from '@/lib/table/export-runn

/**
* Trigger.dev wrapper around `runTableExport`. Retry-safe: a retried attempt regenerates the file
* from scratch (failures clean up their partial upload), and the `table_jobs` ownership gate
* stops a run that lost the job. `medium-1x` — the serialized file is buffered in memory before
* the single-shot storage upload (~hundreds of MB worst case for enterprise 1M-row tables).
* from scratch (failures abort/clean up their partial upload), and the `table_jobs` ownership gate
* stops a run that lost the job. The file streams to storage in bounded multipart chunks (no longer
* buffered whole), so `medium-1x` is now headroom rather than a hard requirement.
*/
export const tableExportTask = task({
id: 'table-export',
Expand Down
201 changes: 201 additions & 0 deletions apps/sim/lib/copilot/tools/handlers/function-execute.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
/**
* @vitest-environment node
*/
import { beforeEach, describe, expect, it, vi } from 'vitest'

// `vi.mock` calls are hoisted above the static imports, so the mock functions their factories
// reference must be created inside `vi.hoisted` to exist by the time those factories run.
const {
mockIsFeatureEnabled,
mockGetTableById,
mockListTables,
mockQueryRows,
mockGetOrCreateTableSnapshot,
mockDownloadFile,
mockGeneratePresignedDownloadUrl,
mockHasCloudStorage,
mockExecuteTool,
} = vi.hoisted(() => ({
mockIsFeatureEnabled: vi.fn(),
mockGetTableById: vi.fn(),
mockListTables: vi.fn(),
mockQueryRows: vi.fn(),
mockGetOrCreateTableSnapshot: vi.fn(),
mockDownloadFile: vi.fn(),
mockGeneratePresignedDownloadUrl: vi.fn(),
mockHasCloudStorage: vi.fn(),
mockExecuteTool: vi.fn(),
}))

// Feature-flag lookup: each test decides whether 'table-snapshot-cache' resolves on or off.
vi.mock('@/lib/core/config/feature-flags', () => ({ isFeatureEnabled: mockIsFeatureEnabled }))
// Table metadata and row access (the inline CSV path reads rows through queryRows).
vi.mock('@/lib/table/service', () => ({
getTableById: mockGetTableById,
listTables: mockListTables,
}))
vi.mock('@/lib/table/rows/service', () => ({ queryRows: mockQueryRows }))
// Snapshot cache: SNAPSHOT_MAX_BYTES is pinned to 500 MiB here so the over-limit test can use a
// fixed 600 MiB snapshot size.
vi.mock('@/lib/table/snapshot-cache', () => ({
getOrCreateTableSnapshot: mockGetOrCreateTableSnapshot,
SNAPSHOT_MAX_BYTES: 500 * 1024 * 1024,
}))
// Storage layer: presigned URLs (cloud mount) vs. buffered download (local-storage fallback).
vi.mock('@/lib/uploads/core/storage-service', () => ({
downloadFile: mockDownloadFile,
generatePresignedDownloadUrl: mockGeneratePresignedDownloadUrl,
hasCloudStorage: mockHasCloudStorage,
}))
// The sandbox tool itself; tests inspect the params it receives instead of running code.
vi.mock('@/tools', () => ({ executeTool: mockExecuteTool }))
// Workspace-file + VFS surfaces are unused on the tables-only path; stub to avoid heavy loads.
vi.mock('@/lib/uploads/contexts/workspace/workspace-file-manager', () => ({
fetchWorkspaceFileBuffer: vi.fn(),
findWorkspaceFileRecord: vi.fn(),
getSandboxWorkspaceFilePath: vi.fn(),
listWorkspaceFiles: vi.fn(),
}))
vi.mock('@/lib/uploads/contexts/workspace/workspace-file-folder-manager', () => ({
listWorkspaceFileFolders: vi.fn(),
}))
vi.mock('@/lib/copilot/vfs/path-utils', () => ({
decodeVfsPathSegments: (p: string) => p.split('/'),
encodeVfsPathSegments: (s: string[]) => s.join('/'),
}))
vi.mock('@/lib/copilot/vfs/workflow-alias-resolver', () => ({
resolveWorkflowAliasForWorkspace: vi.fn().mockResolvedValue(null),
}))
vi.mock('@/lib/copilot/vfs/workflow-aliases', () => ({
isPlanAliasPath: () => false,
workflowAliasSandboxPath: (p: string) => p,
}))

import { executeFunctionExecute } from '@/lib/copilot/tools/handlers/function-execute'

// Default table fixture, owned by workspace `ws_1`. Its rowCount (1000) is above the handler's
// SNAPSHOT_MIN_ROWS threshold (500), so it takes the snapshot path whenever the feature flag is
// on; individual tests override rowCount or workspaceId as needed.
const table = {
id: 'tbl_1',
workspaceId: 'ws_1',
rowCount: 1000,
schema: { columns: [{ id: 'col_name', name: 'name', type: 'string' }] },
}

// Execution context in the same workspace as `table`. Tests pass it with `as never` —
// presumably because it is only a partial ToolExecutionContext; confirm against the type.
const context = { workspaceId: 'ws_1', userId: 'u1' }

/**
 * Returns the `_sandboxFiles` array passed to the first `executeTool` call, i.e. the files the
 * handler asked the sandbox to mount.
 *
 * @returns The mounted files, or an empty array when the call carried no `_sandboxFiles`.
 * @throws Error when `executeTool` has not been called yet, so a test that never reached the
 *   sandbox fails with a clear message instead of an opaque "cannot read properties of
 *   undefined" TypeError.
 */
function mountedFiles() {
  const firstCall = mockExecuteTool.mock.calls[0]
  if (!firstCall) {
    throw new Error('executeTool was not called; no sandbox files were mounted')
  }
  // Second positional argument of executeTool is the params object carrying the mounts.
  const params = firstCall[1] as
    | {
        _sandboxFiles?: Array<{ path: string; type?: string; content?: string; url?: string }>
      }
    | undefined
  return params?._sandboxFiles ?? []
}

/**
 * Feature-flag stub: resolves `true` only for the 'table-snapshot-cache' flag, so every other
 * flag the handler may consult stays off.
 */
const snapshotCacheOn = async (flag: string): Promise<boolean> => {
  const isSnapshotFlag = flag === 'table-snapshot-cache'
  return isSnapshotFlag
}

/**
 * Exercises how `executeFunctionExecute` mounts input tables into the sandbox: the inline CSV
 * path (flag off or small table), the presigned-URL mount (flag on + cloud storage), the
 * buffered fallback (flag on + local storage), the size limits of both snapshot paths, and the
 * workspace ownership check.
 */
describe('executeFunctionExecute table mounts', () => {
  const SNAPSHOT_KEY = 'table-snapshots/ws_1/tbl_1/v5.csv'
  const MOUNT_PATH = '/home/user/tables/tbl_1.csv'
  const MIB = 1024 * 1024

  /** Snapshot descriptor as resolved by the mocked snapshot cache. */
  const snapshotOfSize = (size: number) => ({ key: SNAPSHOT_KEY, size, version: 5 })

  /** Runs the handler against the single fixture table. */
  const runWithTable = () => executeFunctionExecute({ inputTables: ['tbl_1'] }, context as never)

  beforeEach(() => {
    vi.clearAllMocks()
    // Defaults: flag off, cloud storage available, one-row table, sandbox call succeeds.
    mockIsFeatureEnabled.mockResolvedValue(false)
    mockHasCloudStorage.mockReturnValue(true)
    mockGetTableById.mockResolvedValue(table)
    mockQueryRows.mockResolvedValue({ rows: [{ data: { name: 'Ada' } }] })
    mockGeneratePresignedDownloadUrl.mockResolvedValue('https://s3.example/presigned?sig=abc')
    mockExecuteTool.mockResolvedValue({ success: true })
  })

  it('flag OFF: drains the table inline via queryRows (existing path)', async () => {
    await runWithTable()

    expect(mockQueryRows).toHaveBeenCalledTimes(1)
    expect(mockGetOrCreateTableSnapshot).not.toHaveBeenCalled()
    const [csv] = mountedFiles()
    expect(csv.path).toBe(MOUNT_PATH)
    expect(csv.content).toBe('name\nAda')
  })

  it('flag ON + cloud storage: mounts by presigned URL, no bytes through web', async () => {
    mockIsFeatureEnabled.mockImplementation(snapshotCacheOn)
    mockGetOrCreateTableSnapshot.mockResolvedValue(snapshotOfSize(9))

    await runWithTable()

    expect(mockGetOrCreateTableSnapshot).toHaveBeenCalledTimes(1)
    expect(mockQueryRows).not.toHaveBeenCalled()
    expect(mockDownloadFile).not.toHaveBeenCalled()
    expect(mockGeneratePresignedDownloadUrl).toHaveBeenCalledWith(
      SNAPSHOT_KEY,
      'execution',
      expect.any(Number)
    )
    const [mount] = mountedFiles()
    expect(mount).toEqual({
      type: 'url',
      path: MOUNT_PATH,
      url: 'https://s3.example/presigned?sig=abc',
    })
  })

  it('flag ON + local storage: falls back to a buffered content mount', async () => {
    mockIsFeatureEnabled.mockImplementation(snapshotCacheOn)
    mockHasCloudStorage.mockReturnValue(false)
    mockGetOrCreateTableSnapshot.mockResolvedValue(snapshotOfSize(9))
    mockDownloadFile.mockResolvedValue(Buffer.from('name\nAda\n'))

    await runWithTable()

    expect(mockGeneratePresignedDownloadUrl).not.toHaveBeenCalled()
    expect(mockDownloadFile).toHaveBeenCalledWith(
      expect.objectContaining({ key: SNAPSHOT_KEY, context: 'execution' })
    )
    const [mount] = mountedFiles()
    expect(mount.path).toBe(MOUNT_PATH)
    expect(mount.content).toBe('name\nAda\n')
    // Content mounts carry no explicit `type` discriminator.
    expect(mount.type).toBeUndefined()
  })

  it('flag ON but small table stays on the inline path', async () => {
    mockIsFeatureEnabled.mockImplementation(snapshotCacheOn)
    mockGetTableById.mockResolvedValue({ ...table, rowCount: 10 })

    await runWithTable()

    expect(mockGetOrCreateTableSnapshot).not.toHaveBeenCalled()
    expect(mockQueryRows).toHaveBeenCalledTimes(1)
  })

  it('flag ON + cloud: throws when the snapshot exceeds the table mount limit', async () => {
    mockIsFeatureEnabled.mockImplementation(snapshotCacheOn)
    // 600 MiB is over the 500 MiB SNAPSHOT_MAX_BYTES configured in the module mock.
    mockGetOrCreateTableSnapshot.mockResolvedValue(snapshotOfSize(600 * MIB))

    await expect(runWithTable()).rejects.toThrow(/table mount limit/)
    expect(mockGeneratePresignedDownloadUrl).not.toHaveBeenCalled()
  })

  it('flag ON + local: throws when the snapshot exceeds the per-file mount limit', async () => {
    mockIsFeatureEnabled.mockImplementation(snapshotCacheOn)
    mockHasCloudStorage.mockReturnValue(false)
    mockGetOrCreateTableSnapshot.mockResolvedValue(snapshotOfSize(20 * MIB))

    await expect(runWithTable()).rejects.toThrow(/per-file mount limit/)
    expect(mockDownloadFile).not.toHaveBeenCalled()
  })

  it('rejects a table that belongs to another workspace (tenant isolation)', async () => {
    mockGetTableById.mockResolvedValue({ ...table, workspaceId: 'ws_2' })

    await expect(runWithTable()).rejects.toThrow(/Input table not found/)
    expect(mockGetOrCreateTableSnapshot).not.toHaveBeenCalled()
  })
})
87 changes: 74 additions & 13 deletions apps/sim/lib/copilot/tools/handlers/function-execute.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,19 @@ import { isPlanAliasPath, workflowAliasSandboxPath } from '@/lib/copilot/vfs/wor
import { isFeatureEnabled } from '@/lib/core/config/feature-flags'
import { queryRows } from '@/lib/table/rows/service'
import { getTableById, listTables } from '@/lib/table/service'
import { getOrCreateTableSnapshot, SNAPSHOT_MAX_BYTES } from '@/lib/table/snapshot-cache'
import { listWorkspaceFileFolders } from '@/lib/uploads/contexts/workspace/workspace-file-folder-manager'
import {
fetchWorkspaceFileBuffer,
findWorkspaceFileRecord,
getSandboxWorkspaceFilePath,
listWorkspaceFiles,
} from '@/lib/uploads/contexts/workspace/workspace-file-manager'
import {
downloadFile,
generatePresignedDownloadUrl,
hasCloudStorage,
} from '@/lib/uploads/core/storage-service'
import { executeTool as executeAppTool } from '@/tools'
import type { ToolExecutionContext, ToolExecutionResult } from '../../tool-executor/types'

Expand All @@ -21,11 +27,22 @@ const MAX_FILE_SIZE = 10 * 1024 * 1024
const MAX_TOTAL_SIZE = 50 * 1024 * 1024
const MAX_MOUNTED_FILES = 500

interface SandboxFile {
path: string
content: string
encoding?: 'base64'
}
/**
* Below this row count a table mounts via the direct inline CSV path — the version-keyed snapshot
* cache (storage round-trip) only pays off for larger/hot tables. Behind the feature flag either
* way; this just keeps tiny one-shot tables on the cheaper path.
*/
const SNAPSHOT_MIN_ROWS = 500

/**
* Lifetime of the presigned URL handed to the sandbox to fetch a snapshot. Long enough to download
* a large file at sandbox startup; the URL grants read to only that one version-pinned object.
*/
const SNAPSHOT_URL_TTL_SECONDS = 600

type SandboxFile =
| { type?: 'content'; path: string; content: string; encoding?: 'base64' }
| { type: 'url'; path: string; url: string }

interface CanonicalFileInput {
path: string
Expand Down Expand Up @@ -249,6 +266,7 @@ async function resolveInputFiles(
const tablePathLookup = hasTablePathRefs
? new Map((await listTables(workspaceId)).map((table) => [table.name, table]))
: undefined
const snapshotCacheEnabled = await isFeatureEnabled('table-snapshot-cache')
for (const tableRef of inputTables) {
const tableId =
typeof tableRef === 'string'
Expand All @@ -263,6 +281,56 @@ async function resolveInputFiles(
`Input table not found: "${tableId}". Pass the table id (tbl_...) from tables/{name}/meta.json, or a tables/{name}/meta.json path.`
)
}
const sandboxPath =
typeof tableRef === 'object' && tableRef !== null
? (tableRef as CanonicalTableInput).sandboxPath
: undefined
const mountPath = sandboxPath || `/home/user/tables/${table.id}.csv`

// Large/hot tables mount by reference from a version-keyed CSV snapshot in object storage.
if (snapshotCacheEnabled && table.rowCount >= SNAPSHOT_MIN_ROWS) {
const snapshot = await getOrCreateTableSnapshot(table, 'copilot-fn-exec')

if (hasCloudStorage()) {
// Mount by reference: the sandbox fetches the snapshot straight from storage via a
// presigned URL, so the bytes never pass through the web process — the only ceiling is
// sandbox disk (enforced at materialization by SNAPSHOT_MAX_BYTES).
if (snapshot.size > SNAPSHOT_MAX_BYTES) {
throw new Error(
`Input table "${tableId}" is ${Math.round(snapshot.size / 1024 / 1024)}MB, over the ${SNAPSHOT_MAX_BYTES / 1024 / 1024}MB table mount limit.`
)
}
const url = await generatePresignedDownloadUrl(
snapshot.key,
'execution',
SNAPSHOT_URL_TTL_SECONDS
)
sandboxFiles.push({ type: 'url', path: mountPath, url })
Comment thread
cursor[bot] marked this conversation as resolved.
continue
}

// Local storage: a presigned URL is an app-internal serve path a remote sandbox can't
// reach, so fall back to buffering the bytes through the web process (file-mount guards).
if (snapshot.size > MAX_FILE_SIZE) {
throw new Error(
`Input table "${tableId}" is ${Math.round(snapshot.size / 1024 / 1024)}MB, over the ${MAX_FILE_SIZE / 1024 / 1024}MB per-file mount limit.`
)
}
if (totalSize + snapshot.size > MAX_TOTAL_SIZE) {
throw new Error(
`Mounting "${tableId}" would exceed the ${MAX_TOTAL_SIZE / 1024 / 1024}MB total mount limit. Mount fewer or smaller tables.`
)
}
const buffer = await downloadFile({
key: snapshot.key,
context: 'execution',
maxBytes: MAX_FILE_SIZE,
})
totalSize += buffer.length
sandboxFiles.push({ path: mountPath, content: buffer.toString('utf-8') })
continue
}

Comment thread
TheodoreSpeaks marked this conversation as resolved.
const rows = await queryRows(table, {}, 'copilot-fn-exec')

const allKeys = new Set(table.schema.columns.map((column) => column.name))
Expand Down Expand Up @@ -290,14 +358,7 @@ async function resolveInputFiles(
)
}
const csvContent = csvLines.join('\n')
const sandboxPath =
typeof tableRef === 'object' && tableRef !== null
? (tableRef as CanonicalTableInput).sandboxPath
: undefined
sandboxFiles.push({
path: sandboxPath || `/home/user/tables/${table.id}.csv`,
content: csvContent,
})
sandboxFiles.push({ path: mountPath, content: csvContent })
}
}

Expand Down
1 change: 1 addition & 0 deletions apps/sim/lib/core/config/env.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ export const env = createEnv({
BILLING_ENABLED: z.boolean().optional(), // Enable billing enforcement and usage tracking
FREE_API_DEPLOYMENT_GATE_ENABLED: z.boolean().optional(), // Block free-plan accounts from programmatic execution (API/MCP/A2A/generic webhooks/chat embeds). Requires BILLING_ENABLED. Off by default for dark rollout
TABLES_FRACTIONAL_ORDERING: z.boolean().optional(), // Order table rows by fractional order_key (O(1) insert/delete) instead of integer position
TABLE_SNAPSHOT_CACHE: z.boolean().optional(), // Mount tables into sandboxes by reference via a version-keyed CSV snapshot in object storage instead of draining the whole table into web-process heap

// Table feature limits (per plan). Apply when billing is disabled (free tier defaults) or for billed plans.
FREE_TABLES_LIMIT: z.number().optional(), // Max user tables per workspace on free tier (default: 3)
Expand Down
8 changes: 8 additions & 0 deletions apps/sim/lib/core/config/feature-flags.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,14 @@ const FEATURE_FLAGS = {
'user context — use enabled:true for global rollout rather than per-user targeting.',
fallback: 'MOTHERSHIP_BETA_FEATURES',
},
'table-snapshot-cache': {
description:
'Mount Sim tables into code sandboxes by reference via a version-keyed CSV snapshot in ' +
'object storage (reused across runs until the table mutates) instead of draining the whole ' +
'table into web-process heap. resolveInputFiles evaluates without user context — use ' +
'enabled:true for global rollout rather than per-user targeting.',
fallback: 'TABLE_SNAPSHOT_CACHE',
},
} satisfies Record<string, FeatureFlagDefinition>

/**
Expand Down
Loading
Loading