Skip to content

Commit 3807ada

Browse files
fix(dotcom): increase zero-cache volume sizes and scope kill switch reload (tldraw#8407)
Production zero-cache (replication manager) was crash-looping due to `SQLITE_FULL` — the 1GB default Fly.io volume filled up when the backup replicator tried to write. This PR sets explicit volume sizes in the Fly.io deploy templates (8GB for production, 1GB for staging/preview) and narrows the zero kill switch reload to only affect users actually running proper Zero.

### Change type

- [x] `bugfix`

### Test plan

1. Deploy to staging and verify zero-cache volumes are created with correct `initial_size`
2. Enable `zero_kill_switch` flag and verify only users with `zero_enabled` or localStorage override get reloaded

### Release notes

- Fix zero-cache crash loop caused by undersized Fly.io volumes in production

### Code changes

| Section        | LOC change |
| -------------- | ---------- |
| Apps           | +14 / -4   |
| Config/tooling | +5 / -1    |
1 parent 88d590e commit 3807ada

6 files changed

Lines changed: 51 additions & 10 deletions

File tree

apps/dotcom/client/src/tla/utils/FeatureFlagPoller.test.ts

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,12 @@ import { beforeAll, beforeEach, describe, expect, it, vi } from 'vitest'
22
import type { FeatureFlags } from './FeatureFlagPoller'
33

44
const mockFetch = vi.fn()
5+
let mockLocalStorage: Record<string, string> = {}
56
vi.mock('tldraw', () => {
6-
return { fetch: (...args: any[]) => mockFetch(...args) }
7+
return {
8+
fetch: (...args: any[]) => mockFetch(...args),
9+
getFromLocalStorage: (key: string) => mockLocalStorage[key] ?? null,
10+
}
711
})
812

913
function makeFlags(overrides: Partial<FeatureFlags> = {}): FeatureFlags {
@@ -33,19 +37,42 @@ describe('shouldReloadForFlagChange', () => {
3337
})
3438

3539
describe('zero_kill_switch transitions', () => {
36-
it('reloads when kill switch goes false → true', () => {
37-
const prev = makeFlags({ zero_kill_switch: { enabled: false } })
40+
it('reloads when kill switch goes false → true and user had zero enabled', () => {
41+
const prev = makeFlags({
42+
zero_kill_switch: { enabled: false },
43+
zero_enabled: { enabled: true },
44+
})
3845
const next = makeFlags({ zero_kill_switch: { enabled: true } })
3946
expect(shouldReloadForFlagChange(prev, next)).toBe(true)
4047
})
4148

42-
it('reloads when kill switch goes undefined → true', () => {
43-
const prev = makeFlags()
49+
it('reloads when kill switch goes undefined → true and user had zero enabled', () => {
50+
const prev = makeFlags({ zero_enabled: { enabled: true } })
4451
delete (prev as any).zero_kill_switch
4552
const next = makeFlags({ zero_kill_switch: { enabled: true } })
4653
expect(shouldReloadForFlagChange(prev, next)).toBe(true)
4754
})
4855

56+
it('does NOT reload when kill switch activates but user did not have zero enabled', () => {
57+
const prev = makeFlags({
58+
zero_kill_switch: { enabled: false },
59+
zero_enabled: { enabled: false },
60+
})
61+
const next = makeFlags({ zero_kill_switch: { enabled: true } })
62+
expect(shouldReloadForFlagChange(prev, next)).toBe(false)
63+
})
64+
65+
it('reloads when kill switch activates and user has localStorage override', () => {
66+
mockLocalStorage = { useProperZero: 'true' }
67+
const prev = makeFlags({
68+
zero_kill_switch: { enabled: false },
69+
zero_enabled: { enabled: false },
70+
})
71+
const next = makeFlags({ zero_kill_switch: { enabled: true } })
72+
expect(shouldReloadForFlagChange(prev, next)).toBe(true)
73+
mockLocalStorage = {}
74+
})
75+
4976
it('does NOT reload when kill switch stays true → true', () => {
5077
const prev = makeFlags({ zero_kill_switch: { enabled: true } })
5178
const next = makeFlags({ zero_kill_switch: { enabled: true } })

apps/dotcom/client/src/tla/utils/FeatureFlagPoller.tsx

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { EvaluatedFeatureFlag, FeatureFlagKey } from '@tldraw/dotcom-shared'
22
import { useEffect } from 'react'
3-
import { fetch } from 'tldraw'
3+
import { fetch, getFromLocalStorage } from 'tldraw'
44

55
export type FeatureFlags = Record<FeatureFlagKey, EvaluatedFeatureFlag>
66

@@ -52,12 +52,16 @@ const REFETCH_INTERVAL = 60000 // 1 minute
5252

5353
/**
5454
* Determines whether a flag change should trigger a page reload.
55-
* Only reloads when zero_kill_switch transitions to true from any non-true state.
55+
* Only reloads when zero_kill_switch transitions to true AND the user
56+
* was actually using proper Zero (no point reloading polyfill users).
5657
*/
5758
export function shouldReloadForFlagChange(prev: FeatureFlags, next: FeatureFlags): boolean {
5859
const prevKillSwitch = prev.zero_kill_switch?.enabled
5960
const nextKillSwitch = next.zero_kill_switch?.enabled
60-
return nextKillSwitch === true && prevKillSwitch !== true
61+
if (nextKillSwitch !== true || prevKillSwitch === true) return false
62+
// Only reload if this user was actually on proper Zero
63+
const wasUsingZero = prev.zero_enabled?.enabled || getFromLocalStorage('useProperZero') === 'true'
64+
return !!wasUsingZero
6165
}
6266

6367
/**

apps/dotcom/zero-cache/flyio-replication-manager.template.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ cpus = __VM_CPUS
2828
[mounts]
2929
source = "sqlite_repl"
3030
destination = "/data"
31+
initial_size = "__VOLUME_SIZE"
3132

3233
[[files]]
3334
guest_path = "/etc/litestream.yml"

apps/dotcom/zero-cache/flyio-view-syncer.template.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ cpus = __VM_CPUS
3131
[mounts]
3232
source = "sqlite_db"
3333
destination = "/data"
34+
initial_size = "__VOLUME_SIZE"
3435

3536
[[files]]
3637
guest_path = "/etc/litestream.yml"

apps/dotcom/zero-cache/flyio.template.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ cpus = 2
2525
[mounts]
2626
source = "sqlite_db"
2727
destination = "/data"
28+
initial_size = "1gb"
2829

2930
[env]
3031
ZERO_REPLICA_FILE = "/data/sync-replica.db"

internal/scripts/deploy-dotcom.ts

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -285,8 +285,12 @@ const zeroQueryUrl = `${env.MULTIPLAYER_SERVER.replace(/^ws/, 'http')}/app/zero/
285285
// Fly.io shared-cpu VM sizes per environment.
286286
// Max shared-cpu is 8x (8 CPUs, 16 GB RAM).
287287
const zeroVmSizes = {
288-
staging: { rm: { cpus: 1, memory: '2gb' }, vs: { cpus: 2, memory: '4gb' } },
289-
production: { rm: { cpus: 4, memory: '8gb' }, vs: { cpus: 8, memory: '16gb' } },
288+
staging: { rm: { cpus: 1, memory: '2gb' }, vs: { cpus: 2, memory: '4gb' }, volumeSize: '1gb' },
289+
production: {
290+
rm: { cpus: 4, memory: '8gb' },
291+
vs: { cpus: 8, memory: '16gb' },
292+
volumeSize: '8gb',
293+
},
290294
preview: { single: { cpus: 2, memory: '2gb' } },
291295
} as const
292296

@@ -329,6 +333,7 @@ interface SingleNodeVmSizes {
329333
interface MultiNodeVmSizes {
330334
rm: VmSize
331335
vs: VmSize
336+
volumeSize: string
332337
}
333338
const zeroVm = zeroVmSizes[env.TLDRAW_ENV as keyof typeof zeroVmSizes] as
334339
| SingleNodeVmSizes
@@ -732,6 +737,7 @@ function updateFlyioReplicationManagerToml(appName: string, backupPath: string):
732737
.replaceAll('__RM_CHANGE_MAX_CONNS', String(zeroConns.rm.change))
733738
.replaceAll('__VM_CPUS', String(zeroVm.rm.cpus))
734739
.replaceAll('__VM_MEMORY', zeroVm.rm.memory)
740+
.replaceAll('__VOLUME_SIZE', zeroVm.volumeSize)
735741

736742
fs.writeFileSync(flyioTomlFile, updatedContent, 'utf-8')
737743
}
@@ -767,6 +773,7 @@ function updateFlyioViewSyncerToml(
767773
.replaceAll('__VS_CHANGE_MAX_CONNS', String(zeroConns.vs.change))
768774
.replaceAll('__VM_CPUS', String(zeroVm.vs.cpus))
769775
.replaceAll('__VM_MEMORY', zeroVm.vs.memory)
776+
.replaceAll('__VOLUME_SIZE', zeroVm.volumeSize)
770777

771778
fs.writeFileSync(flyioTomlFile, updatedContent, 'utf-8')
772779

0 commit comments

Comments
 (0)