Skip to content

Commit 00a8c03

Browse files
fix(infra): add retry logic for transient R2 and service binding errors (tldraw#8280)
Sentry shows multiple transient Cloudflare R2 errors (`internal error (10001)`, `connectivity issue`, `network connection lost`, `unspecified error (0)`) and service binding errors (`proxy request failed`) in the tldrawusercontent worker. We found these while investigating user reports of image uploads not working.

This PR wraps R2 `head`, `put`, and `get` calls in `retry()` (3 attempts, 500ms wait) with an error matcher for known transient patterns. The upload body is buffered to an `ArrayBuffer` before retrying, since a `ReadableStream` is single-use. Service binding calls (`validateUpload`, `confirmUpload`) get the same treatment.

### Change type

- [x] `bugfix`

### Test plan

- Cannot be manually tested (transient infra errors)

### Release notes

- Fix transient R2 upload/download failures by adding retry logic

### Code changes

| Section   | LOC change |
| --------- | ---------- |
| Core code | +30 / -7   |
| Apps      | +14 / -4   |
1 parent ebb51b2 commit 00a8c03

4 files changed

Lines changed: 46 additions & 12 deletions

File tree

apps/dotcom/sync-worker/src/worker.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,8 @@ export default class Worker extends WorkerEntrypoint<Environment> {
318318
await db
319319
.insertInto('asset')
320320
.values({ objectName, fileId, userId })
321-
.executeTakeFirstOrThrow()
321+
.onConflict((oc) => oc.column('objectName').doNothing())
322+
.execute()
322323
message.ack()
323324
} catch (_e) {
324325
message.retry({

apps/dotcom/tldrawusercontent-worker/src/worker.ts

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ import {
99
handleUserAssetUpload,
1010
isAllowedOrigin,
1111
notFound,
12+
retry,
13+
TRANSIENT_RETRY_OPTIONS,
1214
} from '@tldraw/worker-shared'
1315
import { WorkerEntrypoint } from 'cloudflare:workers'
1416
import { cors } from 'itty-router'
@@ -43,7 +45,10 @@ export default class Worker extends WorkerEntrypoint<Environment> {
4345
let userId: string | null = null
4446
if (fileId) {
4547
const authHeader = request.headers.get('Authorization')
46-
const validation = await this.env.SYNC_WORKER.validateUpload(fileId, authHeader)
48+
const validation = await retry(
49+
() => this.env.SYNC_WORKER.validateUpload(fileId, authHeader),
50+
TRANSIENT_RETRY_OPTIONS
51+
)
4752
if (!validation.ok) {
4853
const status = validation.error === 'File not found' ? 404 : 403
4954
return Response.json({ error: validation.error }, { status })
@@ -64,7 +69,10 @@ export default class Worker extends WorkerEntrypoint<Environment> {
6469
// will pick up the association later if this fails.
6570
if (res.status === 200 && fileId) {
6671
try {
67-
await this.env.SYNC_WORKER.confirmUpload(objectName, fileId, userId)
72+
await retry(
73+
() => this.env.SYNC_WORKER.confirmUpload(objectName, fileId, userId),
74+
TRANSIENT_RETRY_OPTIONS
75+
)
6876
} catch (e) {
6977
console.error('Failed to queue asset association', e)
7078
}

packages/worker-shared/src/index.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
/// <reference no-default-lib="true"/>
22
/// <reference types="@cloudflare/workers-types" />
33

4+
export { retry } from '@tldraw/utils'
45
export { handleExtractBookmarkMetadataRequest } from './bookmarks'
56
export { forbidden, notFound } from './errors'
67
export {
@@ -12,4 +13,9 @@ export {
1213
} from './handleRequest'
1314
export { blockUnknownOrigins, isAllowedOrigin } from './origins'
1415
export { createSentry } from './sentry'
15-
export { handleUserAssetGet, handleUserAssetUpload, type R2BucketLike } from './userAssetUploads'
16+
export {
17+
TRANSIENT_RETRY_OPTIONS,
18+
handleUserAssetGet,
19+
handleUserAssetUpload,
20+
type R2BucketLike,
21+
} from './userAssetUploads'

packages/worker-shared/src/userAssetUploads.ts

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,20 @@
1+
import { retry } from '@tldraw/utils'
12
import { IRequest } from 'itty-router'
23
import { notFound } from './errors'
34

5+
/**
 * Reports whether an error looks like a transient infrastructure failure.
 *
 * The error is converted to a string and tested, case-insensitively, against
 * a fixed list of message fragments (e.g. "internal error", "network
 * connection lost", "proxy request failed", "connection reset"). The commit
 * description attributes these to Cloudflare R2 and service binding errors.
 *
 * NOTE(review): `String(error)` yields "[object Object]" for plain objects
 * that do not override `toString`, so a non-Error value carrying a `message`
 * property would never match — confirm only Error instances are thrown here.
 *
 * @param error - The value thrown by the failed operation.
 * @returns `true` if the stringified error contains a known transient pattern.
 */
function isTransientWorkerError(error: unknown): boolean {
6+
const msg = String(error)
7+
return /internal error|connectivity|network connection lost|service temporarily unavailable|proxy request failed|unspecified error|connection (refused|reset|timed?\s?out)/i.test(
8+
msg
9+
)
10+
}
11+
12+
/**
 * Shared options passed to `retry()` for the R2 bucket and service binding
 * calls in this change: 3 attempts with a wait of 500 (described as
 * "500ms wait" in the commit message), applied only to errors that
 * `isTransientWorkerError` accepts.
 */
export const TRANSIENT_RETRY_OPTIONS = {
13+
attempts: 3,
14+
waitDuration: 500,
15+
matchError: isTransientWorkerError,
16+
} as const
17+
418
// Minimal interface for R2Bucket operations used in this file
519
// This avoids type conflicts between ambient and imported Cloudflare types
620
// Using 'any' for return types to allow compatibility with different R2Bucket type definitions
@@ -57,13 +71,18 @@ export async function handleUserAssetUpload({
5771
body: ReadableStream | null
5872
headers: Headers
5973
}): Promise<Response> {
60-
if (await bucket.head(objectName)) {
74+
const existing = await retry(() => bucket.head(objectName), TRANSIENT_RETRY_OPTIONS)
75+
if (existing) {
6176
return Response.json({ error: 'Asset already exists' }, { status: 409 })
6277
}
6378

64-
const object = await bucket.put(objectName, body, {
65-
httpMetadata: headers,
66-
})
79+
// Buffer body so retries can re-send (ReadableStream is single-use)
80+
const buffer = body ? await new Response(body).arrayBuffer() : null
81+
82+
const object = await retry(
83+
() => bucket.put(objectName, buffer, { httpMetadata: headers }),
84+
TRANSIENT_RETRY_OPTIONS
85+
)
6786

6887
return Response.json({ object: objectName }, { headers: { etag: object.httpEtag } })
6988
}
@@ -115,10 +134,10 @@ export async function handleUserAssetGet({
115134
return cachedResponse
116135
}
117136

118-
const object = await bucket.get(objectName, {
119-
range: request.headers,
120-
onlyIf: request.headers,
121-
})
137+
const object = await retry(
138+
() => bucket.get(objectName, { range: request.headers, onlyIf: request.headers }),
139+
TRANSIENT_RETRY_OPTIONS
140+
)
122141

123142
if (!object) {
124143
return notFound()

0 commit comments

Comments
 (0)