Skip to content

Commit 0f3be28

Browse files
isaacrowntreeclaude
andcommitted
refactor: Address review feedback — use unstable_startWorker, move handler to workers/
- Replace manual spawn + stdout parsing with wrangler's unstable_startWorker API - Pass R2 bindings programmatically, eliminating temp config files - Move handler from templates/ to workers/r2-cache-populate-handler.ts - Separate fetch routing from populateCache logic for testability - Reduce default batch size from 100 to 5 (local server, lower memory) - Update compatibility_date to 2026-01-01 - Simplify remote binding config Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 07c3ce9 commit 0f3be28

File tree

5 files changed

+184
-210
lines changed

5 files changed

+184
-210
lines changed

.changeset/fix-r2-cache-upload.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,14 @@
55
fix: Use remote R2 binding for cache population to avoid API rate limits
66

77
For deployments with prerendered pages, the R2 incremental cache is now populated
8-
using a local wrangler dev worker with a remote R2 binding instead of `wrangler r2 bulk put`.
8+
using `unstable_startWorker` with a remote R2 binding instead of `wrangler r2 bulk put`.
99
This bypasses the Cloudflare API rate limit of 1,200 requests per 5 minutes that caused
1010
failures for large applications with thousands of prerendered pages.
1111

1212
The new approach:
13-
1. Derives a temporary wrangler config from the project's config with the R2 binding set to `remote: true`
14-
2. Starts a local worker via `wrangler dev` with a POST endpoint
15-
3. Sends batched cache entries to the local worker, which writes to R2 via the binding
16-
4. No deployment or authentication tokens required
13+
1. Starts a local worker via `unstable_startWorker` with the R2 binding configured programmatically
14+
2. Sends cache entries to the local worker a few at a time for low memory usage
15+
3. The worker writes entries to R2 via the binding (no API rate limits)
16+
4. No deployment, temp config files, or authentication tokens required
1717

1818
Closes #1088

packages/cloudflare/src/cli/commands/populate-cache.spec.ts

Lines changed: 69 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,20 @@ vi.mock("./helpers.js", () => ({
7878
quoteShellMeta: vi.fn((s) => s),
7979
}));
8080

81+
const mockWorkerFetch = vi.fn();
82+
const mockWorkerDispose = vi.fn();
83+
84+
vi.mock("wrangler", () => ({
85+
unstable_startWorker: vi.fn(() =>
86+
Promise.resolve({
87+
ready: Promise.resolve(),
88+
url: Promise.resolve(new URL("http://localhost:12345")),
89+
fetch: mockWorkerFetch,
90+
dispose: mockWorkerDispose,
91+
})
92+
),
93+
}));
94+
8195
describe("populateCache", () => {
8296
const setupMockFileSystem = () => {
8397
mockFs({
@@ -100,45 +114,66 @@ describe("populateCache", () => {
100114
({ target }) => {
101115
afterEach(() => {
102116
mockFs.restore();
117+
vi.clearAllMocks();
103118
});
104119

105-
// Note: R2 cache population now uses wrangler dev with remote bindings
106-
// instead of `wrangler r2 bulk put`. Integration tests would be needed
107-
// to fully test the wrangler dev flow. These unit tests verify the
108-
// populate dispatch logic still routes R2 correctly.
109-
test(`${target} - routes to R2 handler`, async () => {
120+
test(`${target} - starts worker and sends cache entries`, async () => {
110121
setupMockFileSystem();
111122

112-
// The R2 populate function now starts wrangler dev internally.
113-
// We can't easily unit test the full flow without mocking spawn,
114-
// so we verify the dispatch logic reaches the R2 case.
115-
// Full integration testing should cover the actual wrangler dev flow.
116-
await expect(
117-
populateCache(
118-
{
119-
outputDir: "/test/output",
120-
} as BuildOptions,
121-
{
122-
default: {
123-
override: {
124-
incrementalCache: "cf-r2-incremental-cache",
125-
},
123+
// Mock fetch to return a successful response for each batch
124+
global.fetch = vi.fn().mockResolvedValue(
125+
new Response(JSON.stringify({ written: 1, failed: 0 }), {
126+
status: 200,
127+
headers: { "Content-Type": "application/json" },
128+
})
129+
);
130+
131+
await populateCache(
132+
{
133+
outputDir: "/test/output",
134+
} as BuildOptions,
135+
{
136+
default: {
137+
override: {
138+
incrementalCache: "cf-r2-incremental-cache",
126139
},
127-
} as any, // eslint-disable-line @typescript-eslint/no-explicit-any
128-
{
129-
r2_buckets: [
130-
{
131-
binding: "NEXT_INC_CACHE_R2_BUCKET",
132-
bucket_name: "test-bucket",
133-
},
134-
],
135-
} as any, // eslint-disable-line @typescript-eslint/no-explicit-any
136-
{ target, shouldUsePreviewId: false },
137-
{} as any // eslint-disable-line @typescript-eslint/no-explicit-any
138-
)
139-
).rejects.toThrow();
140-
// This will throw because wrangler dev can't actually start in test,
141-
// but it confirms the R2 cache path is reached.
140+
},
141+
} as any, // eslint-disable-line @typescript-eslint/no-explicit-any
142+
{
143+
r2_buckets: [
144+
{
145+
binding: "NEXT_INC_CACHE_R2_BUCKET",
146+
bucket_name: "test-bucket",
147+
},
148+
],
149+
} as any, // eslint-disable-line @typescript-eslint/no-explicit-any
150+
{ target, shouldUsePreviewId: false },
151+
{} as any // eslint-disable-line @typescript-eslint/no-explicit-any
152+
);
153+
154+
const { unstable_startWorker: startWorker } = await import("wrangler");
155+
expect(startWorker).toHaveBeenCalledWith(
156+
expect.objectContaining({
157+
name: "open-next-cache-populate",
158+
compatibilityDate: "2026-01-01",
159+
bindings: expect.objectContaining({
160+
NEXT_INC_CACHE_R2_BUCKET: expect.objectContaining({
161+
type: "r2_bucket",
162+
bucket_name: "test-bucket",
163+
remote: target === "remote",
164+
}),
165+
}),
166+
})
167+
);
168+
169+
// Verify fetch was called with the /populate URL
170+
expect(global.fetch).toHaveBeenCalledWith(
171+
"http://localhost:12345/populate",
172+
expect.objectContaining({ method: "POST" })
173+
);
174+
175+
// Verify worker was disposed
176+
expect(mockWorkerDispose).toHaveBeenCalled();
142177
});
143178
}
144179
);

packages/cloudflare/src/cli/commands/populate-cache.ts

Lines changed: 39 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import { type ChildProcess, spawn } from "node:child_process";
21
import fs from "node:fs";
32
import fsp from "node:fs/promises";
43
import os from "node:os";
@@ -16,6 +15,7 @@ import type {
1615
import type { IncrementalCache, TagCache } from "@opennextjs/aws/types/overrides.js";
1716
import { globSync } from "glob";
1817
import { tqdm } from "ts-tqdm";
18+
import { unstable_startWorker } from "wrangler";
1919
import type { Unstable_Config as WranglerConfig } from "wrangler";
2020
import type yargs from "yargs";
2121

@@ -198,9 +198,9 @@ type PopulateCacheOptions = {
198198
*/
199199
wranglerConfigPath?: string;
200200
/**
201-
* Chunk sizes to use when populating KV cache. Ignored for R2.
201+
* Chunk sizes to use when populating the cache.
202202
*
203-
* @default 25 for KV, 50 for R2
203+
* @default 25 for KV, 5 for R2
204204
*/
205205
cacheChunkSize?: number;
206206
/**
@@ -210,26 +210,23 @@ type PopulateCacheOptions = {
210210
};
211211

212212
/**
213-
* Resolves the path to the cache populate handler file.
213+
* Resolves the path to the R2 cache populate handler file.
214214
*
215-
* The handler is a standalone worker that wrangler dev can run directly.
216-
* It's located in the templates directory relative to this file.
215+
* The handler is a standalone worker used with `unstable_startWorker`.
216+
* It's located in the workers directory relative to this file.
217217
*/
218218
function getCachePopulateHandlerPath(): string {
219219
const currentDir = path.dirname(fileURLToPath(import.meta.url));
220-
return path.join(currentDir, "../templates/cache-populate-handler.js");
220+
return path.join(currentDir, "../workers/r2-cache-populate-handler.js");
221221
}
222222

223223
/**
224-
* Populates the R2 incremental cache using a local wrangler dev worker
225-
* with a remote R2 binding.
224+
* Populates the R2 incremental cache using a local worker with a remote R2 binding.
226225
*
227226
* This approach:
228-
* 1. Derives a temporary wrangler config from the project's config with the
229-
* R2 cache binding set to `remote: true` (for remote targets).
230-
* 2. Starts a local worker via `wrangler dev` with a POST endpoint.
231-
* 3. Sends batched cache entries to the local worker.
232-
* 4. The worker writes entries to R2 using the binding (no API rate limits).
227+
* 1. Starts a local worker via `unstable_startWorker` with the R2 cache binding.
228+
* 2. Sends cache entries to the local worker a few at a time.
229+
* 3. The worker writes entries to R2 using the binding (no API rate limits).
233230
*
234231
* This bypasses the Cloudflare API rate limit of 1,200 requests per 5 minutes
235232
* that affects `wrangler r2 bulk put`.
@@ -260,122 +257,51 @@ async function populateR2IncrementalCache(
260257
const useRemote = populateCacheOptions.target === "remote";
261258
const handlerPath = getCachePopulateHandlerPath();
262259

263-
// Create a temporary wrangler config derived from the project's config.
264-
// Only the R2 cache binding is propagated, with `remote` set appropriately.
265-
const tempDir = await fsp.mkdtemp(path.join(os.tmpdir(), "open-next-r2-populate-"));
260+
const worker = await unstable_startWorker({
261+
name: "open-next-cache-populate",
262+
entrypoint: handlerPath,
263+
compatibilityDate: "2026-01-01",
264+
bindings: {
265+
[R2_CACHE_BINDING_NAME]: {
266+
type: "r2_bucket",
267+
bucket_name: binding.bucket_name,
268+
...(binding.jurisdiction && { jurisdiction: binding.jurisdiction }),
269+
remote: useRemote,
270+
},
271+
},
272+
dev: {
273+
server: { port: 0 },
274+
inspector: false,
275+
watch: false,
276+
liveReload: false,
277+
},
278+
});
266279

267280
try {
268-
const tempWranglerConfig = {
269-
name: "open-next-cache-populate",
270-
main: handlerPath,
271-
compatibility_date: "2024-12-01",
272-
r2_buckets: [
273-
{
274-
binding: R2_CACHE_BINDING_NAME,
275-
bucket_name: binding.bucket_name,
276-
...(binding.jurisdiction && { jurisdiction: binding.jurisdiction }),
277-
...(useRemote && { remote: true }),
278-
},
279-
],
280-
};
281-
282-
const configPath = path.join(tempDir, "wrangler.json");
283-
fs.writeFileSync(configPath, JSON.stringify(tempWranglerConfig, null, 2));
284-
285-
// Start a local worker via wrangler dev
286-
const { url, stop } = await startWranglerDev(buildOpts, configPath);
287-
288-
try {
289-
await sendCacheEntries(url, assets, prefix, populateCacheOptions.cacheChunkSize);
290-
} finally {
291-
stop();
292-
}
281+
await worker.ready;
282+
const url = await worker.url;
283+
const populateUrl = new URL("/populate", url).href;
284+
await sendCacheEntries(populateUrl, assets, prefix, populateCacheOptions.cacheChunkSize);
293285
} finally {
294-
fs.rmSync(tempDir, { recursive: true, force: true });
286+
await worker.dispose();
295287
}
296288

297289
logger.info(`Successfully populated cache with ${assets.length} assets`);
298290
}
299291

300292
/**
301-
* Starts `wrangler dev` with the given config and waits for it to be ready.
293+
* Sends cache entries to the local populate worker a few at a time.
302294
*
303-
* @returns The local URL and a function to stop the worker.
304-
*/
305-
function startWranglerDev(
306-
buildOpts: BuildOptions,
307-
configPath: string
308-
): Promise<{ url: string; stop: () => void }> {
309-
return new Promise((resolve, reject) => {
310-
const proc: ChildProcess = spawn(
311-
buildOpts.packager,
312-
[
313-
buildOpts.packager === "bun" ? "x" : "exec",
314-
"wrangler",
315-
"dev",
316-
"--config",
317-
configPath,
318-
"--port",
319-
"0",
320-
],
321-
{
322-
shell: true,
323-
stdio: ["ignore", "pipe", "pipe"],
324-
env: {
325-
...process.env,
326-
CLOUDFLARE_LOAD_DEV_VARS_FROM_DOT_ENV: "false",
327-
},
328-
}
329-
);
330-
331-
let output = "";
332-
const timeout = setTimeout(() => {
333-
proc.kill();
334-
reject(new Error(`wrangler dev timed out waiting for ready signal.\nOutput:\n${output}`));
335-
}, 60_000);
336-
337-
const onData = (data: Buffer) => {
338-
output += data.toString();
339-
const match = output.match(/http:\/\/(?:localhost|0\.0\.0\.0|127\.0\.0\.1):(\d+)/);
340-
if (match?.[1]) {
341-
clearTimeout(timeout);
342-
const url = `http://localhost:${match[1]}`;
343-
resolve({
344-
url,
345-
stop: () => {
346-
proc.kill();
347-
},
348-
});
349-
}
350-
};
351-
352-
proc.stdout?.on("data", onData);
353-
proc.stderr?.on("data", onData);
354-
355-
proc.on("error", (err) => {
356-
clearTimeout(timeout);
357-
reject(err);
358-
});
359-
360-
proc.on("exit", (code) => {
361-
clearTimeout(timeout);
362-
if (code !== 0 && code !== null) {
363-
reject(new Error(`wrangler dev exited with code ${code}\nOutput:\n${output}`));
364-
}
365-
});
366-
});
367-
}
368-
369-
/**
370-
* Sends cache entries to the local populate worker in batches.
295+
* Since the worker is local, small batches avoid excessive memory usage
296+
* without meaningful overhead.
371297
*/
372298
async function sendCacheEntries(
373299
workerUrl: string,
374300
assets: CacheAsset[],
375301
prefix: string | undefined,
376302
cacheChunkSize?: number
377303
): Promise<void> {
378-
const batchSize = Math.max(1, cacheChunkSize ?? 100);
304+
const batchSize = Math.max(1, cacheChunkSize ?? 5);
379305
const totalBatches = Math.ceil(assets.length / batchSize);
380306

381307
logger.info(`Populating ${assets.length} cache entries in batches of ${batchSize}`);

0 commit comments

Comments
 (0)