diff --git a/.mypy.ini b/.mypy.ini index 5d02772f..7d2fe13f 100644 --- a/.mypy.ini +++ b/.mypy.ini @@ -3,6 +3,9 @@ [mypy-docker] ignore_missing_imports = True +[mypy-docker.*] +ignore_missing_imports = True + [mypy-tzlocal] ignore_missing_imports = True diff --git a/benchmarks/000.microbenchmarks/040.server-reply/config.json b/benchmarks/000.microbenchmarks/040.server-reply/config.json index 93ce2f56..8ff6eec5 100644 --- a/benchmarks/000.microbenchmarks/040.server-reply/config.json +++ b/benchmarks/000.microbenchmarks/040.server-reply/config.json @@ -1,6 +1,6 @@ { "timeout": 120, "memory": 128, - "languages": ["python", "nodejs"], + "languages": ["python"], "modules": [] } diff --git a/benchmarks/100.webapps/110.dynamic-html/config.json b/benchmarks/100.webapps/110.dynamic-html/config.json index 7e317037..dc516039 100644 --- a/benchmarks/100.webapps/110.dynamic-html/config.json +++ b/benchmarks/100.webapps/110.dynamic-html/config.json @@ -1,6 +1,22 @@ { "timeout": 10, "memory": 128, - "languages": ["python", "nodejs", "java"], + "languages": [ + { + "language": "python", + "variants": { + "default": "default", + "cloudflare": "default" + } + }, + { + "language": "nodejs", + "variants": { + "default": "default", + "cloudflare": "default" + } + }, + "java" + ], "modules": [] } diff --git a/benchmarks/100.webapps/120.uploader/config.json b/benchmarks/100.webapps/120.uploader/config.json index 49e614ac..23b7293d 100644 --- a/benchmarks/100.webapps/120.uploader/config.json +++ b/benchmarks/100.webapps/120.uploader/config.json @@ -1,6 +1,21 @@ { "timeout": 60, "memory": 128, - "languages": ["python", "nodejs"], + "languages": [ + { + "language": "python", + "variants": { + "default": "default", + "cloudflare": {"workers": "cloudflare", "containers": "default"} + } + }, + { + "language": "nodejs", + "variants": { + "default": "default", + "cloudflare": {"workers": "cloudflare", "containers": "default"} + } + } + ], "modules": ["storage"] } diff --git a/benchmarks/100.webapps/120.uploader/nodejs/cloudflare/function.js b/benchmarks/100.webapps/120.uploader/nodejs/cloudflare/function.js new file mode 100644 index 00000000..58612982 --- /dev/null +++ b/benchmarks/100.webapps/120.uploader/nodejs/cloudflare/function.js @@ -0,0 +1,35 @@ +// Copyright 2020-2025 ETH Zurich and the SeBS authors. All rights reserved. +// Cloudflare Workers differ from the default Node.js version: Workers require +// ES module syntax (no CommonJS `require`) and do not ship the `request` npm +// package, so we use the platform-native `fetch` API and buffer the response +// into /tmp instead of piping a stream. 
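+// The explicit User-Agent on the fetch below matters: some benchmark data
+// sources (e.g. Wikimedia) reject requests that omit one, which is also why
+// the container handler patches the same header into the legacy `request` module.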
+import * as fs from 'node:fs'; +import * as path from 'node:path'; +import { storage } from './storage'; + +let storage_handler = new storage(); + +export const handler = async function(event) { + let bucket = event.bucket.bucket; + let output_prefix = event.bucket.output; + let url = event.object.url; + let upload_key = path.basename(url); + let download_path = path.join('/tmp', upload_key); + + const response = await fetch(url, { + headers: { + 'User-Agent': 'SeBS/1.2 (https://github.com/spcl/serverless-benchmarks) SeBS Benchmark Suite/1.2' + } + }); + const buffer = await response.arrayBuffer(); + fs.writeFileSync(download_path, Buffer.from(buffer)); + + let [keyName, uploadPromise] = storage_handler.upload( + bucket, + path.join(output_prefix, upload_key), + download_path + ); + await uploadPromise; + + return {result: {bucket: bucket, url: url, key: keyName}}; +}; diff --git a/benchmarks/100.webapps/120.uploader/python/cloudflare/function.py b/benchmarks/100.webapps/120.uploader/python/cloudflare/function.py new file mode 100644 index 00000000..e4028b14 --- /dev/null +++ b/benchmarks/100.webapps/120.uploader/python/cloudflare/function.py @@ -0,0 +1,61 @@ +# Copyright 2020-2025 ETH Zurich and the SeBS authors. All rights reserved. +# Cloudflare Workers differ from the default Python version: the Workers +# Python runtime is Pyodide-based and does not support `urllib.request`, so +# we download via Pyodide's async `pyfetch` and wrap it with `run_sync` to +# keep the synchronous handler signature. + +import datetime +import os + +from pyodide.ffi import run_sync +from pyodide.http import pyfetch + +from . import storage +client = storage.storage.get_instance() + +SEBS_USER_AGENT = "SeBS/1.2 (https://github.com/spcl/serverless-benchmarks) SeBS Benchmark Suite/1.2" + +async def do_request(url, download_path): + headers = {'User-Agent': SEBS_USER_AGENT} + + res = await pyfetch(url, headers=headers) + bs = await res.bytes() + + with open(download_path, 'wb') as f: + f.write(bs) + +def handler(event): + + bucket = event.get('bucket').get('bucket') + output_prefix = event.get('bucket').get('output') + url = event.get('object').get('url') + name = os.path.basename(url) + download_path = '/tmp/{}'.format(name) + + process_begin = datetime.datetime.now() + + run_sync(do_request(url, download_path)) + + size = os.path.getsize(download_path) + process_end = datetime.datetime.now() + + upload_begin = datetime.datetime.now() + key_name = client.upload(bucket, os.path.join(output_prefix, name), download_path) + upload_end = datetime.datetime.now() + + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + upload_time = (upload_end - upload_begin) / datetime.timedelta(microseconds=1) + return { + 'result': { + 'bucket': bucket, + 'url': url, + 'key': key_name + }, + 'measurement': { + 'download_time': 0, + 'download_size': 0, + 'upload_time': upload_time, + 'upload_size': size, + 'compute_time': process_time + } + } diff --git a/benchmarks/100.webapps/130.crud-api/config.json b/benchmarks/100.webapps/130.crud-api/config.json index 25c6cb05..66532ad8 100644 --- a/benchmarks/100.webapps/130.crud-api/config.json +++ b/benchmarks/100.webapps/130.crud-api/config.json @@ -2,8 +2,20 @@ "timeout": 30, "memory": 128, "languages": [ - "python", - "nodejs" + { + "language": "python", + "variants": { + "default": "default", + "cloudflare": "default" + } + }, + { + "language": "nodejs", + "variants": { + "default": "default", + "cloudflare": "default" + } + } ], "modules": [ "nosql" 
diff --git a/benchmarks/200.multimedia/210.thumbnailer/config.json b/benchmarks/200.multimedia/210.thumbnailer/config.json index 7ba71f4d..6bb025a9 100644 --- a/benchmarks/200.multimedia/210.thumbnailer/config.json +++ b/benchmarks/200.multimedia/210.thumbnailer/config.json @@ -1,7 +1,23 @@ { "timeout": 60, "memory": 256, - "languages": ["python", "nodejs", "cpp"], + "languages": [ + { + "language": "python", + "variants": { + "default": "default", + "cloudflare": "default" + } + }, + { + "language": "nodejs", + "variants": { + "default": "default", + "cloudflare": "default" + } + }, + "cpp" + ], "modules": ["storage"], "cpp_dependencies": ["sdk", "opencv", "libjpeg-turbo", "boost"] } diff --git a/benchmarks/200.multimedia/220.video-processing/config.json b/benchmarks/200.multimedia/220.video-processing/config.json index 94ede792..d9596b9a 100644 --- a/benchmarks/200.multimedia/220.video-processing/config.json +++ b/benchmarks/200.multimedia/220.video-processing/config.json @@ -1,6 +1,14 @@ { "timeout": 60, "memory": 512, - "languages": ["python"], + "languages": [ + { + "language": "python", + "variants": { + "default": "default", + "cloudflare": "default" + } + } + ], "modules": ["storage"] } diff --git a/benchmarks/300.utilities/311.compression/config.json b/benchmarks/300.utilities/311.compression/config.json index 8edb99e5..d69311ae 100644 --- a/benchmarks/300.utilities/311.compression/config.json +++ b/benchmarks/300.utilities/311.compression/config.json @@ -1,6 +1,22 @@ { "timeout": 60, "memory": 256, - "languages": ["python", "nodejs"], + "languages": [ + { + "language": "python", + "variants": { + "default": "default", + "cloudflare": "default" + } + }, + { + "language": "nodejs", + "variants": { + "default": "default", + "cloudflare": {"workers": "default", "containers": "default"} + } + } + ], "modules": ["storage"] } + diff --git a/benchmarks/400.inference/411.image-recognition/config.json b/benchmarks/400.inference/411.image-recognition/config.json index 8c5010fc..61299bfd 100644 --- a/benchmarks/400.inference/411.image-recognition/config.json +++ b/benchmarks/400.inference/411.image-recognition/config.json @@ -1,7 +1,16 @@ { "timeout": 60, "memory": 768, - "languages": ["python", "cpp"], + "languages": [ + { + "language": "python", + "variants": { + "default": "default", + "cloudflare": {"workers": "default", "containers": "cloudflare"} + } + }, + "cpp" + ], "modules": ["storage"], "cpp_dependencies": ["sdk", "torch", "opencv"] } diff --git a/benchmarks/400.inference/411.image-recognition/python/cloudflare/requirements.txt.3.11 b/benchmarks/400.inference/411.image-recognition/python/cloudflare/requirements.txt.3.11 new file mode 100644 index 00000000..c3e648b7 --- /dev/null +++ b/benchmarks/400.inference/411.image-recognition/python/cloudflare/requirements.txt.3.11 @@ -0,0 +1,6 @@ +# Copyright 2020-2025 ETH Zurich and the SeBS authors. All rights reserved. +pillow==10.3.0 +torch==2.0.0 +torchvision==0.15.1 +# prevent installing numpy 2.0 +numpy==1.24.0 diff --git a/benchmarks/400.inference/411.image-recognition/python/cloudflare/requirements.txt.3.12 b/benchmarks/400.inference/411.image-recognition/python/cloudflare/requirements.txt.3.12 new file mode 100644 index 00000000..c3e648b7 --- /dev/null +++ b/benchmarks/400.inference/411.image-recognition/python/cloudflare/requirements.txt.3.12 @@ -0,0 +1,6 @@ +# Copyright 2020-2025 ETH Zurich and the SeBS authors. All rights reserved. 
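+# Same pins as the 3.11 file; kept as a separate requirements.txt.3.12 because
+# SeBS appears to select requirements.txt.<python-version> per runtime
+# (inferred from the file naming convention).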
+pillow==10.3.0 +torch==2.0.0 +torchvision==0.15.1 +# prevent installing numpy 2.0 +numpy==1.24.0 diff --git a/benchmarks/500.scientific/501.graph-pagerank/config.json b/benchmarks/500.scientific/501.graph-pagerank/config.json index 90e8c7f8..2fbbec8c 100644 --- a/benchmarks/500.scientific/501.graph-pagerank/config.json +++ b/benchmarks/500.scientific/501.graph-pagerank/config.json @@ -1,7 +1,16 @@ { "timeout": 120, "memory": 512, - "languages": ["python", "cpp"], + "languages": [ + { + "language": "python", + "variants": { + "default": "default", + "cloudflare": "default" + } + }, + "cpp" + ], "modules": [], "cpp_dependencies": ["igraph"] } diff --git a/benchmarks/500.scientific/502.graph-mst/config.json b/benchmarks/500.scientific/502.graph-mst/config.json index e80fb435..9749feb3 100644 --- a/benchmarks/500.scientific/502.graph-mst/config.json +++ b/benchmarks/500.scientific/502.graph-mst/config.json @@ -1,6 +1,14 @@ { "timeout": 120, "memory": 512, - "languages": ["python"], + "languages": [ + { + "language": "python", + "variants": { + "default": "default", + "cloudflare": "default" + } + } + ], "modules": [] } diff --git a/benchmarks/500.scientific/503.graph-bfs/config.json b/benchmarks/500.scientific/503.graph-bfs/config.json index 90e8c7f8..2fbbec8c 100644 --- a/benchmarks/500.scientific/503.graph-bfs/config.json +++ b/benchmarks/500.scientific/503.graph-bfs/config.json @@ -1,7 +1,16 @@ { "timeout": 120, "memory": 512, - "languages": ["python", "cpp"], + "languages": [ + { + "language": "python", + "variants": { + "default": "default", + "cloudflare": "default" + } + }, + "cpp" + ], "modules": [], "cpp_dependencies": ["igraph"] } diff --git a/benchmarks/500.scientific/504.dna-visualisation/config.json b/benchmarks/500.scientific/504.dna-visualisation/config.json index ff297ac5..158e2ff5 100644 --- a/benchmarks/500.scientific/504.dna-visualisation/config.json +++ b/benchmarks/500.scientific/504.dna-visualisation/config.json @@ -1,6 +1,14 @@ { "timeout": 60, "memory": 2048, - "languages": ["python"], + "languages": [ + { + "language": "python", + "variants": { + "default": "default", + "cloudflare": "default" + } + } + ], "modules": ["storage"] } diff --git a/benchmarks/wrappers/cloudflare/nodejs/build.js b/benchmarks/wrappers/cloudflare/nodejs/build.js new file mode 100644 index 00000000..886955a9 --- /dev/null +++ b/benchmarks/wrappers/cloudflare/nodejs/build.js @@ -0,0 +1,190 @@ +/** + * build.js — Convert a Node.js benchmark into a Cloudflare Workers-compatible bundle. + * + * Why this exists: + * Cloudflare Workers do not ship a Node.js runtime or a filesystem at deploy + * time: there is no `node_modules` directory, no `require()` resolution, and + * only a curated subset of Node built-ins is available (and only when opted + * in via the `node:` prefix and the `nodejs_compat` compatibility flag). + * Our SeBS benchmarks, however, are authored as regular Node.js code. This + * script bridges that gap by bundling the benchmark + its dependencies into + * a single ESM module that the Workers runtime can load. + * + * High-level pipeline: + * 1. Discover source files under the wrapper directory (skipping tests, + * node_modules, dotfiles, and the previous build output). + * 2. Run esbuild on every JS/TS entry point with a Workers-friendly config + * (ESM output, neutral platform, ES2020 target, tree-shaking). + * 3. Apply the `nodeBuiltinsPlugin` to rewrite imports so that: + * - Node built-ins always use the `node:` prefix required by Workers. 
+ * - `cloudflare:*` imports stay external (resolved by the runtime). + * - The legacy `request` npm module is swapped for a fetch-based + * polyfill, since it cannot run under Workers. + * 4. Copy any non-code assets (templates, SQL, etc.) into `dist/` unchanged. + */ + +const { build } = require('esbuild'); +const fs = require('fs'); +const { join, extname, dirname, relative } = require('path'); + +// Recursively collect every file that should be part of the Workers bundle. +// Excludes test directories, node_modules, build artifacts, and this script +// itself so that only benchmark sources and the wrapper code get processed. +function getAllFiles(dir, fileList = []) { + const files = fs.readdirSync(dir, { withFileTypes: true }); + for (const file of files) { + const filePath = join(dir, file.name); + if (file.isDirectory()) { + if (file.name !== 'node_modules' && + file.name !== 'test' && + file.name !== 'tests' && + file.name !== '__tests__' && + file.name !== 'dist' && + !file.name.startsWith('.')) { + getAllFiles(filePath, fileList); + } + } else { + if (!file.name.includes('.test.') && + !file.name.includes('.spec.') && + file.name !== 'build.js' && + file.name !== 'wrangler.toml') { + fileList.push(filePath); + } + } + } + return fileList; +} + +function copyFile(src, dest) { + const destDir = dirname(dest); + if (!fs.existsSync(destDir)) { + fs.mkdirSync(destDir, { recursive: true }); + } + fs.copyFileSync(src, dest); +} + +// esbuild plugin that rewrites module imports so the output works on the +// Cloudflare Workers runtime. Workers only accept Node built-ins via the +// `node:` prefix (with the `nodejs_compat` flag enabled on the Worker), do +// not support arbitrary npm packages that rely on Node's networking stack, +// and resolve their own `cloudflare:*` imports at runtime. +const nodeBuiltinsPlugin = { + name: 'node-builtins-external', + setup(build) { + const { resolve } = require('path'); + + // Imports already using the `node:` or `cloudflare:` prefix are provided + // by the Workers runtime itself — leave them external so esbuild does not + // try to bundle them (which would fail, since they are not on disk). + build.onResolve({ filter: /^(node:|cloudflare:)/ }, (args) => { + return { path: args.path, external: true }; + }); + + // Benchmarks commonly `require('fs')`, `require('path')`, etc. Workers + // reject those bare specifiers; rewrite them to the `node:`-prefixed + // form and mark them external so the runtime resolves them. + build.onResolve({ filter: /^(fs|querystring|path|crypto|stream|buffer|util|events|http|https|net|tls|zlib|os|child_process|tty|assert|url|constants)$/ }, (args) => { + return { path: 'node:' + args.path, external: true }; + }); + + // The `request` npm module depends on Node's http/https clients and is + // incompatible with Workers. Redirect every `require('request')` to our + // fetch-based shim so benchmark code can keep the same call sites. + build.onResolve({ filter: /^request$/ }, (args) => { + const wrapperDir = __dirname; + return { + path: resolve(wrapperDir, 'request-polyfill.js') + }; + }); + + // `graceful-fs` monkey-patches the `fs` module at runtime, which Workers + // rejects ("object is not extensible"). Redirect it straight to node:fs + // so the patching never runs and consumers get the same API. 
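+    // For example, a transitive `require('graceful-fs')` inside a dependency
+    // now resolves straight to the runtime-provided node:fs module.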
+ build.onResolve({ filter: /^graceful-fs$/ }, () => { + return { path: 'node:fs', external: true }; + }); + } +}; + + +async function customBuild() { + const srcDir = './'; + const outDir = './dist'; + + // Start from a clean output directory so stale artifacts from a previous + // build cannot leak into the Worker upload. + if (fs.existsSync(outDir)) { + fs.rmSync(outDir, { recursive: true }); + } + fs.mkdirSync(outDir, { recursive: true }); + + try { + const files = getAllFiles(srcDir); + + // Split discovered files: code goes through esbuild, everything else + // (JSON fixtures, templates, SQL, binary assets, ...) is copied verbatim. + const jsFiles = files.filter(f => + ['.js', '.ts', '.jsx', '.tsx'].includes(extname(f)) + ); + + const otherFiles = files.filter(f => + !['.js', '.ts', '.jsx', '.tsx'].includes(extname(f)) + ); + + console.log('Building JS files:', jsFiles); + + if (jsFiles.length > 0) { + // esbuild options chosen for Workers compatibility: + // - format: 'esm' Workers modules must be ES modules. + // - platform: 'neutral' Avoid Node- or browser-specific resolution; + // the plugin above handles Node built-ins + // explicitly. + // - target: 'es2020' Matches the V8 version used by Workers. + // - bundle + treeShaking Flattens dependencies into one module and + // drops dead code to stay under Workers' + // script size limit. + // - define.__dirname Node's `__dirname` does not exist in + // Workers; stub it with a harmless constant + // so benchmark code that references it still + // compiles. + // - define.global Workers expose `globalThis` rather than + // `global`; alias the two for compatibility. + await build({ + entryPoints: jsFiles, + bundle: true, + format: 'esm', + outdir: outDir, + outbase: srcDir, + platform: 'neutral', + target: 'es2020', + sourcemap: true, + allowOverwrite: true, + plugins: [nodeBuiltinsPlugin], + define: { + 'process.env.NODE_ENV': '"production"', + 'global': 'globalThis', + '__dirname': '"/bundle"' + }, + mainFields: ['module', 'main'], + treeShaking: true, + }); + } + + // Non-code assets (e.g. HTML/CSS templates, JSON payloads) need to ship + // alongside the bundle at their original relative paths so the worker + // can read them via the runtime's asset APIs. 
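+    // For example, a template file sitting next to the benchmark source ends up
+    // at dist/<same relative path>, so runtime code can keep using the original path.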
+ for (const file of otherFiles) { + const relativePath = relative(srcDir, file); + const destPath = join(outDir, relativePath); + copyFile(file, destPath); + console.log(`Copied: ${relativePath}`); + } + + console.log('✓ Build completed successfully'); + } catch (error) { + console.error('Build failed:', error); + process.exit(1); + } +} + +customBuild(); \ No newline at end of file diff --git a/benchmarks/wrappers/cloudflare/nodejs/container/handler.js b/benchmarks/wrappers/cloudflare/nodejs/container/handler.js new file mode 100644 index 00000000..dd4df731 --- /dev/null +++ b/benchmarks/wrappers/cloudflare/nodejs/container/handler.js @@ -0,0 +1,189 @@ +// Container handler for Cloudflare Workers - Node.js +// This handler is used when deploying as a container worker + +const http = require('http'); +const crypto = require('crypto'); +const Module = require('module'); +const debug = require('util').debuglog('sebs'); + +// Monkey-patch the 'request' library to always include a User-Agent header +// This is needed because Wikimedia (and other sites) require a User-Agent +try { + const originalRequire = Module.prototype.require; + + Module.prototype.require = function(id) { + const module = originalRequire.apply(this, arguments); + + if (id === 'request') { + // Wrap the request function to inject default headers + const originalRequest = module; + const wrappedRequest = function(options, callback) { + if (typeof options === 'string') { + options = { uri: options }; + } + if (!options.headers) { + options.headers = {}; + } + if (!options.headers['User-Agent'] && !options.headers['user-agent']) { + options.headers['User-Agent'] = 'SeBS/1.2 (https://github.com/spcl/serverless-benchmarks) SeBS Benchmark Suite/1.2'; + } + return originalRequest(options, callback); + }; + // Copy all properties from original request + Object.keys(originalRequest).forEach(key => { + wrappedRequest[key] = originalRequest[key]; + }); + return wrappedRequest; + } + + return module; + }; +} catch (e) { + console.error('Failed to patch request module:', e); +} + +// Import the benchmark function +const { handler: benchmarkHandler } = require('./function'); + +// Import storage and nosql if they exist +let storage, nosql; +try { + storage = require('./storage'); +} catch (e) { + console.log('Storage module not available'); +} +try { + nosql = require('./nosql'); +} catch (e) { + console.log('NoSQL module not available'); +} + +const PORT = process.env.PORT || 8080; + +const server = http.createServer(async (req, res) => { + try { + // Get unique request ID from Cloudflare (CF-Ray header) + const reqId = req.headers['cf-ray'] || crypto.randomUUID(); + + // Extract Worker URL from header for R2 and NoSQL proxy. + // + // Containers run in a separate runtime from Workers and cannot access R2 or + // KV bindings directly — those bindings only exist in the Worker's `env`. + // To let the benchmark code reach storage, worker.js injects its own public + // origin into the X-Worker-URL header before forwarding the request here. + // The container-side storage/nosql modules use this URL to call back into + // the Worker over HTTP (e.g. POST ${workerUrl}/r2/upload), and worker.js + // intercepts those paths (/r2/*, /nosql/*) and performs the binding call + // on the container's behalf. 
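+      //
+      // Illustrative flow (the workers.dev hostname is only an example):
+      //   Worker    -> container:  POST /   with header X-Worker-URL: https://<name>.workers.dev
+      //   container -> Worker:     POST https://<name>.workers.dev/r2/upload?key=...  (served via the R2 binding)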
+ const workerUrl = req.headers['x-worker-url']; + if (workerUrl) { + if (storage && storage.storage && storage.storage.set_worker_url) { + storage.storage.set_worker_url(workerUrl); + } + if (nosql && nosql.nosql && nosql.nosql.set_worker_url) { + nosql.nosql.set_worker_url(workerUrl); + } + console.log(`Set worker URL for R2/NoSQL proxy: ${workerUrl}`); + } + + // Start timing measurements + const begin = Date.now() / 1000; + const start = performance.now(); + + // Read request body + let body = ''; + for await (const chunk of req) { + body += chunk; + } + + // Parse event from JSON body or URL params + let event = {}; + if (body && body.length > 0) { + try { + event = JSON.parse(body); + } catch (e) { + console.error('Failed to parse JSON body:', e); + res.writeHead(400, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ error: 'Invalid JSON body', message: e.message })); + return; + } + } + + // Parse URL parameters + const url = new URL(req.url, `http://${req.headers.host}`); + for (const [key, value] of url.searchParams) { + if (!event[key]) { + const intValue = parseInt(value); + event[key] = isNaN(intValue) ? value : intValue; + } + } + + // Add request metadata + const incomeTimestamp = Math.floor(Date.now() / 1000); + event['request-id'] = reqId; + event['income-timestamp'] = incomeTimestamp; + + // Call the benchmark function + const ret = await benchmarkHandler(event); + + + // Calculate elapsed time + const end = Date.now() / 1000; + const elapsed = performance.now() - start; + const micro = elapsed * 1000; // Convert milliseconds to microseconds + + // Build log_data similar to native handler + const log_data = { result: ret && ret.result !== undefined ? ret.result : ret }; + if (ret && ret.measurement !== undefined) { + log_data.measurement = ret.measurement; + } else { + log_data.measurement = {}; + } + + // Add memory usage to measurement + const memUsage = process.memoryUsage(); + const memory_mb = memUsage.heapUsed / 1024 / 1024; + log_data.measurement.memory_used_mb = memory_mb; + + // Gated behind Node.js' built-in debuglog — enable with NODE_DEBUG=sebs + debug('Sending response with log_data: %o', log_data); + + // Send response matching Python handler format exactly + if (event.html) { + res.writeHead(200, { 'Content-Type': 'text/html; charset=utf-8' }); + res.end(String(ret && ret.result !== undefined ? 
ret.result : ret)); + } else { + const responseBody = JSON.stringify({ + begin: begin, + end: end, + results_time: 0, + result: log_data, + is_cold: false, + is_cold_worker: false, + container_id: '0', + environ_container_id: 'no_id', + request_id: reqId, + }); + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(responseBody); + } + + } catch (error) { + console.error('Error processing request:', error); + console.error('Stack trace:', error.stack); + + const errorPayload = JSON.stringify({ + error: error.message, + stack: error.stack + }); + + res.writeHead(500, { 'Content-Type': 'application/json' }); + res.end(errorPayload); + } +}); + +// Ensure server is listening before handling requests +server.listen(PORT, '0.0.0.0', () => { + console.log(`Container server listening on 0.0.0.0:${PORT}`); + console.log('Server ready to accept connections'); +}); diff --git a/benchmarks/wrappers/cloudflare/nodejs/container/nosql.js b/benchmarks/wrappers/cloudflare/nodejs/container/nosql.js new file mode 100644 index 00000000..f529e682 --- /dev/null +++ b/benchmarks/wrappers/cloudflare/nodejs/container/nosql.js @@ -0,0 +1,121 @@ +/** + * NoSQL module for Cloudflare Node.js Containers. + * + * On Cloudflare, NoSQL storage is mapped to KVStore. KVStore + * bindings only exist inside the Worker runtime, so a container cannot talk + * to them directly. Instead, the container forwards each operation over HTTP + * to the parent Worker (see worker.js), which holds the KVStore + * binding and performs the actual read/write. + * + * Because of this, the HTTP endpoint depends on the Worker's URL, which is + * not known ahead of time. The handler receives it via the X-Worker-URL + * header on the incoming request and installs it here through + * set_worker_url() before any NoSQL call is made. 
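+ *
+ * Illustrative round trip: insert('table', ['id', 'x'], ['k', 'y'], data)
+ * becomes POST ${worker_url}/nosql/insert with a JSON body, and worker.js
+ * performs the corresponding put() on the KV namespace bound as env['table'].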
+ */ + +class nosql { + constructor() {} + + static worker_url = null; // Set by handler from X-Worker-URL header + + static init_instance(entry) { + if (!nosql.instance) { + nosql.instance = new nosql(); + } + return nosql.instance; + } + + static set_worker_url(url) { + nosql.worker_url = url; + } + + async _make_request(operation, params) { + if (!nosql.worker_url) { + throw new Error('Worker URL not set - cannot access NoSQL'); + } + + const url = `${nosql.worker_url}/nosql/${operation}`; + const data = JSON.stringify(params); + + try { + const response = await fetch(url, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: data, + }); + + if (!response.ok) { + let errorMsg; + try { + const errorData = await response.json(); + errorMsg = errorData.error || await response.text(); + } catch { + errorMsg = await response.text(); + } + throw new Error(`NoSQL operation failed: ${errorMsg}`); + } + + return await response.json(); + } catch (error) { + throw new Error(`NoSQL operation failed: ${error.message}`); + } + } + + async insert(tableName, primaryKey, secondaryKey, data) { + const params = { + table_name: tableName, + primary_key: primaryKey, + secondary_key: secondaryKey, + data: data, + }; + return this._make_request('insert', params); + } + + async get(tableName, primaryKey, secondaryKey) { + const params = { + table_name: tableName, + primary_key: primaryKey, + secondary_key: secondaryKey, + }; + const result = await this._make_request('get', params); + return result.data || null; + } + + async update(tableName, primaryKey, secondaryKey, updates) { + const params = { + table_name: tableName, + primary_key: primaryKey, + secondary_key: secondaryKey, + data: updates, + }; + return this._make_request('update', params); + } + + async query(tableName, primaryKey, secondaryKeyName) { + const params = { + table_name: tableName, + primary_key: primaryKey, + secondary_key_name: secondaryKeyName, + }; + const result = await this._make_request('query', params); + return result.items || []; + } + + async delete(tableName, primaryKey, secondaryKey) { + const params = { + table_name: tableName, + primary_key: primaryKey, + secondary_key: secondaryKey, + }; + return this._make_request('delete', params); + } + + static get_instance() { + if (!nosql.instance) { + nosql.instance = new nosql(); + } + return nosql.instance; + } +} + +module.exports.nosql = nosql; diff --git a/benchmarks/wrappers/cloudflare/nodejs/container/package.json b/benchmarks/wrappers/cloudflare/nodejs/container/package.json new file mode 100644 index 00000000..729c56fd --- /dev/null +++ b/benchmarks/wrappers/cloudflare/nodejs/container/package.json @@ -0,0 +1,10 @@ +{ + "name": "cloudflare-container-worker", + "version": "1.0.0", + "description": "Cloudflare Container Worker wrapper", + "main": "worker.js", + "type": "module", + "dependencies": { + "@cloudflare/containers": "^1.0.0" + } +} diff --git a/benchmarks/wrappers/cloudflare/nodejs/container/storage.js b/benchmarks/wrappers/cloudflare/nodejs/container/storage.js new file mode 100644 index 00000000..f69aa574 --- /dev/null +++ b/benchmarks/wrappers/cloudflare/nodejs/container/storage.js @@ -0,0 +1,357 @@ +const fs = require('fs'); +const path = require('path'); +const uuid = require('uuid'); +const debug = require('util').debuglog('sebs'); + +// Cloudflare Workers enforce a 100 MB request body limit at the edge. +// Use multipart upload for payloads larger than this threshold so that +// each individual request stays well below that limit. 
R2 requires parts +// of at least 5 MB. +const MULTIPART_THRESHOLD = 10 * 1024 * 1024; +const PART_SIZE = 10 * 1024 * 1024; + +function isRetryableSingleUploadError(error) { + const message = error?.message || ''; + return /HTTP 4(?:08|13|29)|request body|payload|too large|content length|body size|stream/i.test(message); +} + +/** + * Storage module for Cloudflare Node.js Containers. + * + * On Cloudflare, object storage (R2) is normally accessed through a Worker + * binding (`env.R2_BUCKET`). That binding only exists inside the Worker + * runtime, so a container cannot talk to R2 directly the way a Lambda or + * Cloud Function talks to S3/GCS with a regular SDK. Instead, the container + * forwards each storage operation over HTTP to the parent Worker (see + * worker.js), which holds the R2 binding and performs the actual + * get/put/list/multipart calls. + * + * R2 does expose an S3-compatible HTTPS API that a container could call + * without a Worker proxy, but that path requires provisioning and injecting + * R2 access keys into the container and diverges from how the Worker-based + * benchmarks access R2. Routing through the Worker keeps a single code path + * and credential model for both deployment types. + * + * Because of this, the HTTP endpoint depends on the Worker's URL, which is + * not known ahead of time. The handler receives it via the X-Worker-URL + * header on the incoming request and installs it here through + * set_worker_url() before any storage call is made. + */ + +class storage { + constructor() { + this.r2_enabled = true; + } + + static worker_url = null; // Set by handler from X-Worker-URL header + + static unique_name(name) { + const parsed = path.parse(name); + const uuid_name = uuid.v4().split('-')[0]; + return path.join(parsed.dir, `${parsed.name}.${uuid_name}${parsed.ext}`); + } + + static init_instance(entry) { + if (!storage.instance) { + storage.instance = new storage(); + } + return storage.instance; + } + + static set_worker_url(url) { + storage.worker_url = url; + } + + static get_instance() { + if (!storage.instance) { + storage.init_instance(); + } + return storage.instance; + } + + _toBuffer(data) { + if (Buffer.isBuffer(data)) { + return data; + } + if (typeof data === 'string') { + return Buffer.from(data, 'utf-8'); + } + if (data instanceof ArrayBuffer) { + return Buffer.from(data); + } + return Buffer.from(String(data), 'utf-8'); + } + + async _postJson(url, body = Buffer.alloc(0), contentType = null) { + const options = { + method: 'POST', + body, + }; + + if (contentType) { + options.headers = { 'Content-Type': contentType }; + } + + const response = await fetch(url, options); + + if (!response.ok) { + throw new Error(`HTTP ${response.status}: ${await response.text()}`); + } + + return response.json(); + } + + async _single_upload(key, buffer) { + const params = new URLSearchParams({ key }); + const url = `${storage.worker_url}/r2/upload?${params}`; + const result = await this._postJson(url, buffer); + return result.key; + } + + async _multipart_upload(key, buffer) { + const initParams = new URLSearchParams({ key }); + const initUrl = `${storage.worker_url}/r2/multipart-init?${initParams}`; + const init = await this._postJson(initUrl); + const uploadId = init.uploadId; + const uploadKey = init.key; + const completedParts = []; + + for (let offset = 0, partNumber = 1; offset < buffer.length; offset += PART_SIZE, partNumber += 1) { + const chunk = buffer.subarray(offset, offset + PART_SIZE); + const partParams = new URLSearchParams({ + key: 
uploadKey, + uploadId, + partNumber: String(partNumber), + }); + const partUrl = `${storage.worker_url}/r2/multipart-part?${partParams}`; + const part = await this._postJson(partUrl, chunk, 'application/octet-stream'); + completedParts.push({ partNumber: part.partNumber, etag: part.etag }); + } + + const completeParams = new URLSearchParams({ key: uploadKey, uploadId }); + const completeUrl = `${storage.worker_url}/r2/multipart-complete?${completeParams}`; + const result = await this._postJson( + completeUrl, + Buffer.from(JSON.stringify({ parts: completedParts }), 'utf-8'), + 'application/json' + ); + return result.key; + } + + async _upload_bytes(key, buffer) { + if (buffer.length > MULTIPART_THRESHOLD) { + return this._multipart_upload(key, buffer); + } + + try { + return await this._single_upload(key, buffer); + } catch (error) { + if (!isRetryableSingleUploadError(error)) { + throw error; + } + + debug( + '[storage] single upload failed for %s; retrying with multipart upload: %s', + key, + error.message + ); + return this._multipart_upload(key, buffer); + } + } + + async upload_stream(bucket, key, data) { + if (!this.r2_enabled) { + debug('R2 not configured, skipping upload'); + return key; + } + + if (!storage.worker_url) { + throw new Error('Worker URL not set - cannot access R2'); + } + + const unique_key = storage.unique_name(key); + const buffer = this._toBuffer(data); + + try { + return await this._upload_bytes(unique_key, buffer); + } catch (error) { + debug('R2 upload error: %o', error); + throw new Error(`Failed to upload to R2: ${error.message}`); + } + } + + async download_stream(bucket, key) { + if (!this.r2_enabled) { + throw new Error('R2 not configured'); + } + + if (!storage.worker_url) { + throw new Error('Worker URL not set - cannot access R2'); + } + + // Download via worker proxy + const params = new URLSearchParams({ bucket, key }); + const url = `${storage.worker_url}/r2/download?${params}`; + + try { + const response = await fetch(url); + + if (response.status === 404) { + throw new Error(`Object not found: ${key}`); + } + + if (!response.ok) { + throw new Error(`HTTP ${response.status}: ${await response.text()}`); + } + + const arrayBuffer = await response.arrayBuffer(); + return Buffer.from(arrayBuffer); + } catch (error) { + debug('R2 download error: %o', error); + throw new Error(`Failed to download from R2: ${error.message}`); + } + } + + upload(bucket, key, filepath) { + // Generate unique key synchronously so it can be returned immediately + const unique_key = storage.unique_name(key); + + // Read file from disk and upload + if (fs.existsSync(filepath)) { + const data = fs.readFileSync(filepath); + // Call internal version that doesn't generate another unique key + const uploadPromise = this._upload_stream_with_key(bucket, unique_key, data); + return [unique_key, uploadPromise]; + } + + debug('[storage.upload] File not found: %s', filepath); + throw new Error(`upload(): file not found: ${filepath}`); + } + + async _upload_stream_with_key(bucket, key, data) { + debug( + '[storage._upload_stream_with_key] Starting upload: bucket=%s, key=%s, data_size=%d', + bucket, + key, + data.length + ); + + if (!this.r2_enabled) { + debug('R2 not configured, skipping upload'); + return key; + } + + if (!storage.worker_url) { + throw new Error('Worker URL not set - cannot access R2'); + } + + debug('[storage._upload_stream_with_key] Worker URL: %s', storage.worker_url); + + const buffer = this._toBuffer(data); + debug('[storage._upload_stream_with_key] Uploading key=%s, 
buffer size: %d', key, buffer.length); + + try { + const resultKey = await this._upload_bytes(key, buffer); + debug('[storage._upload_stream_with_key] Upload successful, returned key: %s', resultKey); + return resultKey; + } catch (error) { + debug('R2 upload error: %o', error); + throw new Error(`Failed to upload to R2: ${error.message}`); + } + } + + async download(bucket, key, filepath) { + const data = await this.download_stream(bucket, key); + + let real_fp = filepath; + if (!filepath.startsWith('/tmp')) { + real_fp = path.join('/tmp', path.resolve(filepath)); + } + + // Write data to file + fs.mkdirSync(path.dirname(real_fp), { recursive: true }); + fs.writeFileSync(real_fp, data); + } + + async download_directory(bucket, prefix, out_path) { + if (!this.r2_enabled) { + debug('R2 not configured, skipping download_directory'); + return; + } + + if (!storage.worker_url) { + throw new Error('Worker URL not set - cannot access R2'); + } + + // List objects via worker proxy + const listParams = new URLSearchParams({ bucket, prefix }); + const listUrl = `${storage.worker_url}/r2/list?${listParams}`; + + try { + const response = await fetch(listUrl, { + method: 'GET', + headers: { 'Content-Type': 'application/json' }, + }); + + if (!response.ok) { + const errorText = await response.text(); + throw new Error(`HTTP ${response.status}: ${errorText}`); + } + + const result = await response.json(); + const objects = result.objects || []; + + for (const obj of objects) { + const file_name = obj.key; + const path_to_file = path.dirname(file_name); + fs.mkdirSync(path.join(out_path, path_to_file), { recursive: true }); + await this.download(bucket, file_name, path.join(out_path, file_name)); + } + } catch (error) { + debug('R2 download_directory error: %o', error); + throw new Error(`Failed to download directory from R2: ${error.message}`); + } + } + + async downloadDirectory(bucket, prefix, out_path) { + return this.download_directory(bucket, prefix, out_path); + } + + uploadStream(bucket, key) { + // Return [stream, promise, unique_key] to match native wrapper API + const unique_key = storage.unique_name(key); + + const stream = require('stream'); + const passThrough = new stream.PassThrough(); + const chunks = []; + + passThrough.on('data', (chunk) => chunks.push(chunk)); + + const upload = new Promise((resolve, reject) => { + passThrough.on('end', async () => { + try { + const buffer = Buffer.concat(chunks); + await this._upload_stream_with_key(bucket, unique_key, buffer); + resolve(); + } catch (err) { + reject(err); + } + }); + passThrough.on('error', reject); + }); + + return [passThrough, upload, unique_key]; + } + + async downloadStream(bucket, key) { + // Return a Promise that resolves to a readable stream + const data = await this.download_stream(bucket, key); + const stream = require('stream'); + const readable = new stream.Readable(); + readable.push(data); + readable.push(null); // Signal end of stream + return readable; + } +} + +module.exports.storage = storage; diff --git a/benchmarks/wrappers/cloudflare/nodejs/container/worker.js b/benchmarks/wrappers/cloudflare/nodejs/container/worker.js new file mode 100644 index 00000000..bd47ea53 --- /dev/null +++ b/benchmarks/wrappers/cloudflare/nodejs/container/worker.js @@ -0,0 +1,407 @@ +// Shared container orchestrator for both Node.js and Python container benchmarks. +// @cloudflare/containers is Node.js-only, so this single worker.js fronts the +// Durable-Object-backed container regardless of the in-container handler +// language. 
See sebs/cloudflare/containers.py, which copies this file from +// benchmarks/wrappers/cloudflare/nodejs/container/ into every container build +// directory (Python builds included). +import { Container, getContainer } from "@cloudflare/containers"; + +// Container wrapper class +export class ContainerWorker extends Container { + defaultPort = 8080; + sleepAfter = "30m"; +} + +export default { + async fetch(request, env) { + const url = new URL(request.url); + + try { + // Handle NoSQL proxy requests - intercept BEFORE forwarding to container + if (url.pathname.startsWith('/nosql/')) { + return await handleNoSQLRequest(request, env); + } + + // Handle R2 proxy requests - intercept BEFORE forwarding to container + if (url.pathname.startsWith('/r2/')) { + return await handleR2Request(request, env); + } + + // Get or create container instance + const containerId = request.headers.get('x-container-id') || 'default'; + const id = env.CONTAINER_WORKER.idFromName(containerId); + const stub = env.CONTAINER_WORKER.get(id); + + // Clone request and add Worker URL as header so container knows where to proxy R2 requests + const modifiedRequest = new Request(request); + modifiedRequest.headers.set('X-Worker-URL', url.origin); + + // Forward the request to the container + return await stub.fetch(modifiedRequest); + + } catch (error) { + console.error('Worker error:', error); + + const errorMessage = error.message || String(error); + + // Handle container not ready errors with 503 + if (errorMessage.includes('Container failed to start') || + errorMessage.includes('no container instance') || + errorMessage.includes('Durable Object') || + errorMessage.includes('provisioning')) { + + return new Response(JSON.stringify({ + error: 'Container failed to start', + details: 'there is no container instance that can be provided to this durable object', + message: errorMessage + }), { + status: 503, + headers: { 'Content-Type': 'application/json' } + }); + } + + // Other errors get 500 + return new Response(JSON.stringify({ + error: 'Internal server error', + details: errorMessage, + stack: error.stack + }), { + status: 500, + headers: { 'Content-Type': 'application/json' } + }); + } + } +}; + +/** + * Handle NoSQL (KV namespace) requests proxied from the container + * Routes: + * - POST /nosql/insert - insert item + * - POST /nosql/update - update item + * - POST /nosql/get - get item + * - POST /nosql/query - query items + * - POST /nosql/delete - delete item + */ +async function handleNoSQLRequest(request, env) { + try { + const url = new URL(request.url); + const operation = url.pathname.split('/').pop(); + + // Parse request body + const params = await request.json(); + const { table_name, primary_key, secondary_key, secondary_key_name, data } = params; + + const table = env[table_name]; + if (!table || typeof table.get !== 'function' || typeof table.put !== 'function') { + return new Response(JSON.stringify({ + error: `KV namespace binding '${table_name}' not found` + }), { + status: 500, + headers: { 'Content-Type': 'application/json' } + }); + } + + const indexKey = `__sebs_idx__${primary_key[1]}`; + const readIndex = async () => { + const raw = await table.get(indexKey); + if (!raw) { + return []; + } + try { + const parsed = JSON.parse(raw); + return Array.isArray(parsed) ? 
parsed : []; + } catch { + return []; + } + }; + const writeIndex = async (values) => { + await table.put(indexKey, JSON.stringify(values)); + }; + + const prefix = `${primary_key[1]}#`; + + let result; + switch (operation) { + case 'insert': { + const compositeKey = `${primary_key[1]}#${secondary_key[1]}`; + const keyData = { ...data }; + keyData[primary_key[0]] = primary_key[1]; + keyData[secondary_key[0]] = secondary_key[1]; + await table.put(compositeKey, JSON.stringify(keyData)); + const index = await readIndex(); + if (!index.includes(secondary_key[1])) { + index.push(secondary_key[1]); + await writeIndex(index); + } + result = { success: true }; + break; + } + case 'update': { + const compositeKey = `${primary_key[1]}#${secondary_key[1]}`; + const existingRaw = await table.get(compositeKey); + let existing = {}; + if (existingRaw) { + try { + existing = JSON.parse(existingRaw); + } catch { + existing = {}; + } + } + const merged = { ...existing, ...data }; + merged[primary_key[0]] = primary_key[1]; + merged[secondary_key[0]] = secondary_key[1]; + await table.put(compositeKey, JSON.stringify(merged)); + const index = await readIndex(); + if (!index.includes(secondary_key[1])) { + index.push(secondary_key[1]); + await writeIndex(index); + } + result = { success: true }; + break; + } + case 'get': { + const compositeKey = `${primary_key[1]}#${secondary_key[1]}`; + const raw = await table.get(compositeKey); + if (raw === null) { + result = { data: null }; + } else { + try { + result = { data: JSON.parse(raw) }; + } catch { + result = { data: raw }; + } + } + break; + } + case 'query': { + let secondaryKeys = await readIndex(); + if (secondaryKeys.length === 0) { + const list = await table.list({ prefix }); + secondaryKeys = (list.keys || []).map((k) => k.name.split('#').slice(1).join('#')); + } + const items = []; + for (const secondaryValue of secondaryKeys) { + const raw = await table.get(`${primary_key[1]}#${secondaryValue}`); + if (raw === null) { + continue; + } + try { + items.push(JSON.parse(raw)); + } catch { + items.push(raw); + } + } + result = { items }; + break; + } + case 'delete': { + const compositeKey = `${primary_key[1]}#${secondary_key[1]}`; + await table.delete(compositeKey); + const index = await readIndex(); + const next = index.filter((v) => v !== secondary_key[1]); + if (next.length !== index.length) { + await writeIndex(next); + } + result = { success: true }; + break; + } + default: + return new Response(JSON.stringify({ + error: 'Unknown NoSQL operation' + }), { + status: 404, + headers: { 'Content-Type': 'application/json' } + }); + } + + return new Response(JSON.stringify(result || {}), { + headers: { 'Content-Type': 'application/json' } + }); + + } catch (error) { + console.error('NoSQL proxy error:', error); + return new Response(JSON.stringify({ + error: error.message, + stack: error.stack + }), { + status: 500, + headers: { 'Content-Type': 'application/json' } + }); + } +} + +/** + * Handle R2 storage requests proxied from the container + * Routes: + * - GET /r2/download?bucket=X&key=Y - download object + * - POST /r2/upload?bucket=X&key=Y - upload object (body contains data) + */ +async function handleR2Request(request, env) { + try { + const url = new URL(request.url); + const bucket = url.searchParams.get('bucket'); + const key = url.searchParams.get('key'); + + // Check if R2 binding exists + if (!env.R2) { + return new Response(JSON.stringify({ + error: 'R2 binding not configured' + }), { + status: 500, + headers: { 'Content-Type': 'application/json' 
} + }); + } + + if (url.pathname === '/r2/list') { + // List objects in R2 with a prefix (only needs bucket) + if (!bucket) { + return new Response(JSON.stringify({ + error: 'Missing bucket parameter' + }), { + status: 400, + headers: { 'Content-Type': 'application/json' } + }); + } + + try { + const prefix = url.searchParams.get('prefix') || ''; + const list_res = await env.R2.list({ prefix }); + + return new Response(JSON.stringify({ + objects: list_res.objects || [] + }), { + headers: { 'Content-Type': 'application/json' } + }); + } catch (error) { + console.error('[worker.js /r2/list] Error:', error); + return new Response(JSON.stringify({ + error: error.message + }), { + status: 500, + headers: { 'Content-Type': 'application/json' } + }); + } + } + + // Multipart upload routes only need 'key' (bucket is implicit in the R2 binding) + if (url.pathname === '/r2/multipart-init') { + // Initiate a multipart upload; returns { key, uploadId } + console.log(`[worker.js /r2/multipart-init] key=${key}`); + const multipart = await env.R2.createMultipartUpload(key); + console.log(`[worker.js /r2/multipart-init] uploadId=${multipart.uploadId}`); + return new Response(JSON.stringify({ + key: multipart.key, + uploadId: multipart.uploadId + }), { headers: { 'Content-Type': 'application/json' } }); + + } else if (url.pathname === '/r2/multipart-part') { + // Upload one part; returns { partNumber, etag } + const uploadId = url.searchParams.get('uploadId'); + const partNumber = parseInt(url.searchParams.get('partNumber'), 10); + console.log(`[worker.js /r2/multipart-part] key=${key}, uploadId=${uploadId}, partNumber=${partNumber}`); + const multipart = env.R2.resumeMultipartUpload(key, uploadId); + const part = await multipart.uploadPart(partNumber, request.body); + console.log(`[worker.js /r2/multipart-part] uploaded part ${part.partNumber}, etag=${part.etag}`); + return new Response(JSON.stringify({ + partNumber: part.partNumber, + etag: part.etag + }), { headers: { 'Content-Type': 'application/json' } }); + + } else if (url.pathname === '/r2/multipart-complete') { + // Complete a multipart upload; body is JSON { parts: [{ partNumber, etag }] } + const uploadId = url.searchParams.get('uploadId'); + console.log(`[worker.js /r2/multipart-complete] key=${key}, uploadId=${uploadId}`); + const { parts } = await request.json(); + const multipart = env.R2.resumeMultipartUpload(key, uploadId); + const obj = await multipart.complete(parts); + console.log(`[worker.js /r2/multipart-complete] completed, size=${obj ? 
obj.size : '?'}`); + return new Response(JSON.stringify({ key: key }), { + headers: { 'Content-Type': 'application/json' } + }); + } + + // Download and upload require a key (bucket is implicit in the R2 binding) + if (!key) { + return new Response(JSON.stringify({ + error: 'Missing key parameter' + }), { + status: 400, + headers: { 'Content-Type': 'application/json' } + }); + } + + if (url.pathname === '/r2/download') { + const object = await env.R2.get(key); + + if (!object) { + return new Response(JSON.stringify({ + error: 'Object not found' + }), { + status: 404, + headers: { 'Content-Type': 'application/json' } + }); + } + + // Return the object data + return new Response(object.body, { + headers: { + 'Content-Type': object.httpMetadata?.contentType || 'application/octet-stream', + 'Content-Length': object.size.toString() + } + }); + + } else if (url.pathname === '/r2/upload') { + // Upload to R2 — stream request.body directly to avoid buffering large payloads in Worker memory + console.log(`[worker.js /r2/upload] bucket=${bucket}, key=${key}`); + console.log(`[worker.js /r2/upload] env.R2 exists:`, !!env.R2); + const contentLength = request.headers.get('Content-Length'); + console.log(`[worker.js /r2/upload] Content-Length: ${contentLength}`); + + // Use the key as-is (container already generates unique keys if needed) + try { + const putResult = await env.R2.put(key, request.body); + const size = putResult ? putResult.size : '(unknown)'; + console.log(`[worker.js /r2/upload] R2.put() succeeded, size=${size}`); + console.log(`[worker.js /r2/upload] Successfully uploaded to R2 with key=${key}`); + } catch (error) { + console.error(`[worker.js /r2/upload] R2.put() error:`, error); + throw error; + } + + return new Response(JSON.stringify({ + key: key + }), { + headers: { 'Content-Type': 'application/json' } + }); + + } else { + return new Response(JSON.stringify({ + error: 'Unknown R2 operation' + }), { + status: 404, + headers: { 'Content-Type': 'application/json' } + }); + } + + } catch (error) { + console.error('R2 proxy error:', error); + return new Response(JSON.stringify({ + error: error.message, + stack: error.stack + }), { + status: 500, + headers: { 'Content-Type': 'application/json' } + }); + } +} + +/** + * Generate unique key for uploaded files + */ +function generateUniqueKey(key) { + const parts = key.split('.'); + const ext = parts.length > 1 ? '.' + parts.pop() : ''; + const name = parts.join('.'); + const uuid = crypto.randomUUID().split('-')[0]; + return `${name}.${uuid}${ext}`; +} diff --git a/benchmarks/wrappers/cloudflare/nodejs/handler.js b/benchmarks/wrappers/cloudflare/nodejs/handler.js new file mode 100644 index 00000000..80170344 --- /dev/null +++ b/benchmarks/wrappers/cloudflare/nodejs/handler.js @@ -0,0 +1,261 @@ +import { DurableObject } from "cloudflare:workers"; + +// Cloudflare Workers freezes Date.now() / performance.now() between I/O +// operations as a timing-sidechannel mitigation, so wall-clock time does +// not advance inside pure-compute sections. To record a meaningful +// compute_time, we issue a throwaway self-fetch that triggers I/O and +// unfreezes the clock before we sample it. 
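+// The self-fetch targets /favicon, which the fetch handler below answers with a
+// plain 'None' response, so the clock-unfreezing round trip stays cheap.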
+// Docs: https://developers.cloudflare.com/workers/reference/security-model/#step-1-disallow-timers-and-multi-threading +async function advanceWorkersClock(request) { + try { + const url = new URL(request.url); + url.pathname = '/favicon'; + await fetch(url.toString(), { method: 'HEAD' }); + } catch (e) { + // Ignore — we only care about the side effect of performing I/O. + } +} + +// Durable Object class for KV API compatibility +export class KVApiObject extends DurableObject { + constructor(state, env) { + super(state, env); + this.storage = state.storage; + } + + // Proxy methods to make the storage API accessible from the stub + async put(key, value) { + return await this.storage.put(key, value); + } + + async get(key) { + return await this.storage.get(key); + } + + async delete(key) { + return await this.storage.delete(key); + } + + async list(options) { + return await this.storage.list(options); + } +} + +export default { + async fetch(request, env) { + try { + // Store R2 bucket binding and benchmark name in globals for fs-polyfill access + if (env.R2) { + globalThis.R2_BUCKET = env.R2; + } + if (env.BENCHMARK_NAME) { + globalThis.BENCHMARK_NAME = env.BENCHMARK_NAME; + } + + if (request.url.includes('favicon')) { + return new Response('None'); + } + + // Get unique request ID from Cloudflare (CF-Ray header) + const req_id = request.headers.get('CF-Ray') || crypto.randomUUID(); + + // Start timing measurements + const start = performance.now(); + const begin = Date.now() / 1000; + + + // Parse JSON body first (similar to Azure handler which uses req.body) + const req_text = await request.text(); + let event = {}; + if (req_text && req_text.length > 0) { + try { + event = JSON.parse(req_text); + } catch (e) { + // If body isn't JSON, keep event empty + event = {}; + } + } + + // Parse query string into event (URL parameters override/merge with body) + // This makes it compatible with both input formats + const urlParts = request.url.split('?'); + if (urlParts.length > 1) { + const query = urlParts[1]; + const pairs = query.split('&'); + for (const p of pairs) { + const [k, v] = p.split('='); + try { + if (v === undefined) { + event[k] = null; + } else if (!Number.isNaN(Number(v)) && Number.isFinite(Number(v))) { + // mirror Python attempt to convert to int + const n = Number(v); + event[k] = Number.isInteger(n) ? 
parseInt(v, 10) : n; + } else { + event[k] = decodeURIComponent(v); + } + } catch (e) { + event[k] = v; + } + } + } + + // Set timestamps + const income_timestamp = Math.floor(Date.now() / 1000); + event['request-id'] = req_id; + event['income-timestamp'] = income_timestamp; + + // Load the benchmark function module and initialize storage if available + // With nodejs_compat enabled, we can use require() for CommonJS modules + let funcModule; + try { + // Fallback to dynamic import for ES modules + funcModule = await import('./function.js'); + } catch (e2) { + throw new Error('Failed to import benchmark function module: ' + e2.message); + } + + + try { + const storageModule = await import('./storage.js'); + if (storageModule && storageModule.storage && typeof storageModule.storage.init_instance === 'function') { + storageModule.storage.init_instance({ env, request }); + } else { + console.warn('storage module imported but storage.init_instance is missing; skipping storage setup'); + } + } catch (e) { + // storage module may not be bundled for benchmarks that don't need it + } + + if (env.NOSQL_STORAGE_DATABASE) { + try { + const nosqlModule = await import('./nosql.js'); + if (nosqlModule && nosqlModule.nosql && typeof nosqlModule.nosql.init_instance === 'function') { + nosqlModule.nosql.init_instance({ env, request }); + } else { + console.warn('nosql module imported but nosql.init_instance is missing; skipping nosql setup'); + } + } catch (e) { + // nosql module might not exist for all benchmarks + console.log('Could not initialize nosql:', e.message); + } + } + + // Execute the benchmark handler. Benchmarks expose `handler` either as a + // named export (`exports.handler` / `export const handler`) or nested + // under a default export (`export default { handler }`). + let ret; + try { + const handler = + (funcModule && typeof funcModule.handler === 'function' && funcModule.handler) || + (funcModule && funcModule.default && typeof funcModule.default.handler === 'function' && funcModule.default.handler); + if (!handler) { + throw new Error('benchmark handler function not found'); + } + ret = await handler(event); + } catch (err) { + await advanceWorkersClock(request); + // Calculate timing even for errors + const end = Date.now() / 1000; + const elapsed = performance.now() - start; + const micro = elapsed * 1000; // Convert milliseconds to microseconds + + // Mirror Python behavior: return structured error payload + const errorPayload = JSON.stringify({ + begin: begin, + end: end, + compute_time: micro, + results_time: 0, + result: { output: null }, + is_cold: false, + is_cold_worker: false, + container_id: '0', + environ_container_id: 'no_id', + request_id: '0', + error: String(err && err.message ? err.message : err), + stack: err && err.stack ? err.stack : undefined, + event: event, + env: env, + }); + return new Response(errorPayload, { status: 500, headers: { 'Content-Type': 'application/json' } }); + } + + await advanceWorkersClock(request); + + // Now read the updated timer + const end = Date.now() / 1000; + const elapsed = performance.now() - start; + const micro = elapsed * 1000; // Convert milliseconds to microseconds + + // Build log_data similar to Python handler + const log_data = { result: ret && ret.result !== undefined ? 
ret.result : ret }; + if (ret && ret.measurement !== undefined) { + log_data.measurement = ret.measurement; + } else { + log_data.measurement = {}; + } + + // Add memory usage to measurement + const memUsage = process.memoryUsage(); + const memory_mb = memUsage.heapUsed / 1024 / 1024; + log_data.measurement.memory_used_mb = memory_mb; + + if (event.logs !== undefined) { + log_data.time = 0; + } + + if (event.html) { + return new Response(String(ret && ret.result !== undefined ? ret.result : ''), { + headers: { 'Content-Type': 'text/html; charset=utf-8' }, + }); + } + + const responseBody = JSON.stringify({ + begin: begin, + end: end, + compute_time: micro, + results_time: 0, + result: log_data, + is_cold: false, + is_cold_worker: false, + container_id: '0', + environ_container_id: 'no_id', + request_id: req_id, + }); + + return new Response(responseBody, { headers: { 'Content-Type': 'application/json' } }); + } catch (topLevelError) { + // Catch any uncaught errors (module loading, syntax errors, etc.) + // Try to include timing if available + let errorBegin = 0; + let errorEnd = 0; + let errorMicro = 0; + try { + errorEnd = Date.now() / 1000; + if (typeof begin !== 'undefined' && typeof start !== 'undefined') { + errorBegin = begin; + const elapsed = performance.now() - start; + errorMicro = elapsed * 1000; + } + } catch (e) { + // Ignore timing errors in error handler + } + + const errorPayload = JSON.stringify({ + begin: errorBegin, + end: errorEnd, + compute_time: errorMicro, + results_time: 0, + result: { output: null }, + is_cold: false, + is_cold_worker: false, + container_id: '0', + environ_container_id: 'no_id', + request_id: '0', + error: `Top-level error: ${topLevelError && topLevelError.message ? topLevelError.message : String(topLevelError)}`, + stack: topLevelError && topLevelError.stack ? topLevelError.stack : undefined, + }); + return new Response(errorPayload, { status: 500, headers: { 'Content-Type': 'application/json' } }); + } + }, +}; diff --git a/benchmarks/wrappers/cloudflare/nodejs/nosql.js b/benchmarks/wrappers/cloudflare/nodejs/nosql.js new file mode 100644 index 00000000..4fe3c80b --- /dev/null +++ b/benchmarks/wrappers/cloudflare/nodejs/nosql.js @@ -0,0 +1,158 @@ +// NoSQL wrapper for Cloudflare Workers +// Uses KV namespaces for storage +// Returns Promises that the handler will resolve + +class nosql { + constructor() { + this.env = null; + } + + static init_instance(entry) { + // Reuse existing instance if it exists, otherwise create new one + if (!nosql.instance) { + nosql.instance = new nosql(); + } + + if (entry && entry.env) { + nosql.instance.env = entry.env; + // Share env globally so bundled copies of this module (inlined by esbuild + // into function.js) can also reach the live KV bindings. + globalThis._nosqlEnv = entry.env; + } + } + + _get_table(tableName) { + // Fall back to the global env bridge for copies of this class that were + // inlined by esbuild into a separate bundle (e.g. function.js) and + // therefore have a different static `instance` from the one initialized + // by handler.js via `import('./nosql.js')`. + const env = this.env || globalThis._nosqlEnv; + if (!env) { + throw new Error(`nosql env not initialized for table ${tableName}`); + } + + // Unlike AWS/Azure/GCP where you instantiate a client SDK and address + // resources by name, Cloudflare Workers expose every bound resource + // (KV namespace, R2 bucket, D1 database, queue, etc.) as a property on + // the `env` object passed into the fetch handler. 
The property name is + // the binding name declared in wrangler.toml, so looking up a KV + // namespace by its table name is simply `env[tableName]`. + const table = env[tableName]; + if (!table || typeof table.get !== 'function' || typeof table.put !== 'function') { + const envKeys = Object.keys(env || {}); + throw new Error( + `KV binding '${tableName}' not found. env keys: [${envKeys.join(', ')}]` + ); + } + + return table; + } + + _key(primaryKey, secondaryKey) { + return `${primaryKey[1]}#${secondaryKey[1]}`; + } + + _indexKey(primaryKey) { + return `__sebs_idx__${primaryKey[1]}`; + } + + async _readIndex(table, primaryKey) { + const raw = await table.get(this._indexKey(primaryKey)); + if (raw === null) { + return []; + } + try { + const parsed = JSON.parse(raw); + return Array.isArray(parsed) ? parsed : []; + } catch { + return []; + } + } + + async _writeIndex(table, primaryKey, values) { + await table.put(this._indexKey(primaryKey), JSON.stringify(values)); + } + + // Async methods - build.js will patch function.js to await these + async insert(tableName, primaryKey, secondaryKey, data) { + const keyData = { ...data }; + keyData[primaryKey[0]] = primaryKey[1]; + keyData[secondaryKey[0]] = secondaryKey[1]; + + const table = this._get_table(tableName); + await table.put(this._key(primaryKey, secondaryKey), JSON.stringify(keyData)); + + const index = await this._readIndex(table, primaryKey); + if (!index.includes(secondaryKey[1])) { + index.push(secondaryKey[1]); + await this._writeIndex(table, primaryKey, index); + } + } + + async get(tableName, primaryKey, secondaryKey) { + const table = this._get_table(tableName); + const raw = await table.get(this._key(primaryKey, secondaryKey)); + if (raw === null) { + return null; + } + + try { + return JSON.parse(raw); + } catch { + return raw; + } + } + + async update(tableName, primaryKey, secondaryKey, updates) { + const existing = await this.get(tableName, primaryKey, secondaryKey) || {}; + const merged = { ...existing, ...updates }; + await this.insert(tableName, primaryKey, secondaryKey, merged); + } + + async query(tableName, primaryKey, secondaryKeyName) { + const table = this._get_table(tableName); + let secondaryKeys = await this._readIndex(table, primaryKey); + + // Fallback for legacy namespaces without explicit index key. 
+ if (secondaryKeys.length === 0) { + const listed = await table.list({ prefix: `${primaryKey[1]}#` }); + secondaryKeys = (listed.keys || []).map((k) => k.name.split('#').slice(1).join('#')); + } + + const results = []; + + for (const secondaryValue of secondaryKeys) { + const raw = await table.get(`${primaryKey[1]}#${secondaryValue}`); + if (raw === null) { + continue; + } + try { + results.push(JSON.parse(raw)); + } catch { + results.push(raw); + } + } + + return results; + } + + async delete(tableName, primaryKey, secondaryKey) { + const table = this._get_table(tableName); + await table.delete(this._key(primaryKey, secondaryKey)); + + const index = await this._readIndex(table, primaryKey); + const next = index.filter((v) => v !== secondaryKey[1]); + if (next.length !== index.length) { + await this._writeIndex(table, primaryKey, next); + } + } + + static get_instance() { + if (!nosql.instance) { + nosql.instance = new nosql(); + } + return nosql.instance; + } +} + +export { nosql }; diff --git a/benchmarks/wrappers/cloudflare/nodejs/postprocess.js b/benchmarks/wrappers/cloudflare/nodejs/postprocess.js new file mode 100644 index 00000000..2c06ce41 --- /dev/null +++ b/benchmarks/wrappers/cloudflare/nodejs/postprocess.js @@ -0,0 +1,86 @@ +/** + * Post-processing step: replace esbuild's dynamic __require("node:…") helper + * calls in the bundled dist/ output with static ESM imports. + * + * esbuild bundles dependencies that themselves call require() at runtime, + * turning them into __require("node:fs") style calls. Cloudflare Workers + * run in an ESM-only environment, so these must be hoisted to top-level + * import statements that wrangler / the runtime can resolve. + * + * Only the top-level requires in *source* files can be handled by esbuild's + * own external/format options, which is why this step is needed separately. + * + * Usage: node postprocess.js (run from the worker package directory) + */ + +'use strict'; + +const fs = require('fs'); +const { join, relative } = require('path'); + +function getAllJsFiles(dir, fileList = []) { + if (!fs.existsSync(dir)) return fileList; + const files = fs.readdirSync(dir, { withFileTypes: true }); + for (const file of files) { + const filePath = join(dir, file.name); + if (file.isDirectory()) { + getAllJsFiles(filePath, fileList); + } else if (file.name.endsWith('.js')) { + fileList.push(filePath); + } + } + return fileList; +} + +const distDir = './dist'; +const jsFiles = getAllJsFiles(distDir); + +let totalFixed = 0; + +for (const filePath of jsFiles) { + let content = fs.readFileSync(filePath, 'utf-8'); + + // Collect all unique node: modules required via esbuild's __require helper. + const nodeModules = new Set(); + const requireRegex = /__require\d*\("(node:[^"]+)"\)/g; + let match; + while ((match = requireRegex.exec(content)) !== null) { + nodeModules.add(match[1]); + } + + if (nodeModules.size === 0) continue; + + // Build static import declarations and a lookup cache object. + let imports = ''; + const mapping = {}; + let i = 0; + for (const mod of nodeModules) { + const varName = `__node_${mod.replace('node:', '').replace(/[^a-z0-9]/gi, '_')}_${i++}`; + imports += `import * as ${varName} from '${mod}';\n`; + mapping[mod] = varName; + } + + imports += '\nconst __node_cache = {\n'; + for (const [mod, varName] of Object.entries(mapping)) { + imports += ` '${mod}': ${varName},\n`; + } + imports += '};\n\n'; + + // Replace every __require("node:…") call with a cache lookup. 
+ content = content.replace(/__require(\d*)\("(node:[^"]+)"\)/g, (_match, _num, mod) => { + return `__node_cache['${mod}']`; + }); + + // Prepend the import block. + content = imports + content; + + fs.writeFileSync(filePath, content, 'utf-8'); + console.log(`✓ Fixed ${nodeModules.size} node: import(s) in ${relative(distDir, filePath)}`); + totalFixed++; +} + +if (totalFixed === 0) { + console.log('No __require node: calls found — nothing to patch.'); +} else { + console.log(`✓ Post-processing complete (${totalFixed} file(s) patched).`); +} diff --git a/benchmarks/wrappers/cloudflare/nodejs/request-polyfill.js b/benchmarks/wrappers/cloudflare/nodejs/request-polyfill.js new file mode 100644 index 00000000..f44bfa23 --- /dev/null +++ b/benchmarks/wrappers/cloudflare/nodejs/request-polyfill.js @@ -0,0 +1,100 @@ +/** + * Polyfill for the 'request' module using Cloudflare Workers fetch API + * Implements the minimal interface needed for benchmark compatibility + */ + +const { Writable } = require('node:stream'); +const fs = require('node:fs'); + +function request(url, options, callback) { + // Handle different call signatures + if (typeof options === 'function') { + callback = options; + options = {}; + } + + // Add default headers to mimic a browser request + const fetchOptions = { + ...options, + headers: { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', + 'Accept': '*/*', + ...((options && options.headers) || {}) + } + }; + + // Create a simple object that has a pipe method + const requestObj = { + pipe(destination) { + // Perform the fetch and write to destination + fetch(url, fetchOptions) + .then(async (response) => { + if (!response.ok) { + const error = new Error(`HTTP ${response.status}: ${response.statusText}`); + error.statusCode = response.status; + destination.emit('error', error); + if (callback) callback(error, response, null); + return destination; + } + + // Get the response as arrayBuffer and write it all at once + const buffer = await response.arrayBuffer(); + + // Write the buffer to the destination + if (destination.write) { + destination.write(Buffer.from(buffer)); + destination.end(); + } + + if (callback) callback(null, response, Buffer.from(buffer)); + }) + .catch((error) => { + destination.emit('error', error); + if (callback) callback(error, null, null); + }); + + return destination; + }, + + abort() { + // No-op for compatibility + } + }; + + return requestObj; +} + +// Add common request methods +request.get = (url, options, callback) => { + if (typeof options === 'function') { + callback = options; + options = {}; + } + return request(url, { ...options, method: 'GET' }, callback); +}; + +request.post = (url, options, callback) => { + if (typeof options === 'function') { + callback = options; + options = {}; + } + return request(url, { ...options, method: 'POST' }, callback); +}; + +request.put = (url, options, callback) => { + if (typeof options === 'function') { + callback = options; + options = {}; + } + return request(url, { ...options, method: 'PUT' }, callback); +}; + +request.delete = (url, options, callback) => { + if (typeof options === 'function') { + callback = options; + options = {}; + } + return request(url, { ...options, method: 'DELETE' }, callback); +}; + +module.exports = request; diff --git a/benchmarks/wrappers/cloudflare/nodejs/storage.js b/benchmarks/wrappers/cloudflare/nodejs/storage.js new file mode 100644 index 00000000..3ba30383 --- /dev/null +++ 
b/benchmarks/wrappers/cloudflare/nodejs/storage.js @@ -0,0 +1,268 @@ +const fs = require('fs'); +const path = require('path'); +const uuid = require('uuid'); + +// Storage wrapper compatible with the Python storage implementation. +// Supports Cloudflare R2 (via env.R2) when available; falls back to +// filesystem-based operations when running in Node.js (for local tests). + +class storage { + constructor() { + this.handle = null; // R2 binding + this.written_files = new Set(); + } + + static unique_name(name) { + const parsed = path.parse(name); + const uuid_name = uuid.v4().split('-')[0]; + return path.join(parsed.dir, `${parsed.name}.${uuid_name}${parsed.ext}`); + } + + // entry is expected to be an object with `env` (Workers) or nothing for Node + static init_instance(entry) { + storage.instance = new storage(); + if (entry && entry.env && entry.env.R2) { + storage.instance.handle = entry.env.R2; + } + storage.instance.written_files = new Set(); + } + + // Upload a file given a local filepath. In Workers env this is not available + // so callers should use upload_stream or pass raw data. For Node.js we read + // the file from disk and put it into R2 if available, otherwise throw. + upload(__bucket, key, filepath) { + // Use singleton instance if available, otherwise use this instance + const instance = storage.instance || this; + + // If file was previously written during this invocation, use /tmp absolute + let realPath = filepath; + if (instance.written_files.has(filepath)) { + realPath = path.join('/tmp', path.resolve(filepath)); + } + + const unique_key = storage.unique_name(key); + + // Try filesystem first (for Workers with nodejs_compat that have /tmp) + if (fs && fs.existsSync(realPath)) { + const data = fs.readFileSync(realPath); + + if (instance.handle) { + const uploadPromise = instance.handle.put(unique_key, data); + return [unique_key, uploadPromise]; + } else { + return [unique_key, Promise.resolve()]; + } + } + + // Fallback: In Workers environment with R2, check if file exists in R2 + // (it may have been written by fs-polyfill's createWriteStream) + if (instance.handle) { + // Normalize the path to match what fs-polyfill would use + let normalizedPath = realPath.replace(/^\.?\//, '').replace(/^tmp\//, ''); + + // Add benchmark name prefix if available (matching fs-polyfill behavior) + if (typeof globalThis !== 'undefined' && globalThis.BENCHMARK_NAME && + !normalizedPath.startsWith(globalThis.BENCHMARK_NAME + '/')) { + normalizedPath = globalThis.BENCHMARK_NAME + '/' + normalizedPath; + } + + // Read from R2 and re-upload with unique key + const uploadPromise = instance.handle.get(normalizedPath).then(async (obj) => { + if (obj) { + const data = await obj.arrayBuffer(); + return instance.handle.put(unique_key, data); + } else { + throw new Error(`File not found in R2: ${normalizedPath} (original path: ${filepath})`); + } + }); + + return [unique_key, uploadPromise]; + } + + // If running in Workers (no fs) and caller provided Buffer/Stream, they + // should call upload_stream directly. Otherwise, throw. 
+ throw new Error('upload(): file not found on disk and no R2 handle provided'); + } + + async download(__bucket, key, filepath) { + const instance = storage.instance || this; + const data = await this.download_stream(__bucket, key); + + let real_fp = filepath; + if (!filepath.startsWith('/tmp')) { + real_fp = path.join('/tmp', path.resolve(filepath)); + } + + instance.written_files.add(filepath); + + // Write data to file if we have fs + if (fs) { + fs.mkdirSync(path.dirname(real_fp), { recursive: true }); + if (Buffer.isBuffer(data)) { + fs.writeFileSync(real_fp, data); + } else { + fs.writeFileSync(real_fp, Buffer.from(String(data))); + } + return; + } + + // In Workers environment, callers should use stream APIs directly. + return; + } + + async download_directory(__bucket, prefix, out_path) { + const instance = storage.instance || this; + + if (!instance.handle) { + throw new Error('download_directory requires R2 binding (env.R2)'); + } + + const list_res = await instance.handle.list({ prefix }); + const objects = list_res.objects || []; + for (const obj of objects) { + const file_name = obj.key; + const path_to_file = path.dirname(file_name); + fs.mkdirSync(path.join(out_path, path_to_file), { recursive: true }); + await this.download(__bucket, file_name, path.join(out_path, file_name)); + } + } + + async downloadDirectory(bucket, prefix, out_path) { + return this.download_directory(bucket, prefix, out_path); + } + + async upload_stream(__bucket, key, data) { + const instance = storage.instance || this; + const unique_key = storage.unique_name(key); + if (instance.handle) { + // R2 put accepts ArrayBuffer, ReadableStream, or string + await instance.handle.put(unique_key, data); + return unique_key; + } + + // If no R2, write to local fs as fallback + if (fs) { + const outPath = path.join('/tmp', unique_key); + fs.mkdirSync(path.dirname(outPath), { recursive: true }); + if (Buffer.isBuffer(data)) fs.writeFileSync(outPath, data); + else fs.writeFileSync(outPath, Buffer.from(String(data))); + return unique_key; + } + + throw new Error('upload_stream(): no storage backend available'); + } + + async download_stream(__bucket, key) { + const instance = storage.instance || this; + + if (instance.handle) { + const obj = await instance.handle.get(key); + if (!obj) return null; + // R2 object provides arrayBuffer()/text() helpers in Workers + if (typeof obj.arrayBuffer === 'function') { + const ab = await obj.arrayBuffer(); + return Buffer.from(ab); + } + if (typeof obj.text === 'function') { + return await obj.text(); + } + // Fallback: return null + return null; + } + + // Fallback to local filesystem + const localPath = path.join('/tmp', key); + if (fs && fs.existsSync(localPath)) { + return fs.readFileSync(localPath); + } + + throw new Error('download_stream(): object not found'); + } + + // Additional stream methods for compatibility with Azure storage API + // These provide a stream-based interface similar to Azure's uploadStream/downloadStream + uploadStream(__bucket, key) { + const unique_key = storage.unique_name(key); + + if (this.handle) { + // For R2, we create a PassThrough stream that collects data + // then uploads when ended + const stream = require('stream'); + const passThrough = new stream.PassThrough(); + const chunks = []; + + passThrough.on('data', (chunk) => chunks.push(chunk)); + + const upload = new Promise((resolve, reject) => { + passThrough.on('end', async () => { + try { + const buffer = Buffer.concat(chunks); + await this.handle.put(unique_key, buffer); + 
resolve(); + } catch (err) { + reject(err); + } + }); + passThrough.on('error', reject); + }); + + return [passThrough, upload, unique_key]; + } + + // Fallback to filesystem + if (fs) { + const stream = require('stream'); + const outPath = path.join('/tmp', unique_key); + fs.mkdirSync(path.dirname(outPath), { recursive: true }); + const writeStream = fs.createWriteStream(outPath); + const upload = new Promise((resolve, reject) => { + writeStream.on('finish', resolve); + writeStream.on('error', reject); + }); + return [writeStream, upload, unique_key]; + } + + throw new Error('uploadStream(): no storage backend available'); + } + + async downloadStream(__bucket, key) { + if (this.handle) { + const obj = await this.handle.get(key); + if (!obj) return null; + + // R2 object has a body ReadableStream + if (obj.body) { + return obj.body; + } + + // Fallback: convert to buffer then to stream + if (typeof obj.arrayBuffer === 'function') { + const stream = require('stream'); + const ab = await obj.arrayBuffer(); + const buffer = Buffer.from(ab); + const readable = new stream.PassThrough(); + readable.end(buffer); + return readable; + } + + return null; + } + + // Fallback to local filesystem + const localPath = path.join('/tmp', key); + if (fs && fs.existsSync(localPath)) { + return fs.createReadStream(localPath); + } + + throw new Error('downloadStream(): object not found'); + } + + static get_instance() { + if (!storage.instance) { + throw new Error('must init storage singleton first'); + } + return storage.instance; + } +} + +module.exports.storage = storage; diff --git a/benchmarks/wrappers/cloudflare/python/container/handler.py b/benchmarks/wrappers/cloudflare/python/container/handler.py new file mode 100644 index 00000000..8ae89e6c --- /dev/null +++ b/benchmarks/wrappers/cloudflare/python/container/handler.py @@ -0,0 +1,199 @@ +#!/usr/bin/env python3 +""" +Container handler for Cloudflare Workers - Python +This handler is used when deploying as a container worker +""" + +import json +import sys +import os +import traceback +import resource +from http.server import HTTPServer, BaseHTTPRequestHandler +from urllib.parse import urlparse, parse_qs +import datetime + +# Monkey-patch requests library to add User-Agent header +# This is needed because many HTTP servers (like Wikimedia) reject requests without User-Agent +try: + import requests + original_request = requests.request + + def patched_request(method, url, **kwargs): + if 'headers' not in kwargs: + kwargs['headers'] = {} + if 'User-Agent' not in kwargs['headers']: + kwargs['headers']['User-Agent'] = 'SeBS/1.2 (https://github.com/spcl/serverless-benchmarks) SeBS Benchmark Suite/1.2' + return original_request(method, url, **kwargs) + + requests.request = patched_request + print("Monkey-patched requests library to add User-Agent header") +except ImportError: + print("requests library not available, skipping User-Agent monkey-patch") + +# Also patch urllib for libraries that use it directly +import urllib.request +original_urlopen = urllib.request.urlopen + +def patched_urlopen(url, data=None, timeout=None, **kwargs): + if isinstance(url, str): + req = urllib.request.Request(url, data=data) + req.add_header('User-Agent', 'SeBS/1.2 (https://github.com/spcl/serverless-benchmarks) SeBS Benchmark Suite/1.2') + return original_urlopen(req, timeout=timeout, **kwargs) + elif isinstance(url, urllib.request.Request): + if not url.has_header('User-Agent'): + url.add_header('User-Agent', 'SeBS/1.2 (https://github.com/spcl/serverless-benchmarks) SeBS 
Benchmark Suite/1.2') + return original_urlopen(url, data=data, timeout=timeout, **kwargs) + else: + return original_urlopen(url, data=data, timeout=timeout, **kwargs) + +urllib.request.urlopen = patched_urlopen +print("Monkey-patched urllib.request.urlopen to add User-Agent header") + +# Import the benchmark handler function +from function.function import handler as benchmark_handler + +# Import storage and nosql if available +try: + from function import storage +except ImportError: + storage = None + print("Storage module not available") + +try: + from function import nosql +except ImportError: + nosql = None + print("NoSQL module not available") + +PORT = int(os.environ.get('PORT', 8080)) + + +class ContainerHandler(BaseHTTPRequestHandler): + def do_GET(self): + self.handle_request() + + def do_POST(self): + self.handle_request() + + def handle_request(self): + # Handle favicon requests + if 'favicon' in self.path: + self.send_response(200) + self.end_headers() + self.wfile.write(b'None') + return + + try: + # Get unique request ID from Cloudflare (CF-Ray header) + import uuid + req_id = self.headers.get('CF-Ray', str(uuid.uuid4())) + + # Extract Worker URL from header for R2 and NoSQL proxy + worker_url = self.headers.get('X-Worker-URL') + if worker_url: + if storage: + storage.storage.set_worker_url(worker_url) + if nosql: + nosql.nosql.set_worker_url(worker_url) + print(f"Set worker URL for R2/NoSQL proxy: {worker_url}") + + # Read request body + content_length = int(self.headers.get('Content-Length', 0)) + body = self.rfile.read(content_length).decode('utf-8') if content_length > 0 else '' + + # Parse event from JSON body or URL params + event = {} + if body: + try: + event = json.loads(body) + except json.JSONDecodeError as e: + print(f'Failed to parse JSON body: {e}') + + # Parse URL parameters + parsed_url = urlparse(self.path) + params = parse_qs(parsed_url.query) + for key, values in params.items(): + if key not in event and values: + value = values[0] + try: + event[key] = int(value) + except ValueError: + event[key] = value + + # Add request metadata + income_timestamp = datetime.datetime.now().timestamp() + event['request-id'] = req_id + event['income-timestamp'] = income_timestamp + + # Measure execution time + begin = datetime.datetime.now().timestamp() + + # Call the benchmark function + result = benchmark_handler(event) + + # Calculate timing + end = datetime.datetime.now().timestamp() + compute_time = end - begin + + # Prepare response matching native handler format exactly + log_data = { + 'result': result['result'] + } + if 'measurement' in result: + log_data['measurement'] = result['measurement'] + else: + log_data['measurement'] = {} + + # Add memory usage to measurement + memory_mb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0 + log_data['measurement']['memory_used_mb'] = memory_mb + + response_data = { + 'begin': begin, + 'end': end, + 'results_time': 0, + 'result': log_data, + 'is_cold': False, + 'is_cold_worker': False, + 'container_id': "0", + 'environ_container_id': "no_id", + 'request_id': req_id + } + + # Send response + if event.get('html'): + # For HTML requests, return just the result + self.send_response(200) + self.send_header('Content-Type', 'text/html; charset=utf-8') + self.end_headers() + html_result = result.get('result', result) + self.wfile.write(str(html_result).encode('utf-8')) + else: + # For API requests, return structured response + self.send_response(200) + self.send_header('Content-Type', 'application/json') + 
self.end_headers() + self.wfile.write(json.dumps(response_data).encode('utf-8')) + + except Exception as error: + print(f'Error processing request: {error}') + traceback.print_exc() + self.send_response(500) + self.send_header('Content-Type', 'application/json') + self.end_headers() + error_response = { + 'error': str(error), + 'traceback': traceback.format_exc() + } + self.wfile.write(json.dumps(error_response).encode('utf-8')) + + def log_message(self, format, *args): + # Override to use print instead of stderr + print(f"{self.address_string()} - {format % args}") + + +if __name__ == '__main__': + server = HTTPServer(('0.0.0.0', PORT), ContainerHandler) + print(f'Container server listening on port {PORT}') + server.serve_forever() diff --git a/benchmarks/wrappers/cloudflare/python/container/nosql.py b/benchmarks/wrappers/cloudflare/python/container/nosql.py new file mode 100644 index 00000000..5a414d21 --- /dev/null +++ b/benchmarks/wrappers/cloudflare/python/container/nosql.py @@ -0,0 +1,122 @@ +""" +NoSQL module for Cloudflare Python Containers. + +Issues HTTP POSTs to {worker_url}/nosql/. The server side is +implemented in benchmarks/wrappers/cloudflare/nodejs/container/worker.js +(handleNoSQLRequest), which is copied into every container project at deploy +time by sebs/cloudflare/containers.py because @cloudflare/containers is +Node.js-only and wraps Python containers as well. +""" +import json +import urllib.request +import urllib.parse +from typing import List, Optional, Tuple + + +class nosql: + """NoSQL client for containers using HTTP proxy to Worker's Durable Object""" + + instance: Optional["nosql"] = None + worker_url = None # Set by handler from X-Worker-URL header + + @staticmethod + def init_instance(*args, **kwargs): + """Initialize singleton instance""" + if nosql.instance is None: + nosql.instance = nosql() + return nosql.instance + + @staticmethod + def set_worker_url(url): + """Set worker URL for NoSQL proxy (called by handler)""" + nosql.worker_url = url + + def _make_request(self, operation: str, params: dict) -> dict: + """Make HTTP request to worker nosql proxy""" + if not nosql.worker_url: + raise RuntimeError("Worker URL not set - cannot access NoSQL") + + url = f"{nosql.worker_url}/nosql/{operation}" + data = json.dumps(params).encode('utf-8') + + req = urllib.request.Request(url, data=data, method='POST') + req.add_header('Content-Type', 'application/json') + + try: + with urllib.request.urlopen(req) as response: + return json.loads(response.read().decode('utf-8')) + except urllib.error.HTTPError as e: + error_body = e.read().decode('utf-8') + try: + error_data = json.loads(error_body) + raise RuntimeError(f"NoSQL operation failed: {error_data.get('error', error_body)}") + except json.JSONDecodeError: + raise RuntimeError(f"NoSQL operation failed: {error_body}") + except Exception as e: + raise RuntimeError(f"NoSQL operation failed: {e}") + + def insert( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + data: dict, + ): + params = { + 'table_name': table_name, + 'primary_key': list(primary_key), + 'secondary_key': list(secondary_key), + 'data': data + } + return self._make_request('insert', params) + + def update( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + data: dict, + ): + params = { + 'table_name': table_name, + 'primary_key': list(primary_key), + 'secondary_key': list(secondary_key), + 'data': data + } + return self._make_request('update', params) + + def get( 
+ self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str] + ) -> Optional[dict]: + params = { + 'table_name': table_name, + 'primary_key': list(primary_key), + 'secondary_key': list(secondary_key) + } + result = self._make_request('get', params) + return result.get('data') + + def query( + self, table_name: str, primary_key: Tuple[str, str], secondary_key_name: str + ) -> List[dict]: + params = { + 'table_name': table_name, + 'primary_key': list(primary_key), + 'secondary_key_name': secondary_key_name + } + result = self._make_request('query', params) + return result.get('items', []) + + def delete(self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str]): + params = { + 'table_name': table_name, + 'primary_key': list(primary_key), + 'secondary_key': list(secondary_key) + } + return self._make_request('delete', params) + + @staticmethod + def get_instance(): + if nosql.instance is None: + nosql.instance = nosql() + return nosql.instance diff --git a/benchmarks/wrappers/cloudflare/python/container/storage.py b/benchmarks/wrappers/cloudflare/python/container/storage.py new file mode 100644 index 00000000..8c9a32fc --- /dev/null +++ b/benchmarks/wrappers/cloudflare/python/container/storage.py @@ -0,0 +1,258 @@ +""" +Storage module for Cloudflare Python Containers +Uses HTTP proxy to access R2 storage through the Worker's R2 binding +""" +import io +import os +import json +import urllib.request +import urllib.parse + +# Cloudflare Workers enforce a 100 MB request body limit at the edge. +# Use multipart upload for payloads larger than this threshold so that +# each individual request stays well below that limit. +_MULTIPART_THRESHOLD = 10 * 1024 * 1024 # 10 MB +_PART_SIZE = 10 * 1024 * 1024 # 10 MB per part (R2 min is 5 MB) + +class storage: + """R2 storage client for containers using HTTP proxy to Worker""" + instance = None + worker_url = None # Set by handler from X-Worker-URL header + + def __init__(self): + # Container accesses R2 through worker.js proxy + # Worker URL is injected via X-Worker-URL header in each request + self.r2_enabled = True + + @staticmethod + def init_instance(entry=None): + """Initialize singleton instance""" + if storage.instance is None: + storage.instance = storage() + return storage.instance + + @staticmethod + def get_instance(): + """Get singleton instance""" + if storage.instance is None: + storage.init_instance() + return storage.instance + + @staticmethod + def set_worker_url(url): + """Set worker URL for R2 proxy (called by handler)""" + storage.worker_url = url + + @staticmethod + def unique_name(name): + """Generate unique name for file""" + import uuid + name_part, extension = os.path.splitext(name) + return f'{name_part}.{str(uuid.uuid4()).split("-")[0]}{extension}' + + # ------------------------------------------------------------------ + # Internal helpers + # ------------------------------------------------------------------ + + def _post_json(self, url: str, body: bytes = b'', content_type: str = 'application/octet-stream'): + """POST *body* to *url* and return the parsed JSON response.""" + req = urllib.request.Request(url, data=body, method='POST') + req.add_header('Content-Type', content_type) + with urllib.request.urlopen(req) as resp: + return json.loads(resp.read().decode('utf-8')) + + def _upload_bytes(self, key: str, data: bytes) -> str: + """Upload *data* to the exact R2 *key* via the worker proxy. 
+ + Uses a single PUT for small payloads and R2 multipart upload for + payloads that exceed _MULTIPART_THRESHOLD (to stay under Cloudflare's + 100 MB per-request edge limit). + + Returns the R2 key. + """ + if len(data) <= _MULTIPART_THRESHOLD: + return self._single_upload(key, data) + return self._multipart_upload(key, data) + + def _single_upload(self, key: str, data: bytes) -> str: + params = urllib.parse.urlencode({'key': key}) + url = f"{storage.worker_url}/r2/upload?{params}" + result = self._post_json(url, data) + return result['key'] + + def _multipart_upload(self, key: str, data: bytes) -> str: + """Split *data* into ≤_PART_SIZE chunks and use R2 multipart upload.""" + # 1. Initiate + params = urllib.parse.urlencode({'key': key}) + init_url = f"{storage.worker_url}/r2/multipart-init?{params}" + init = self._post_json(init_url) + upload_id = init['uploadId'] + upload_key = init['key'] + print(f"[storage] multipart upload initiated: key={upload_key}, uploadId={upload_id}, " + f"total={len(data):,} bytes, parts={-(-len(data)//_PART_SIZE)}") + + # 2. Upload parts + completed_parts = [] + for part_num, offset in enumerate(range(0, len(data), _PART_SIZE), start=1): + chunk = data[offset:offset + _PART_SIZE] + params = urllib.parse.urlencode({ + 'key': upload_key, + 'uploadId': upload_id, + 'partNumber': part_num, + }) + part_url = f"{storage.worker_url}/r2/multipart-part?{params}" + part = self._post_json(part_url, chunk) + completed_parts.append({'partNumber': part['partNumber'], 'etag': part['etag']}) + print(f"[storage] uploaded part {part_num}, etag={part['etag']}") + + # 3. Complete + params = urllib.parse.urlencode({'key': upload_key, 'uploadId': upload_id}) + complete_url = f"{storage.worker_url}/r2/multipart-complete?{params}" + result = self._post_json( + complete_url, + json.dumps({'parts': completed_parts}).encode('utf-8'), + content_type='application/json', + ) + print(f"[storage] multipart upload complete: key={result['key']}") + return result['key'] + + # ------------------------------------------------------------------ + # Public API + # ------------------------------------------------------------------ + + def upload_stream(self, bucket: str, key: str, data): + """Upload data to R2 via worker proxy""" + if not self.r2_enabled: + print("Warning: R2 not configured, skipping upload") + return key + + if not storage.worker_url: + raise RuntimeError("Worker URL not set - cannot access R2") + + # Handle BytesIO objects + if isinstance(data, io.BytesIO): + data = data.getvalue() + + # Convert to bytes if needed + if isinstance(data, str): + data = data.encode('utf-8') + + unique_key = self.unique_name(key) + + try: + return self._upload_bytes(unique_key, data) + except Exception as e: + print(f"R2 upload error: {e}") + raise RuntimeError(f"Failed to upload to R2: {e}") + + def download_stream(self, bucket: str, key: str) -> bytes: + """Download data from R2 via worker proxy""" + if not self.r2_enabled: + raise RuntimeError("R2 not configured") + + if not storage.worker_url: + raise RuntimeError("Worker URL not set - cannot access R2") + + # Download via worker proxy + params = urllib.parse.urlencode({'bucket': bucket, 'key': key}) + url = f"{storage.worker_url}/r2/download?{params}" + + try: + with urllib.request.urlopen(url) as response: + return response.read() + except urllib.error.HTTPError as e: + if e.code == 404: + raise RuntimeError(f"Object not found: {key}") + else: + raise RuntimeError(f"Failed to download from R2: {e}") + except Exception as e: + print(f"R2 
download error: {e}") + raise RuntimeError(f"Failed to download from R2: {e}") + + def upload(self, bucket, key, filepath): + """Upload file from disk with unique key generation""" + # Generate unique key to avoid conflicts + unique_key = self.unique_name(key) + with open(filepath, 'rb') as f: + data = f.read() + try: + self._upload_bytes(unique_key, data) + except Exception as e: + raise RuntimeError(f"Failed to upload to R2: {e}") + return unique_key + + def _upload_with_key(self, bucket: str, key: str, data): + """Upload data to R2 via worker proxy with exact key (internal method)""" + if not self.r2_enabled: + print("Warning: R2 not configured, skipping upload") + return + + if not storage.worker_url: + raise RuntimeError("Worker URL not set - cannot access R2") + + # Handle BytesIO objects + if isinstance(data, io.BytesIO): + data = data.getvalue() + + # Convert to bytes if needed + if isinstance(data, str): + data = data.encode('utf-8') + + try: + result_key = self._upload_bytes(key, data) + print(f"[storage._upload_with_key] Upload successful, key={result_key}") + except Exception as e: + print(f"R2 upload error: {e}") + raise RuntimeError(f"Failed to upload to R2: {e}") + + def download(self, bucket, key, filepath): + """Download file to disk""" + data = self.download_stream(bucket, key) + os.makedirs(os.path.dirname(filepath), exist_ok=True) + with open(filepath, 'wb') as f: + f.write(data) + + def download_directory(self, bucket, prefix, local_path): + """ + Download all files with a given prefix to a local directory. + Lists objects via /r2/list endpoint and downloads each one in parallel. + """ + import concurrent.futures + + if not storage.worker_url: + raise RuntimeError("Worker URL not set - cannot access R2") + + # Create local directory + os.makedirs(local_path, exist_ok=True) + + # List objects with prefix via worker proxy + params = urllib.parse.urlencode({'bucket': bucket, 'prefix': prefix}) + list_url = f"{storage.worker_url}/r2/list?{params}" + + try: + with urllib.request.urlopen(list_url) as response: + result = json.loads(response.read().decode('utf-8')) + objects = result.get('objects', []) + + print(f"Found {len(objects)} objects with prefix '{prefix}'") + + def _download_one(obj): + obj_key = obj['key'] + local_file_path = os.path.join(local_path, obj_key) + local_dir = os.path.dirname(local_file_path) + if local_dir: + os.makedirs(local_dir, exist_ok=True) + print(f"Downloading {obj_key} to {local_file_path}") + self.download(bucket, obj_key, local_file_path) + + # Download all objects in parallel (up to 16 concurrent) + with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor: + futures = [executor.submit(_download_one, obj) for obj in objects] + for fut in concurrent.futures.as_completed(futures): + fut.result() # re-raise any exception + + return local_path + + except Exception as e: + print(f"Error listing/downloading directory: {e}") + raise RuntimeError(f"Failed to download directory: {e}") diff --git a/benchmarks/wrappers/cloudflare/python/handler.py b/benchmarks/wrappers/cloudflare/python/handler.py new file mode 100644 index 00000000..65376c6d --- /dev/null +++ b/benchmarks/wrappers/cloudflare/python/handler.py @@ -0,0 +1,144 @@ +import datetime, io, json, os, uuid, sys, ast +import asyncio +import importlib.util +import traceback +import time +try: + import resource + HAS_RESOURCE = True +except ImportError: + # Pyodide (Python native workers) doesn't support resource module + HAS_RESOURCE = False +from workers import WorkerEntrypoint, 
Response +from js import fetch as js_fetch, URL + +## sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) + +""" +currently assumed file structure: + +handler.py +function/ + function.py + .py + storage.py + nosql.py + +""" + +class Default(WorkerEntrypoint): + async def fetch(self, request, env): + try: + return await self.fetch2(request, env) + except Exception as e: + t = traceback.format_exc() + print(t) + return Response(t) + + async def fetch2(self, request, env): + if "favicon" in request.url: return Response("None") + + # Get unique request ID from Cloudflare (CF-Ray header) + req_id = request.headers.get('CF-Ray', str(uuid.uuid4())) + + # Start timing measurements + start = time.perf_counter() + begin = datetime.datetime.now().timestamp() + + req_text = await request.text() + + event = json.loads(req_text) if len(req_text) > 0 else {} + ## print(event) + + # dirty url parameters parsing, for testing + tmp = request.url.split("?") + if len(tmp) > 1: + urlparams = tmp[1] + urlparams = [chunk.split("=") for chunk in urlparams.split("&")] + for param in urlparams: + try: + event[param[0]] = int(param[1]) + except ValueError: + event[param[0]] = param[1] + except IndexError: + event[param[0]] = None + + ## note: time fixed in worker + income_timestamp = datetime.datetime.now().timestamp() + + event['request-id'] = req_id + event['income-timestamp'] = income_timestamp + + + + from function import storage + + storage.storage.init_instance(self) + + + if hasattr(self.env, 'NOSQL_STORAGE_DATABASE'): + from function import nosql + + nosql.nosql.init_instance(self) + + print("event:", event) + + +## make_benchmark_func() +## function = import_from_path("function.function", "/tmp/function.py") + + from function import function + + ret = function.handler(event) + + log_data = { + 'result': ret['result'] + } + if 'measurement' in ret: + log_data['measurement'] = ret['measurement'] + else: + log_data['measurement'] = {} + + # Add memory usage to measurement (if resource module is available) + if HAS_RESOURCE: + memory_mb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0 + log_data['measurement']['memory_used_mb'] = memory_mb + else: + # Pyodide doesn't support resource module + log_data['measurement']['memory_used_mb'] = 0.0 + + if 'logs' in event: + log_data['time'] = 0 + + if "html" in event: + headers = {"Content-Type" : "text/html; charset=utf-8"} + return Response(str(ret["result"]), headers = headers) + else: + # Trigger a fetch request to update the timer before measuring + # Time measurements only update after a fetch request or R2 operation + try: + # Fetch the worker's own URL with favicon to minimize overhead + final_url = URL.new(request.url) + final_url.pathname = '/favicon' + await js_fetch(str(final_url), method='HEAD') + except: + # Ignore fetch errors + pass + + # Calculate timestamps + end = datetime.datetime.now().timestamp() + elapsed = time.perf_counter() - start + micro = elapsed * 1_000_000 # Convert seconds to microseconds + + return Response(json.dumps({ + 'begin': begin, + 'end': end, + 'compute_time': micro, + 'results_time': 0, + 'result': log_data, + 'is_cold': False, + 'is_cold_worker': False, + 'container_id': "0", + 'environ_container_id': "no_id", + 'request_id': req_id + })) diff --git a/benchmarks/wrappers/cloudflare/python/nosql.py b/benchmarks/wrappers/cloudflare/python/nosql.py new file mode 100644 index 00000000..7a91a94f --- /dev/null +++ b/benchmarks/wrappers/cloudflare/python/nosql.py @@ -0,0 +1,242 
@@ +from typing import List, Optional, Tuple +import json +import pickle +from pyodide.ffi import to_js, run_sync +from workers import WorkerEntrypoint, DurableObject + + +class nosql_do: + instance: Optional["nosql_do"] = None + DO_BINDING_NAME = "DURABLE_STORE" + + @staticmethod + def init_instance(entry: WorkerEntrypoint): + nosql_do.instance = nosql_do() + nosql_do.instance.binding = getattr(entry.env, nosql_do.DO_BINDING_NAME) + + + def get_table(self, table_name): + kvapiobj = self.binding.getByName(table_name) + return kvapiobj + + def key_maker(self, key1, key2): + return f"({key1[0]},{str(key1[1])})+({key2[0]},{key2[1]})" + + def key_maker_partial(self, key1, key2): + return f"({key1[0]},{str(key1[1])})+({key2[0]}" + +## these data conversion funcs should not be necessary. i couldn't get pyodide to clone the data otherwise + def data_pre(self, data): + return pickle.dumps(data, 0).decode("ascii") + + def data_post(self, data): + # Handle None (key not found in storage) + if data is None: + return None + # Handle both string and bytes data from Durable Object storage + if isinstance(data, str): + return pickle.loads(bytes(data, "ascii")) + else: + return pickle.loads(data) + + def insert( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + data: dict, + ): + send_data = self.data_pre(data) + k=self.key_maker(primary_key, secondary_key) + put_res = run_sync(self.get_table(table_name).put(k, send_data)) + return + + ## does this really need different behaviour from insert? + def update( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + data: dict, + ): + self.insert(table_name, primary_key, secondary_key, data) + return + + + def get( + self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str] + ) -> Optional[dict]: + k=self.key_maker(primary_key, secondary_key) + get_res = run_sync(self.get_table(table_name).get(k)) + ## print(get_res) + return self.data_post(get_res) + + """ + This query must involve partition key - it does not scan across partitions. 
+ """ + + def query( + self, table_name: str, primary_key: Tuple[str, str], secondary_key_name: str + ) -> List[dict]: + + prefix_key = self.key_maker_partial(primary_key, (secondary_key_name,)) + list_res = run_sync(self.get_table(table_name).list()) + + keys = [] + for key in list_res: + if key.startswith(prefix_key): + print(key) + keys.append(key) + ##print("keys", keys) + assert len(keys) <= 100 + + + # todo: please use bulk sometime (it didn't work when i tried it) + res = [] + for key in keys: + + get_res = run_sync(self.get_table(table_name).get(key)) + ## print(get_res) + res.append(self.data_post(get_res)) + return res + + def delete(self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str]): + run_sync(self.get_table(table_name).delete(self.key_maker(primary_key, secondary_key))) + return + + @staticmethod + def get_instance(): + if nosql_do.instance is None: + nosql_do.instance = nosql_do() + return nosql_do.instance + +### ------------------------------ + +class nosql_kv: + + instance: Optional["nosql_kv"] = None + + @staticmethod + def init_instance(entry: WorkerEntrypoint): + nosql_kv.instance = nosql_kv() + nosql_kv.instance.env = entry.env + + def key_maker(self, key1, key2): + return f"{key1[1]}#{key2[1]}" + + def key_maker_partial(self, key1, key2): + return f"{key1[1]}#" + + def index_key(self, primary_key): + return f"__sebs_idx__{primary_key[1]}" + + def get_table(self, table_name): + return getattr(self.env, (table_name)) + + def insert( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + data: dict, + ): + key_data = {**data} + key_data[primary_key[0]] = primary_key[1] + key_data[secondary_key[0]] = secondary_key[1] + put_res = run_sync( + self.get_table(table_name).put( + self.key_maker(primary_key, secondary_key), + json.dumps(key_data), + ) + ) + + idx_raw = run_sync(self.get_table(table_name).get(self.index_key(primary_key))) + idx = [] + if idx_raw: + idx = json.loads(idx_raw) + if secondary_key[1] not in idx: + idx.append(secondary_key[1]) + run_sync(self.get_table(table_name).put(self.index_key(primary_key), json.dumps(idx))) + return + + def update( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + data: dict, + ): + existing = self.get(table_name, primary_key, secondary_key) + if existing is None: + existing = {} + merged = {**existing, **data} + merged[primary_key[0]] = primary_key[1] + merged[secondary_key[0]] = secondary_key[1] + put_res = run_sync( + self.get_table(table_name).put( + self.key_maker(primary_key, secondary_key), + json.dumps(merged), + ) + ) + return + + def get( + self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str] + ) -> Optional[dict]: + get_res = run_sync( + self.get_table(table_name).get( + self.key_maker(primary_key, secondary_key) + )) + if get_res is None: + return None + if isinstance(get_res, dict): + return get_res + return json.loads(get_res) + + """ + This query must involve partition key - it does not scan across partitions. 
+ """ + + def query( + self, table_name: str, primary_key: Tuple[str, str], secondary_key_name: str + ) -> List[dict]: + idx_raw = run_sync(self.get_table(table_name).get(self.index_key(primary_key))) + idx = [] + if idx_raw: + idx = json.loads(idx_raw) + + res = [] + for secondary_key_value in idx: + key = f"{primary_key[1]}#{secondary_key_value}" + get_res = run_sync(self.get_table(table_name).get(key)) + if get_res is None: + continue + if isinstance(get_res, dict): + res.append(get_res) + else: + res.append(json.loads(get_res)) + return res + + def delete(self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str]): + run_sync(self.get_table(table_name).delete(self.key_maker(primary_key, secondary_key))) + + idx_raw = run_sync(self.get_table(table_name).get(self.index_key(primary_key))) + idx = [] + if idx_raw: + idx = json.loads(idx_raw) + if secondary_key[1] in idx: + idx = [v for v in idx if v != secondary_key[1]] + run_sync(self.get_table(table_name).put(self.index_key(primary_key), json.dumps(idx))) + + return + + @staticmethod + def get_instance(): + if nosql_kv.instance is None: + nosql_kv.instance = nosql_kv() + return nosql_kv.instance + + + + +nosql = nosql_kv diff --git a/benchmarks/wrappers/cloudflare/python/storage.py b/benchmarks/wrappers/cloudflare/python/storage.py new file mode 100644 index 00000000..cabdb718 --- /dev/null +++ b/benchmarks/wrappers/cloudflare/python/storage.py @@ -0,0 +1,104 @@ +import io +import os +import uuid +from pyodide.ffi import to_js, jsnull, run_sync + +from workers import WorkerEntrypoint + +## all filesystem calls will rely on the node:fs flag +""" layout +/bundle +└── (one file for each module in your Worker bundle) +/tmp +└── (empty, but you can write files, create directories, symlinks, etc) +/dev +├── null +├── random +├── full +└── zero +""" +class storage: + instance = None + + @staticmethod + def unique_name(name): + name, extension = os.path.splitext(name) + return '{name}.{random}{extension}'.format( + name=name, + extension=extension, + random=str(uuid.uuid4()).split('-')[0] + ) + def get_bucket(self, bucket): + # R2 buckets are always bound as 'R2' in wrangler.toml + # The bucket parameter is the actual bucket name but we access via the binding + return self.entry_env.R2 + + @staticmethod + def init_instance(entry: WorkerEntrypoint): + storage.instance = storage() + storage.instance.entry_env = entry.env + storage.instance.written_files = set() + + def upload(self, bucket, key, filepath): + if filepath in self.written_files: + filepath = "/tmp" + os.path.abspath(filepath) + with open(filepath, "rb") as f: + unique_key = self.upload_stream(bucket, key, f.read()) + return unique_key + + def download(self, bucket, key, filepath): + data = self.download_stream(bucket, key) + # should only allow writes to tmp dir. so do have to edit the filepath here? 
+ real_fp = filepath + if not filepath.startswith("/tmp"): + real_fp = "/tmp" + os.path.abspath(filepath) + + self.written_files.add(filepath) + with open(real_fp, "wb") as f: + f.write(data) + return + + def download_directory(self, bucket, prefix, out_path): + bobj = self.get_bucket(bucket) + list_res = run_sync(bobj.list(to_js({"prefix": prefix}))) + for obj in list_res.objects: + file_name = obj.key + path_to_file = os.path.dirname(file_name) + os.makedirs(os.path.join(out_path, path_to_file), exist_ok=True) + self.download(bucket, file_name, os.path.join(out_path, file_name)) + return + + def upload_stream(self, bucket, key, data): + return run_sync(self.aupload_stream(bucket, key, data)) + + async def aupload_stream(self, bucket, key, data): + unique_key = storage.unique_name(key) + if hasattr(data, 'getvalue'): + data = data.getvalue() + if isinstance(data, bytes): + data_js = to_js(data) + else: + data_js = str(data) + bobj = self.get_bucket(bucket) + put_res = await bobj.put(unique_key, data_js) + return unique_key + + def download_stream(self, bucket, key): + return run_sync(self.adownload_stream(bucket, key)) + + async def adownload_stream(self, bucket, key): + bobj = self.get_bucket(bucket) + get_res = await bobj.get(key) + if get_res == jsnull: + print("key not stored in bucket") + return b'' + # Always read as raw binary data (Blob/ArrayBuffer) + data = await get_res.bytes() + return bytes(data) + + @staticmethod + def get_instance(): + if storage.instance is None: + raise RuntimeError("must init storage singleton first") + return storage.instance + return storage.instance diff --git a/configs/cloudflare-test.json b/configs/cloudflare-test.json new file mode 100644 index 00000000..275aa021 --- /dev/null +++ b/configs/cloudflare-test.json @@ -0,0 +1,26 @@ +{ + "experiments": { + "deployment": "cloudflare", + "update_code": false, + "update_storage": false, + "download_results": false, + "architecture": "x64", + "container_deployment": false, + "runtime": { + "language": "nodejs", + "version": "18" + } + }, + "deployment": { + "name": "cloudflare", + "cloudflare": { + "credentials": { + "api_token": "", + "account_id": "", + "r2_access_key_id": "", + "r2_secret_access_key": "" + } + }, + "container": false + } +} diff --git a/configs/systems.json b/configs/systems.json index d649e0bc..3506e3d3 100644 --- a/configs/systems.json +++ b/configs/systems.json @@ -25,6 +25,13 @@ "3.9": "python:3.9-slim", "3.10": "python:3.10-slim", "3.11": "python:3.11-slim" + }, + "arm64": { + "3.7": "python:3.7-slim", + "3.8": "python:3.8-slim", + "3.9": "python:3.9-slim", + "3.10": "python:3.10-slim", + "3.11": "python:3.11-slim" } }, "images": [ @@ -71,12 +78,8 @@ } } }, - "architecture": [ - "x64" - ], - "deployments": [ - "package" - ] + "architecture": ["x64", "arm64"], + "deployments": ["package"] }, "aws": { "languages": { @@ -452,11 +455,97 @@ } } }, - "architecture": [ - "x64" - ], - "deployments": [ - "container" - ] + "architecture": ["x64"], + "deployments": ["container"] + }, + "cloudflare": { + "languages": { + "python": { + "base_images": { + "x64": { + "3.8": "python:3.8-slim", + "3.9": "python:3.9-slim", + "3.10": "python:3.10-slim", + "3.11": "python:3.11-slim", + "3.12": "python:3.12-slim" + } + }, + "container_images": { + "x64": { + "3.8": "python:3.8-slim", + "3.9": "python:3.9-slim", + "3.10": "python:3.10-slim", + "3.11": "python:3.11-slim", + "3.12": "python:3.12-slim" + } + }, + "images": ["build"], + "supported_variants": ["default", "cloudflare"], + "deployment": { + 
"files": [ + "handler.py", + "storage.py", + "nosql.py" + ], + "packages": [], + "module_packages": {} + }, + "container_deployment": { + "files": [ + "handler.py", + "storage.py", + "nosql.py" + ], + "packages": [], + "module_packages": {} + } + }, + "nodejs": { + "base_images": { + "x64": { + "18": "node:18-slim", + "20": "node:20-slim" + } + }, + "container_images": { + "x64": { + "18": "node:18-slim", + "20": "node:20-slim" + } + }, + "images": ["build"], + "supported_variants": ["default", "cloudflare"], + "deployment": { + "files": [ + "handler.js", + "storage.js", + "nosql.js", + "build.js", + "postprocess.js", + "request-polyfill.js" + ], + "packages": { + "uuid": "3.4.0" + } + }, + "container_deployment": { + "files": [ + "handler.js", + "storage.js", + "nosql.js" + ], + "packages": { + "uuid": "3.4.0" + } + } + } + }, + "images": { + "manage": { + "username": "docker_user" + } + }, + "architecture": ["x64"], + "deployments": ["workers", "container"] } } diff --git a/dockerfiles/cloudflare/Dockerfile.manage b/dockerfiles/cloudflare/Dockerfile.manage new file mode 100644 index 00000000..46ffc5ab --- /dev/null +++ b/dockerfiles/cloudflare/Dockerfile.manage @@ -0,0 +1,36 @@ +FROM node:20-slim + +# Disable telemetry +ENV WRANGLER_SEND_METRICS=false + +# Install system dependencies including Docker CLI +RUN apt-get clean && apt-get update \ + && apt-get install -y ca-certificates curl gnupg gosu python3 python3-pip python3-venv git \ + && install -m 0755 -d /etc/apt/keyrings \ + && curl -fsSL https://download.docker.com/linux/debian/gpg -o /etc/apt/keyrings/docker.asc \ + && chmod a+r /etc/apt/keyrings/docker.asc \ + && echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/debian bookworm stable" > /etc/apt/sources.list.d/docker.list \ + && apt-get update \ + && apt-get install -y docker-ce-cli \ + && apt-get purge -y --auto-remove \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Install wrangler and @cloudflare/containers globally +RUN npm install -g wrangler @cloudflare/containers + +# Install uv (fast Python package installer) and pywrangler +# Pin workers-py to 1.8.0: 1.9.x introduced a broken import (rich.logging.Console) +# which does not exist in any version of rich. Remove the pin once upstream fixes it. +RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \ + /root/.local/bin/uv tool install 'workers-py==1.8.0' + +# Add paths to environment +ENV PATH="/root/.local/bin:/root/.local/share/uv/tools/workers-py/bin:${PATH}" + +# Create working directory +RUN mkdir -p /sebs/ +COPY dockerfiles/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh + +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/cloudflare/nodejs/Dockerfile.build b/dockerfiles/cloudflare/nodejs/Dockerfile.build new file mode 100644 index 00000000..204434e7 --- /dev/null +++ b/dockerfiles/cloudflare/nodejs/Dockerfile.build @@ -0,0 +1,22 @@ +ARG BASE_IMAGE=node:20-slim +FROM ${BASE_IMAGE} + +# useradd, groupmod, gosu (needed by entrypoint.sh to drop privileges) +RUN apt-get update && apt-get install -y --no-install-recommends \ + passwd curl ca-certificates \ + && rm -rf /var/lib/apt/lists/* +ENV GOSU_VERSION=1.14 +RUN curl -o /usr/local/bin/gosu -SL "https://github.com/tianon/gosu/releases/download/${GOSU_VERSION}/gosu-$(dpkg --print-architecture)" \ + && chmod +x /usr/local/bin/gosu + +# Install esbuild globally once — benchmark source arrives via bind-mount. 
+RUN npm install -g esbuild + +RUN mkdir -p /sebs/ +COPY dockerfiles/cloudflare_nodejs_installer.sh /sebs/installer.sh +COPY dockerfiles/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/installer.sh /sebs/entrypoint.sh + +ENV PATH=/usr/sbin:$PATH +CMD ["/bin/bash", "/sebs/installer.sh"] +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/cloudflare/nodejs/Dockerfile.function b/dockerfiles/cloudflare/nodejs/Dockerfile.function new file mode 100644 index 00000000..1bf6a89c --- /dev/null +++ b/dockerfiles/cloudflare/nodejs/Dockerfile.function @@ -0,0 +1,36 @@ +ARG BASE_IMAGE=node:18-slim +FROM ${BASE_IMAGE} + +# Install system dependencies needed for benchmarks +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + xz-utils \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +# Copy package files first for better caching +COPY package*.json ./ + +# Install dependencies +RUN npm install --production + +# Copy all application files +COPY . . + +# Run benchmark init script if it exists (e.g., for ffmpeg in video-processing) +# This downloads static binaries needed by the benchmark +# Note: ignore errors from init.sh (e.g., when resources already exist) +RUN if [ -f "init.sh" ]; then \ + chmod +x init.sh && \ + ./init.sh /app verbose x64 || true; \ + fi + +# Expose port 8080 for container communication +EXPOSE 8080 + +# Set environment variable for port +ENV PORT=8080 + +# Start the HTTP server +CMD ["node", "handler.js"] diff --git a/dockerfiles/cloudflare/python/Dockerfile.build b/dockerfiles/cloudflare/python/Dockerfile.build new file mode 100644 index 00000000..283b088e --- /dev/null +++ b/dockerfiles/cloudflare/python/Dockerfile.build @@ -0,0 +1,30 @@ +ARG BASE_IMAGE=python:3.11-slim +FROM ${BASE_IMAGE} + +# useradd, groupmod, gosu (needed by entrypoint.sh to drop privileges) +# curl + ca-certificates are also required by the uv installer; git is sometimes +# pulled in by workers-py when it resolves VCS-declared deps. +RUN apt-get update && apt-get install -y --no-install-recommends \ + passwd curl ca-certificates git \ + && rm -rf /var/lib/apt/lists/* +ENV GOSU_VERSION=1.14 +RUN curl -o /usr/local/bin/gosu -SL "https://github.com/tianon/gosu/releases/download/${GOSU_VERSION}/gosu-$(dpkg --print-architecture)" \ + && chmod +x /usr/local/bin/gosu + +# Install uv (fast Python package manager) and the workers-py toolchain, +# which provides pywrangler for Pyodide-based Cloudflare Worker deploys. +# Pinned to 1.8.0 to match Dockerfile.manage — 1.9.x introduced a broken +# import (rich.logging.Console). Remove the pin once upstream fixes it. +RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \ + /root/.local/bin/uv tool install 'workers-py==1.8.0' + +ENV PATH="/root/.local/bin:/root/.local/share/uv/tools/workers-py/bin:/usr/sbin:${PATH}" + +# Benchmark source arrives via bind-mount at /mnt/function — no COPY . . here. 
+RUN mkdir -p /sebs/ +COPY dockerfiles/cloudflare_python_installer.sh /sebs/installer.sh +COPY dockerfiles/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/installer.sh /sebs/entrypoint.sh + +CMD ["/bin/bash", "/sebs/installer.sh"] +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/cloudflare/python/Dockerfile.function b/dockerfiles/cloudflare/python/Dockerfile.function new file mode 100644 index 00000000..e9ecc0e8 --- /dev/null +++ b/dockerfiles/cloudflare/python/Dockerfile.function @@ -0,0 +1,38 @@ +ARG BASE_IMAGE=python:3.11-slim +FROM ${BASE_IMAGE} + +# Install system dependencies needed for benchmarks +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + xz-utils \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +# Copy all application files first +COPY . . + +# Run benchmark init script if it exists (e.g., for ffmpeg in video-processing) +# This downloads static binaries needed by the benchmark +# Note: ignore errors from init.sh (e.g., when resources already exist) +RUN if [ -f "init.sh" ]; then \ + chmod +x init.sh && \ + ./init.sh /app verbose x64 || true; \ + fi + +# Install dependencies +# Core dependencies for wrapper modules: +# - storage.py uses urllib (stdlib) to proxy R2 requests through worker.js +# - nosql.py, worker.py, handler.py use stdlib only +# Then install benchmark-specific requirements from requirements.txt +RUN pip install --no-cache-dir --upgrade pip && \ + if [ -f requirements.txt ]; then pip install --no-cache-dir -r requirements.txt; fi + +# Expose port 8080 for container communication +EXPOSE 8080 + +# Set environment variable for port +ENV PORT=8080 + +# Start the HTTP server +CMD ["python", "handler.py"] diff --git a/dockerfiles/cloudflare_nodejs_installer.sh b/dockerfiles/cloudflare_nodejs_installer.sh new file mode 100644 index 00000000..dfe4482b --- /dev/null +++ b/dockerfiles/cloudflare_nodejs_installer.sh @@ -0,0 +1,10 @@ +#!/bin/bash +set -e + +cd /mnt/function + +npm install --production +npm install --force esbuild + +node build.js +node postprocess.js diff --git a/dockerfiles/cloudflare_python_installer.sh b/dockerfiles/cloudflare_python_installer.sh new file mode 100644 index 00000000..884afff0 --- /dev/null +++ b/dockerfiles/cloudflare_python_installer.sh @@ -0,0 +1,11 @@ +#!/bin/bash +set -e + +cd /mnt/function + +if [ -f pyproject.toml ]; then + python -c "import tomllib; tomllib.load(open('pyproject.toml','rb'))" + pywrangler --version +fi + +touch .build-validated diff --git a/docs/build.md b/docs/build.md index c6c7f4ba..671a4590 100644 --- a/docs/build.md +++ b/docs/build.md @@ -92,23 +92,70 @@ additive on top of it. ### 1. Declaring variants in a benchmark (`config.json`) A benchmark opts into variant support by using the extended language object syntax in its -`config.json`. The legacy string form (`"python"`) implies only the `"default"` variant. +`config.json`. The legacy string form (`"python"`) implies only the `"default"` variant +and should be kept for languages that have no variant-specific code or configuration. 
```json { "timeout": 10, "memory": 128, "languages": [ - { "language": "nodejs", "variants": ["default", "bun", "llrt"] }, - { "language": "python", "variants": ["default", "pypy"] } + "java", + { + "language": "nodejs", + "variants": { + "default": "default", + "bun": "bun", + "llrt": "llrt" + } + }, + { + "language": "python", + "variants": { + "default": "default", + "pypy": "pypy" + } + } ], "modules": [] } ``` +The `variants` field is a **dict** mapping each variant name to the source overlay directory +to apply for that variant (see [section 2](#2-variant-source-code-inside-a-benchmark) below). +The special sentinel value `"default"` means *use the base language directory without any +overlay* — no files are copied from a sub-directory. + SeBS validates this at startup: if you request a variant that is not listed here, the run is rejected with an error. +#### Deployment-mode-split variants + +Some variants behave differently depending on whether the function is deployed as a **code +package** (workers) or as a **container image**. For those cases the overlay directory can be +specified per deployment mode using a nested dict: + +```json +{ + "language": "nodejs", + "variants": { + "default": "default", + "cloudflare": {"workers": "cloudflare", "containers": "default"} + } +} +``` + +The inner dict must use the keys `"workers"` and/or `"containers"`. A missing key means the +variant is not supported in that deployment mode and SeBS will raise an error if it is +requested. A value of `"default"` means no overlay is applied for that mode (the base +language files are used unchanged). + +This is useful when a variant requires platform-specific source changes for one deployment +mode but can reuse the standard implementation for the other. For example, the `cloudflare` +variant of benchmarks that target Cloudflare Workers uses a Pyodide-aware implementation for +the `workers` mode, but falls back to the standard CPython implementation (`"default"`) for +the `containers` mode. + --- ### 2. Variant source code inside a benchmark @@ -120,7 +167,11 @@ language directory of the benchmark: benchmarks//// ``` -Two strategies are supported: +The overlay directory name comes from the value in the `variants` dict (or the inner +`workers`/`containers` value for deployment-mode-split variants). When that value is +`"default"`, no sub-directory is consulted and the base language files are used as-is. + +Two strategies are supported for non-`"default"` overlay directories: #### Patch variant (small targeted changes) diff --git a/docs/platforms.md b/docs/platforms.md index 14aae20c..c63acf6d 100644 --- a/docs/platforms.md +++ b/docs/platforms.md @@ -1,6 +1,6 @@ # Platform Configuration -SeBS supports three commercial serverless platforms: AWS Lambda, Azure Functions, and Google Cloud Functions. +SeBS supports four commercial serverless platforms: AWS Lambda, Azure Functions, Google Cloud Functions, and Cloudflare Workers. Furthermore, we support the open source FaaS system OpenWhisk. 
The file `configs/example.json` contains all parameters that users can change @@ -17,6 +17,7 @@ Supported platforms: * [Amazon Web Services (AWS) Lambda](#aws-lambda) * [Microsoft Azure Functions](#azure-functions) * [Google Cloud (GCP) Functions](#google-cloud-functions) +* [Cloudflare Workers](#cloudflare-workers) * [OpenWhisk](#openwhisk) ## Storage Configuration @@ -334,6 +335,185 @@ The current GCP backend has the following practical limits: * GCP deployments currently reject `arm64`, as arm64 instances are not available for GCR. * C++ packaging is not supported on GCP (but possible to be implemented on containers). +## Cloudflare Workers + +> [!NOTE] +> **Terminology mapping**: SeBS uses the term *function* throughout its CLI and configuration. On Cloudflare, the equivalent unit of deployment is a **Worker**. Wherever SeBS refers to a function (e.g. `--function-name`, `create_function`, `CloudflareWorker`), it refers to a Cloudflare Worker script deployed to `{name}.{account}.workers.dev`. + +Cloudflare offers a free tier for Workers with generous limits for development and testing. To use Cloudflare Workers with SeBS, you need to create a Cloudflare account and obtain API credentials. + +### Credentials + +SeBS supports both authentication methods Cloudflare offers. Both are +functionally equivalent for SeBS: every API call, R2 upload, KV +operation, and `wrangler` invocation works with either. Pick based on +your Cloudflare account, not on SeBS features: + +- **API Token (recommended)**: A scoped credential you mint in the + Cloudflare dashboard. It can be limited to the permissions SeBS needs + and revoked independently, so this is the safest default for most + users. +- **Email + Global API Key (legacy)**: Your account email plus the + Global API Key from the Cloudflare dashboard. SeBS still supports this + path for older setups and accounts that cannot use scoped tokens, but + it grants broad account access and should be handled more carefully. + +Regardless of which method you choose, you also need your account ID +from the Cloudflare dashboard. + +You can pass credentials using environment variables: + +```bash +# Option 1: API Token (recommended) +export CLOUDFLARE_API_TOKEN="your-api-token" +export CLOUDFLARE_ACCOUNT_ID="your-account-id" + +# Option 2: Email + Global API Key (legacy) +export CLOUDFLARE_EMAIL="your-email@example.com" +export CLOUDFLARE_API_KEY="your-global-api-key" +export CLOUDFLARE_ACCOUNT_ID="your-account-id" +``` + +or in the JSON configuration file: + +```json +"deployment": { + "name": "cloudflare", + "cloudflare": { + "credentials": { + "api_token": "your-api-token", + "account_id": "your-account-id" + }, + "resources": { + "resources_id": "unique-resource-id" + } + } +} +``` + +**Note**: The `resources_id` is used to uniquely identify and track resources created by SeBS for a specific deployment. + +### Language Support + +Cloudflare Workers support multiple languages through different deployment methods: + +- **JavaScript/Node.js**: Supported via script-based deployment or container-based deployment using Wrangler CLI +- **Python**: Supported via script-based deployment or container-based deployment using Wrangler CLI + +### CLI Container + +SeBS uses a containerized CLI approach for Cloudflare deployments, eliminating the need to install Node.js, npm, wrangler, pywrangler, or uv on your host system. The CLI container (`spcleth/serverless-benchmarks:manage.cloudflare`) is pulled from Docker Hub on first use and contains all necessary tools. 
This ensures consistent behavior across platforms and simplifies setup — only Docker is required. + +To build and push an updated `manage.cloudflare` image (developers only): + +```bash +sebs docker build --deployment cloudflare --image-type manage +sebs docker push --deployment cloudflare --image-type manage +``` + +#### Shared singleton and lifecycle + +`CloudflareCLI` is a process-wide singleton: both the script-based (`workers.py`) and container-based (`containers.py`) deployment handlers share a single `manage.cloudflare` Docker container. The first call to `CloudflareCLI.get_instance()` starts the container and registers a shutdown hook via `atexit`; subsequent calls from any handler or thread return the already-running instance. + +This has two consequences: + +- **Thread safety during creation** — `get_instance()` uses a double-checked lock so that when multiple benchmarks run in parallel (e.g. during `sebs regression`), only one thread starts the container while the others wait. +- **Lifecycle** — individual deployment handlers (and `Cloudflare.shutdown()`) drop their local reference to the instance but do not stop the container. The container is stopped exactly once at process exit by the `atexit` hook, regardless of whether SeBS is invoked directly (`sebs benchmark invoke`) or through the regression suite. + +### Deployment Architecture + +SeBS supports two deployment paths for Cloudflare: **script-based Workers** (native Workers runtime) and **container-based Workers** (Cloudflare's managed container runtime, fronted by a Durable-Object-backed Worker). Both paths share the same credentials, R2/KV resources, and HTTP trigger; they differ only in how code is packaged and which Cloudflare runtime executes it. The deployment type is controlled by the benchmark's `container_deployment` flag. + +#### Python modules (`sebs/cloudflare/`) + +| File | Responsibility | +|------|----------------| +| `cloudflare.py` | `Cloudflare(System)` facade. Verifies credentials, enforces `SUPPORTED_BENCHMARKS`, resolves the `workers.dev` URL, and dispatches `package_code`/`create_function`/`update_function` to the correct handler via `_get_deployment_handler(container_deployment)`. | +| `workers.py` | `CloudflareWorkersDeployment` — native script packaging. Node.js is bundled with esbuild via `nodejs/Dockerfile.build`; Python generates a `pyproject.toml` and is validated via `python/Dockerfile.build` (Pyodide resolution happens server-side at deploy time). | +| `containers.py` | `CloudflareContainersDeployment` — container packaging. Copies the per-language `Dockerfile.function` into the code directory, injects the `worker.js` orchestrator (Node-only, required by `@cloudflare/containers`), merges `package.json`, runs `npm install`, and builds a local image as a cache anchor. | +| `cli.py` | `CloudflareCLI` — runs the `manage.cloudflare` Docker container with the Docker socket mounted and exposes `wrangler_deploy`, `pywrangler_deploy`, `docker_build`, `upload_package`. Used by both deployment handlers; `cloudflare.py` never calls `wrangler` directly. | +| `config.py` | `CloudflareCredentials` / `CloudflareConfig` — API token, account ID, R2 keys. | +| `resources.py` | `CloudflareSystemResources` — factories for R2 and KV/Durable Objects. | +| `function.py` | `CloudflareWorker(Function)` — cached function metadata. | +| `triggers.py` | `HTTPTrigger` — invokes the deployed Worker at `https://{name}.{account}.workers.dev`. | +| `r2.py`, `kvstore.py` | Object and NoSQL storage clients. 
| + +Wrangler templates live alongside the deployment code at `sebs/cloudflare/templates/wrangler-worker.toml` and `sebs/cloudflare/templates/wrangler-container.toml` so they ship with the pip-packaged `sebs`. + +#### Dockerfiles (`dockerfiles/cloudflare/`) + +| File | Purpose | +|------|---------| +| `Dockerfile.manage` | Builds the `manage.cloudflare` CLI image (Node + global `wrangler` + `pywrangler` via `uv` + Docker CLI). Driven by `cli.py`. | +| `nodejs/Dockerfile.build` | Build image for **script-based** Node.js workers. Pulled once per session; benchmark source is bind-mounted to `/mnt/function` at build time and `cloudflare_nodejs_installer.sh` runs `npm install`, `esbuild`, and the benchmark's `build.js`/`postprocess.js` inside it. | +| `python/Dockerfile.build` | Build image for **script-based** Python workers. Pulled once per session; benchmark source is bind-mounted to `/mnt/function` at build time and `cloudflare_python_installer.sh` validates that `pywrangler` accepts the generated `pyproject.toml`. | +| `nodejs/Dockerfile.function` | Runtime image for **container-based** Node.js functions. Parameterized via `ARG BASE_IMAGE` from `config/systems.json`. Copied into the package by `containers.py` and rebuilt by `wrangler deploy`. | +| `python/Dockerfile.function` | Runtime image for **container-based** Python functions. Same parameterization. | + +#### Script-based flow (`container_deployment=false`) + +1. `benchmark.build()` → `CloudflareWorkersDeployment.package_code` copies source files into the package directory. +2. `Benchmark.install_dependencies()` pulls the matching `spcleth/serverless-benchmarks:build.cloudflare..` build image (see [Build Images](#build-images) below), bind-mounts the package directory to `/mnt/function`, and runs `/sebs/installer.sh` (`cloudflare_nodejs_installer.sh` or `cloudflare_python_installer.sh`) inside the container. +3. `Cloudflare.create_function` → `_create_or_update_worker` renders `sebs/cloudflare/templates/wrangler-worker.toml` into the package. +4. `CloudflareCLI.wrangler_deploy` (Node) or `pywrangler_deploy` (Python) deploys via the `manage.cloudflare` container. +5. `HTTPTrigger` is attached using the `workers.dev` URL. + +#### Container-based flow (`container_deployment=true`) + +1. **Local image build** — `benchmark.build()` calls `container_client.build_base_image()` on the `_CloudflareContainerAdapter` in `cloudflare.py`, which delegates to `CloudflareContainersDeployment.package_code`. It copies `{language}/Dockerfile.function` as `Dockerfile`, adds `worker.js`, merges `package.json`, and builds a local Docker image tagged `:` (e.g. `my-benchmark-python-312:20260426-130338`). The correct `BASE_IMAGE` is passed via Docker build args (resolved from `systems.json`). A timestamp tag is used instead of `:latest` because Cloudflare's registry explicitly rejects `:latest` tags. + +2. **Registry push** — `Cloudflare.create_function` → `_create_or_update_worker` calls `CloudflareCLI.containers_push(:)`, which runs `wrangler containers push` inside the `manage.cloudflare` container. Wrangler uploads the locally-built image to Cloudflare's managed registry and returns the full registry URI: `registry.cloudflare.com//:`. + +3. **`wrangler.toml` generation** — `_generate_wrangler_toml` renders `sebs/cloudflare/templates/wrangler-container.toml`. The template defaults to `image = "./Dockerfile"` (a local build path). 
When a registry URI is available, `containers.py` replaces this field with the registry URI (`config['containers'][0]['image'] = container_uri`), so wrangler points directly at the pre-pushed image and skips rebuilding the Dockerfile entirely. + +4. **Deploy** — `CloudflareCLI.wrangler_deploy` runs `npm install && wrangler deploy` inside the `manage.cloudflare` container. `npm install` materializes `node_modules/@cloudflare/containers` (listed in `package.json`) so that wrangler's bundler can resolve the `worker.js` import. Wrangler then deploys the Worker script and creates the Durable-Object-backed container worker backed by the registry image. + +5. **Rollout and instance readiness wait** — `wrangler deploy` compares the newly pushed registry image digest against the image currently running in the container worker. If the digest has changed, Cloudflare starts a rollout: it pulls the new image, replaces running instances, and sets `active_rollout_id` on the container application record for the duration. SeBS polls `GET /accounts/{id}/containers/applications/{uuid}` every 20 s in two phases: first it waits for `active_rollout_id` to disappear (rollout complete, can take up to 10 minutes for large containers), then it waits for `health.instances.healthy >= max_instances`. The `health.instances` sub-object tracks runtime state and is not formally documented by Cloudflare (derived from observed API responses): `starting` = still booting (image pull + Firecracker init), `healthy` = passed health check and ready to serve, `active` = currently handling a request (always 0 until the first invocation). The readiness threshold is `max_instances`, not the top-level `instances` field — in practice `instances = max_instances + 1` because Cloudflare counts one extra Durable Object coordination instance that never appears as healthy. Only once an instance is confirmed running does SeBS proceed to invoke the benchmark, avoiding the "no Container instance available" Durable Object error that would otherwise occur on cold starts. If wrangler reported "no changes" (digest unchanged), no rollout is started and this wait is skipped entirely. + +6. `HTTPTrigger` is attached using the `workers.dev` URL. + +### Build Images + +Script-based Worker builds use pre-built build images that are pulled once and reused across all benchmarks via bind-mounts — this is the same pattern SeBS uses for other platforms (see [build.md](build.md)). The images are tagged `spcleth/serverless-benchmarks:build.cloudflare..` (e.g. `build.cloudflare.nodejs.18`, `build.cloudflare.python.3.12`) and are available on Docker Hub. + +To build and push updated images yourself (e.g. after modifying a `Dockerfile.build` or an installer script): + +```bash +# Build all Cloudflare toolchain images locally +sebs docker build --deployment cloudflare + +# Push them to Docker Hub (requires push access to the repository) +sebs docker push --deployment cloudflare +``` + +To use a different Docker Hub repository, change `['general']['docker_repository']` in `configs/systems.json`. + +### Trigger Support + +- **HTTP Trigger**: ✅ Fully supported - Workers are automatically accessible at `https://{name}.{account}.workers.dev` +- **Library Trigger**: ❌ Not currently supported + +### Platform Limitations + +- **Cold Start Detection**: Cloudflare does not expose cold start information. All invocations report `is_cold: false` in the metrics. This limitation means cold start metrics are not available for Cloudflare Workers benchmarks. 
+- **Memory/Timeout Configuration (Workers)**: Managed by Cloudflare (128MB memory, 30s CPU time on free tier) +- **Memory/Timeout Configuration (Containers)**: Managed by Cloudflare, available in different tiers: + + | Instance Type | vCPU | Memory | Disk | + |---------------|------|--------|------| + | lite | 1/16 | 256 MiB | 2 GB | + | basic | 1/4 | 1 GiB | 4 GB | + | standard-1 | 1/2 | 4 GiB | 8 GB | + | standard-2 | 1 | 6 GiB | 12 GB | + | standard-3 | 2 | 8 GiB | 16 GB | + | standard-4 | 4 | 12 GiB | 20 GB | +- **Wall-Clock Timing**: Cloudflare Workers freezes `Date.now()` and `performance.now()` between I/O operations as a timing side-channel mitigation, so the clock does not advance inside pure-compute sections. To record a meaningful wall-clock `compute_time`, the handler issues a throwaway self-fetch (a `HEAD /favicon` request) before sampling the end time. This I/O call unfreezes the timer. See the [Cloudflare security model docs](https://developers.cloudflare.com/workers/reference/security-model/#step-1-disallow-timers-and-multi-threading) for details. +- **Metrics Collection**: Uses response-based per-invocation metrics. During each function invocation, the worker handler measures performance metrics (CPU time, wall time, memory usage) and embeds them directly in the JSON response. SeBS extracts these metrics immediately from each response. When `download_metrics()` is called for postprocessing, it only aggregates the metrics that were already collected during invocations—no additional data is fetched from external services. This approach provides immediate per-invocation granularity without delays. Note that while Cloudflare does expose an Analytics Engine, it only provides aggregated metrics without individual request-level data, making it unsuitable for detailed benchmarking purposes. + +### Storage Configuration + +Cloudflare Workers integrate with Cloudflare R2 for object storage and Durable Objects for NoSQL storage. For detailed storage configuration, see the [storage documentation](storage.md#cloudflare-storage). + ## OpenWhisk SeBS expects users to deploy and configure an OpenWhisk instance. diff --git a/docs/storage.md b/docs/storage.md index 2f6cc54c..011aa0bb 100644 --- a/docs/storage.md +++ b/docs/storage.md @@ -129,6 +129,55 @@ healthy: 192.168.0.20:9012 ``` ``` +## Cloudflare Storage + +Cloudflare Workers integrate with cloud-native storage services provided by Cloudflare: + +### R2 Object Storage + +Cloudflare R2 provides S3-compatible object storage for benchmarks that require persistent file storage. SeBS automatically configures R2 buckets for benchmark input and output data. + +**Key Features:** +- S3-compatible API +- No egress fees +- Global edge storage +- Integrated with Workers through bindings + +**Configuration:** +R2 configuration is handled automatically by SeBS when deploying to Cloudflare Workers. The storage resources are defined in your deployment configuration and SeBS manages bucket creation and access. + +**Limitations:** +- Geographic location hints (locationHint) are not currently supported. R2 buckets are created with Cloudflare's automatic location selection, which places data near where it's most frequently accessed. + +### Container Upload Behavior (R2 Proxy) + +For Cloudflare container deployments, benchmark code does not talk to R2 directly. Instead, container wrappers call the Worker proxy endpoints (`/r2/upload`, `/r2/multipart-init`, `/r2/multipart-part`, `/r2/multipart-complete`). 
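+
+A minimal sketch of what a container-side upload through the proxy can look like. The worker URL, query-parameter names, and response fields below are illustrative assumptions, not the wrappers' actual request format; the real wrappers also add retry behavior (see below):
+
+```python
+import os
+import requests  # assumed to be available in the container image
+
+PART_SIZE = 10 * 1024 * 1024  # 10 MB threshold and part size, as described below
+WORKER_URL = os.environ.get("WORKER_URL", "https://example.workers.dev")  # hypothetical
+
+def upload(bucket: str, key: str, filepath: str) -> None:
+    size = os.path.getsize(filepath)
+    if size <= PART_SIZE:
+        # Small payload: single request to the parent Worker, which holds the R2 binding.
+        with open(filepath, "rb") as f:
+            requests.post(f"{WORKER_URL}/r2/upload",
+                          params={"bucket": bucket, "key": key},
+                          data=f.read()).raise_for_status()
+        return
+    # Large payload: multipart init, then 10 MB parts, then complete.
+    init = requests.post(f"{WORKER_URL}/r2/multipart-init",
+                         params={"bucket": bucket, "key": key})
+    init.raise_for_status()
+    upload_id = init.json()["uploadId"]  # field name is an assumption
+    parts = []
+    part_number = 1
+    with open(filepath, "rb") as f:
+        while chunk := f.read(PART_SIZE):
+            r = requests.post(f"{WORKER_URL}/r2/multipart-part",
+                              params={"bucket": bucket, "key": key,
+                                      "uploadId": upload_id, "partNumber": part_number},
+                              data=chunk)
+            r.raise_for_status()
+            parts.append(r.json())  # e.g. part number and etag; also an assumption
+            part_number += 1
+    requests.post(f"{WORKER_URL}/r2/multipart-complete",
+                  params={"bucket": bucket, "key": key, "uploadId": upload_id},
+                  json={"parts": parts}).raise_for_status()
+```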
+ +**Why a proxy (and not a direct storage wrapper like other platforms)?** +On other platforms (AWS/GCP/Azure), the storage wrapper can be an SDK call because the function runtime and object store share a credential/SDK surface. Cloudflare R2 is different: the supported access path for Workers is the R2 **binding** (`env.R2_BUCKET`), which is a runtime object injected only inside the Worker runtime. A Cloudflare container runs in a separate runtime and has no access to that binding, so a container-side "storage wrapper" has nowhere to call. The only direct alternative is R2's S3-compatible HTTPS API, which would require provisioning R2 access keys and shipping them into each container — a second credential model that diverges from how the native Worker benchmarks talk to R2. Routing container storage calls through the parent Worker keeps a single code path and single credential model for both deployment types; the container-side `storage.js` wrapper still exists and still exposes the SeBS storage interface, it just implements those operations by forwarding to the Worker that holds the binding. + +**Upload strategy:** +- Small payloads use a single upload request. +- Large payloads use multipart upload (10 MB threshold, 10 MB part size in current wrappers). +- Node.js container wrapper retries with multipart when single-upload fails with size/body-limit style errors. + +**Object keys and uniqueness:** +- Container wrappers generate unique output keys (suffix based on UUID fragment) before upload. +- This avoids collisions and keeps run-specific output objects distinct in regression and repeated invocations. + +### KVStore for NoSQL + +Cloudflare KV namespaces are used for NoSQL operations required by benchmarks such as CRUD API (130.crud-api). + +**Key Features:** +- Native Workers integration through KV bindings +- Simple key-value interface compatible with SeBS NoSQL wrapper operations +- Global edge distribution for read-heavy access patterns + +**Usage:** +SeBS configures KV namespace bindings automatically for Cloudflare deployments that require NoSQL storage. Benchmark wrappers access KV through the standard SeBS NoSQL interface (insert/update/get/query/delete). + + ## Lifecycle Management By default, storage containers are retained after experiments complete. This allows you to run multiple experiments without redeploying and repopulating storage. diff --git a/pyproject.toml b/pyproject.toml index a092d381..377f19b5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,9 @@ dependencies = [ "pycurl>=7.43", "click>=7.1.2", "rich", + "patch-ng", + "tomli ; python_version < '3.11'", + "tomli_w", # Storage & Local "minio==5.0.10", @@ -106,6 +109,7 @@ packages = [ "sebs.aws", "sebs.azure", "sebs.gcp", + "sebs.cloudflare", "sebs.local", "sebs.openwhisk", "sebs.faas", @@ -149,6 +153,9 @@ sebs = ["py.typed"] "sebs.dockerfiles" = [ "**/*", ] +"sebs.cloudflare" = [ + "templates/*.toml", +] "sebs.tools" = [ "**/*.py", ] diff --git a/sebs/benchmark.py b/sebs/benchmark.py index 32cf62b9..7d9d8464 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -40,65 +40,94 @@ class LanguageSpec: """ Represents a language with its supported variants for a benchmark. - Parses the config language settings, supports both the legacy format - (e.g. "python") and the new dict format: + Parses the config.json ``languages`` entries. 
Supports three formats: - {"language": "nodejs", "variants": ["default", "bun", "llrt"]} + * Legacy string: ``"python"`` + → treated as ``{"default": "default"}`` - The legacy format is treated as having just the "default" variant. - """ + * New dict with simple (non-deployment-split) variants:: - def __init__(self, language: "Language", variants: List[str]): - """Initialize a language specification. + {"language": "nodejs", "variants": {"default": "default", "bun": "bun"}} - Args: - language: The programming language - variants: List of supported runtime variants for this language - """ + The value for each key is either the literal sentinel ``"default"`` + (meaning: use the base language directory, no overlay) or a subdirectory + name to use as an overlay (e.g. ``"cloudflare"`` → ``nodejs/cloudflare/``). + + * New dict with deployment-mode-split variants:: + + { + "language": "nodejs", + "variants": { + "default": "default", + "cloudflare": {"workers": "cloudflare", "containers": "default"} + } + } + + When the value is itself a dict, the keys are deployment modes + (``"workers"`` / ``"containers"``) and the values follow the same + sentinel / subdirectory convention. A missing mode key means the + benchmark is not supported in that deployment mode. + """ + + def __init__(self, language: "Language", variants: Dict[str, Any]): + """Store the language and its variant-to-directory mapping.""" self._language = language self._variants = variants @property def language(self) -> "Language": - """Get the programming language. - - Returns: - Language: The programming language - """ + """The programming language this spec applies to.""" return self._language @property - def variants(self) -> List[str]: - """Get the list of supported runtime variants. - - Returns: - List[str]: List of variant names (e.g., ["default", "pypy"]) - """ + def variants(self) -> Dict[str, Any]: + """Variant map: variant name → directory name or deployment-mode dict.""" return self._variants - @staticmethod - def deserialize(val) -> LanguageSpec: - """Deserialize a language specification from config. + def resolve_dir(self, variant: str, container_deployment: bool) -> str: + """Return the source subdirectory name for *variant* + deployment mode. - Args: - val: Either a string (legacy format) or dict with language and variants + Returns ``"default"`` (sentinel) when the base language directory should + be used without any overlay. Returns a subdirectory name (e.g. + ``"cloudflare"``) when an overlay should be applied from that subdir. - Returns: - LanguageSpec: Deserialized language specification + Raises ``RuntimeError`` when the variant or deployment mode is not + supported. 
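+
+        Example (illustrative, mirroring the deployment-mode-split entry
+        documented in ``docs/build.md``)::
+
+            spec = LanguageSpec(
+                Language.NODEJS,
+                {
+                    "default": "default",
+                    "cloudflare": {"workers": "cloudflare", "containers": "default"},
+                },
+            )
+            spec.resolve_dir("cloudflare", container_deployment=False)  # -> "cloudflare"
+            spec.resolve_dir("cloudflare", container_deployment=True)   # -> "default"
+            spec.resolve_dir("default", container_deployment=False)     # -> "default" (no overlay)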
""" + entry = self._variants.get(variant) + if entry is None: + raise RuntimeError( + f"Variant '{variant}' not declared for language {self._language.value}" + ) + if isinstance(entry, dict): + mode = "containers" if container_deployment else "workers" + dir_name = entry.get(mode) + if dir_name is None: + raise RuntimeError( + f"Variant '{variant}' does not support deployment mode '{mode}' " + f"for language {self._language.value}" + ) + return dir_name + return entry # str: "default" or a subdir name + + @staticmethod + def deserialize(val) -> "LanguageSpec": + """Build a LanguageSpec from a config.json language entry (string or dict).""" if isinstance(val, str): - return LanguageSpec(Language.deserialize(val), ["default"]) - return LanguageSpec( - Language.deserialize(val["language"]), - val.get("variants", ["default"]), - ) + # Legacy: "python" → only the default variant + return LanguageSpec(Language.deserialize(val), {"default": "default"}) + variants = val.get("variants") + if variants is None: + variants = {"default": "default"} + elif isinstance(variants, list): + # Old list format: ["default", "cloudflare"] + # Each name maps to itself ("default" stays as the sentinel). + variants = {v: v for v in variants} + # else: already the new dict format + return LanguageSpec(Language.deserialize(val["language"]), variants) def serialize(self) -> dict: - """Serialize the language specification to a dictionary. - - Returns: - dict: Dictionary with language and variants keys - """ + """Return a serializable dict of the language and its variant mapping.""" return { "language": self._language.value, "variants": self._variants, @@ -218,13 +247,20 @@ def supported_variants(self, language: Language) -> List[str]: or [] if the language has no implementation in this benchmark.""" for spec in self._language_specs: if spec.language == language: - return spec.variants + return list(spec.variants.keys()) return [] def supports(self, language: Language, variant: str) -> bool: """Return True when language + variant combination is declared in config.json.""" return variant in self.supported_variants(language) + def get_language_spec(self, language: Language) -> "LanguageSpec": + """Return the LanguageSpec for *language*, raising if not found.""" + for spec in self._language_specs: + if spec.language == language: + return spec + raise RuntimeError(f"Language {language.value} not declared in benchmark config") + @staticmethod def deserialize(json_object: dict) -> BenchmarkConfig: """ @@ -471,6 +507,38 @@ def language_variant(self) -> str: """ return self._language_variant + def select_variant(self, variant: str) -> None: + """Switch the active language variant and refresh the cache state. + + Should be called before build() whenever the deployment platform + needs to override the variant that was set from the experiment config. + Re-queries the cache with the updated variant key and re-applies + the update_code flag if it was set. + + Args: + variant: New variant name (e.g. "cloudflare"). + + Raises: + RuntimeError: If the variant is not declared for this benchmark. 
+ """ + if not self.benchmark_config.supports(self.language, variant): + raise RuntimeError( + f"Variant '{variant}' is not declared for benchmark " + f"{self.benchmark} language {self.language_name}" + ) + self._language_variant = variant + self._output_dir = os.path.join( + self._output_dir_base, + self._language.value, + self._language_variant, + self._language_version, + self._architecture, + "container" if self._system_variant.is_container else "package", + ) + self.query_cache() + if self._experiment_config.update_code: + self._is_cached_valid = False + @property def language_version(self) -> str: """ @@ -544,8 +612,13 @@ def hash(self) -> str: """ path = os.path.join(self.benchmark_path, self.language_name) self._hash_value = Benchmark.hash_directory( - path, self._deployment_name, self.language, self._language_variant + path, + self._deployment_name, + self.language, + self._language_variant, + container_deployment=self._system_variant.is_container, ) + assert self._hash_value is not None return self._hash_value @hash.setter # noqa: A003 @@ -625,9 +698,9 @@ def __init__( self._docker_client = docker_client self._system_config = system_config self._code_location: Optional[str] = None + self._output_dir_base = os.path.join(output_dir, f"{benchmark}_code") self._output_dir = os.path.join( - output_dir, - f"{benchmark}_code", + self._output_dir_base, self._language.value, self._language_variant, self._language_version, @@ -663,7 +736,11 @@ def __init__( @staticmethod def hash_directory( - directory: str, deployment: str, language: Language, variant: str = "default" + directory: str, + deployment: str, + language: Language, + variant: str = "default", + container_deployment: bool = False, ): """ Compute MD5 hash of an entire directory. @@ -730,6 +807,18 @@ def hash_directory( else: with open(f, "rb") as opened_file: hash_sum.update(opened_file.read()) + # For Cloudflare Python containers, also hash the nodejs/container worker.js. + # worker.js is shared between Node.js and Python container builds + # (@cloudflare/containers is Node.js-only), so containers.py copies it from + # nodejs/container/ into every container build directory regardless of language. + # Python's wrapper glob would otherwise miss it and stale builds wouldn't invalidate. + if deployment == "cloudflare" and language == Language.PYTHON and container_deployment: + nodejs_worker = get_resource_path( + "benchmarks", "wrappers", "cloudflare", "nodejs", "container", "worker.js" + ) + if os.path.isfile(str(nodejs_worker)): + with open(str(nodejs_worker), "rb") as worker_file: + hash_sum.update(worker_file.read()) return hash_sum.hexdigest() def serialize(self) -> dict: @@ -804,52 +893,59 @@ def copy_code(self, output_dir: str) -> None: shutil.copy2(nodejs_package_json, os.path.join(output_dir, "package.json")) if self._language_variant != "default": - variant_dir = os.path.join(path, self._language_variant) - if not os.path.isdir(variant_dir): - raise RuntimeError( - "Variant directory not found for benchmark {} language {} " - "variant {}: {}".format( - self.benchmark, self.language_name, self._language_variant, variant_dir - ) - ) + lang_spec = self.benchmark_config.get_language_spec(self.language) + overlay_dir_name = lang_spec.resolve_dir( + self._language_variant, self._system_variant.is_container + ) - patch_file = os.path.join(variant_dir, "patch.diff") - if os.path.exists(patch_file): - # Patch-based variant: a unified diff (patch.diff) is applied on top of the - # default implementation. 
Use this when the variant only needs small - # targeted changes to the base code (e.g. swapping async I/O for sync I/O - # in a runtime that lacks full async support). - # Apply unified diff on top of the already-copied base files - import patch_ng - - pset = patch_ng.fromfile(patch_file) - if not pset or not pset.apply(strip=1, root=output_dir): + if overlay_dir_name != "default": + variant_dir = os.path.join(path, overlay_dir_name) + if not os.path.isdir(variant_dir): raise RuntimeError( - "Failed to apply patch {} for variant {}".format( - patch_file, self._language_variant + "Variant directory not found for benchmark {} language {} " + "variant {}: {}".format( + self.benchmark, self.language_name, self._language_variant, variant_dir + ) + ) + + # Variants come in two flavors and this is where we split between them: + # 1. Patch-based (patch.diff present): apply a unified diff on top of the + # already-copied base files. Use when the variant only needs small, + # targeted edits to the default implementation (e.g. swapping async I/O + # for sync I/O in a runtime that lacks full async support). + # 2. Copy-based (no patch.diff): overlay the variant directory's files on + # top of the base files, replacing any that collide. Use when the + # variant diverges enough that a patch would be unwieldy. + patch_file = os.path.join(variant_dir, "patch.diff") + if os.path.exists(patch_file): + import patch_ng + + pset = patch_ng.fromfile(patch_file) + if not pset or not pset.apply(strip=1, root=output_dir): + raise RuntimeError( + "Failed to apply patch {} for variant {}".format( + patch_file, self._language_variant + ) + ) + self.logging.info( + "Applied patch for variant {} ({})".format( + self._language_variant, patch_file + ) + ) + else: + for file_type in FILES[self.language]: + for f in glob.glob(os.path.join(variant_dir, file_type)): + shutil.copy2(f, output_dir) + nodejs_variant_pkg = os.path.join( + variant_dir, f"package.json.{self.language_version}" + ) + if os.path.exists(nodejs_variant_pkg): + shutil.copy2(nodejs_variant_pkg, os.path.join(output_dir, "package.json")) + self.logging.info( + "Applied file overlay for variant {} (dir: {})".format( + self._language_variant, overlay_dir_name ) ) - self.logging.info( - "Applied patch for variant {} ({})".format(self._language_variant, patch_file) - ) - else: - # Overlay-based variant: the variant directory contains a complete - # replacement set of source files that fully override the default - # implementation. All files from the variant directory are copied - # on top of the already-placed base files. Use this when the variant - # is substantially different from the default (e.g. a full rewrite). - for file_type in FILES[self.language]: - for f in glob.glob(os.path.join(variant_dir, file_type)): - shutil.copy2(f, output_dir) - # version-specific package.json override for Node.js - nodejs_variant_pkg = os.path.join( - variant_dir, f"package.json.{self.language_version}" - ) - if os.path.exists(nodejs_variant_pkg): - shutil.copy2(nodejs_variant_pkg, os.path.join(output_dir, "package.json")) - self.logging.info( - "Applied file overlay for variant {}".format(self._language_variant) - ) def add_benchmark_data(self, output_dir: str) -> None: """Add benchmark-specific data and assets to output directory. 
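
For reference, a variant's `patch.diff` can be checked locally against a scratch copy of the base language files with the same `patch_ng` calls that `copy_code` uses above. The benchmark, variant, and directory paths below are hypothetical:

```python
import patch_ng

# Hypothetical paths: the base language files have already been copied into
# output_dir, and the variant directory ships a patch.diff overlay.
patch_file = "benchmarks/100.webapps/110.dynamic-html/python/somevariant/patch.diff"
output_dir = "110.dynamic-html_code/python/somevariant/3.11/x64/package"

pset = patch_ng.fromfile(patch_file)
if not pset or not pset.apply(strip=1, root=output_dir):
    raise RuntimeError(f"Failed to apply patch {patch_file}")
```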
diff --git a/sebs/cli.py b/sebs/cli.py index ec7bab57..58c45981 100755 --- a/sebs/cli.py +++ b/sebs/cli.py @@ -114,7 +114,7 @@ def common_params(func): @click.option( "--deployment", default=None, - type=click.Choice(["azure", "aws", "gcp", "local", "openwhisk"]), + type=click.Choice(["azure", "aws", "gcp", "local", "openwhisk", "cloudflare"]), help="Cloud deployment to use.", ) @click.option( @@ -498,12 +498,29 @@ def package( help="Filter resource IDs and URls from output.", ) @common_params +@click.option( + "--cache", + default=os.path.join(os.path.curdir, "regression-cache"), + help="Location of experiments cache.", +) +@click.option( + "--output-dir", + default=os.path.join(os.path.curdir, "regression-output"), + help="Output directory for results.", +) +@click.option( + "--deployment-type", + default=None, + type=click.Choice(["functions", "containers"]), + help="Limit regression to a specific deployment type (functions or containers).", +) def regression( benchmark_input_size, benchmark_name, storage_configuration, selected_architecture, filter_output, + deployment_type, **kwargs, ): """Run regression test suite across benchmarks.""" @@ -529,6 +546,8 @@ def regression( config, kwargs["resource_prefix"], benchmark_name, + deployment_type, + benchmark_input_size, architecture, filter_output, ) @@ -988,7 +1007,7 @@ def docker_cmd(): @click.option( "--deployment", default=None, - type=click.Choice(["local", "aws", "azure", "gcp", "openwhisk"]), + type=click.Choice(["local", "aws", "azure", "gcp", "openwhisk", "cloudflare"]), help="Deployment platform to build images for", ) @click.option( @@ -1070,7 +1089,7 @@ def docker_build( @click.option( "--deployment", default=None, - type=click.Choice(["local", "aws", "azure", "gcp", "openwhisk"]), + type=click.Choice(["local", "aws", "azure", "gcp", "openwhisk", "cloudflare"]), help="Deployment platform to push images for", ) @click.option( diff --git a/sebs/cloudflare/__init__.py b/sebs/cloudflare/__init__.py new file mode 100644 index 00000000..d8d5c47e --- /dev/null +++ b/sebs/cloudflare/__init__.py @@ -0,0 +1,6 @@ +"""Cloudflare Workers serverless platform implementation.""" + +from sebs.cloudflare.cloudflare import Cloudflare +from sebs.cloudflare.config import CloudflareConfig + +__all__ = ["Cloudflare", "CloudflareConfig"] diff --git a/sebs/cloudflare/cli.py b/sebs/cloudflare/cli.py new file mode 100644 index 00000000..6f738a87 --- /dev/null +++ b/sebs/cloudflare/cli.py @@ -0,0 +1,250 @@ +"""Cloudflare CLI container management for wrangler-based deployments.""" + +import atexit +import io +import logging +import os +import tarfile +import threading +from typing import Optional + +import docker + +from sebs.config import SeBSConfig +from sebs.utils import LoggingBase + + +class CloudflareCLI(LoggingBase): + """ + Manages a Docker container with Cloudflare Wrangler and related tools pre-installed. + + This approach isolates Cloudflare CLI tools (wrangler, pywrangler) from the host system, + avoiding global npm/uv installations and ensuring consistent behavior across platforms. + """ + + _instance: Optional["CloudflareCLI"] = None + _lock: threading.Lock = threading.Lock() + + @staticmethod + def get_instance( + system_config: SeBSConfig, docker_client: docker.client.DockerClient + ) -> "CloudflareCLI": + """Return the shared CloudflareCLI instance, creating it on first use. + + Container and native workers deployments share one underlying CLI + container so that combined runs don't spawn duplicates. 
+ Thread-safe: the first caller builds the container; concurrent callers wait. + """ + if CloudflareCLI._instance is None: + with CloudflareCLI._lock: + if CloudflareCLI._instance is None: + CloudflareCLI._instance = CloudflareCLI(system_config, docker_client) + atexit.register(CloudflareCLI.shutdown_instance) + return CloudflareCLI._instance + + def __init__(self, system_config: SeBSConfig, docker_client: docker.client.DockerClient): + """Pull the manage image if needed and start the CLI container.""" + super().__init__() + self._stopped = False + + repo_name = system_config.docker_repository() + sebs_version = system_config.version() + image_name = "manage.cloudflare" + versioned_tag = f"{image_name}-{sebs_version}" + try: + docker_client.images.get(repo_name + ":" + versioned_tag) + except docker.errors.ImageNotFound: + logging.info( + "Docker pull of image {repo}:{tag}".format(repo=repo_name, tag=versioned_tag) + ) + try: + docker_client.images.pull(repo_name, tag=versioned_tag) + except (docker.errors.APIError, docker.errors.ImageNotFound) as e: + raise RuntimeError( + "Docker pull of image {}:{} failed: {}".format(repo_name, versioned_tag, e) + ) + + # Start the container in detached mode + self.docker_instance = docker_client.containers.run( + image=repo_name + ":" + versioned_tag, + command="/bin/bash", + environment={ + "CONTAINER_UID": str(os.getuid()), + "CONTAINER_GID": str(os.getgid()), + "CONTAINER_USER": "docker_user", + }, + volumes={ + # Mount Docker socket so wrangler can build and push images to + # Cloudflare's registry during `wrangler deploy` for container workers. + "/var/run/docker.sock": {"bind": "/var/run/docker.sock", "mode": "rw"} + }, + remove=True, + stdout=True, + stderr=True, + detach=True, + tty=True, + ) + + self.logging.info(f"Started Cloudflare CLI container: {self.docker_instance.id}.") + + # Wait for container to be ready + while True: + try: + dkg = self.docker_instance.logs(stream=True, follow=True) + next(dkg).decode("utf-8") + break + except StopIteration: + pass + + @staticmethod + def typename() -> str: + """Return the canonical type name for this class.""" + return "Cloudflare.CLI" + + def execute(self, cmd: str, env: Optional[dict] = None): + """ + Execute the given command in Cloudflare CLI container. + Throws an exception on failure (commands are expected to execute successfully). + + Args: + cmd: Shell command to execute + env: Optional environment variables dict + + Returns: + Command output as bytes + """ + # Wrap command in sh -c to support shell features like cd, pipes, etc. + shell_cmd = ["/bin/sh", "-c", cmd] + exit_code, out = self.docker_instance.exec_run( + shell_cmd, + # Run as root since entrypoint creates docker_user but we don't wait for it + user="root", + environment=env, + ) + assert isinstance(out, bytes) + if exit_code != 0: + raise RuntimeError( + "Command {} failed at Cloudflare CLI docker!\n Output {}".format( + cmd, out.decode("utf-8") + ) + ) + return out + + def upload_package(self, directory: str, dest: str): + """ + Upload a directory to the Docker container. + + This is not an efficient and memory-intensive implementation. + So far, we didn't have very large functions that require many gigabytes. + + Since docker-py does not support a straightforward copy, and we can't + put_archive in chunks. 
+ + Args: + directory: Local directory to upload + dest: Destination path in container + """ + handle = io.BytesIO() + with tarfile.open(fileobj=handle, mode="w:gz") as tar: + for f in os.listdir(directory): + tar.add(os.path.join(directory, f), arcname=f) + + # Move to the beginning of memory before writing + handle.seek(0) + self.execute("mkdir -p {}".format(dest)) + self.docker_instance.put_archive(path=dest, data=handle.read()) + + def check_wrangler_version(self) -> str: + """ + Check wrangler version. + + Returns: + Version string + """ + out = self.execute("wrangler --version") + return out.decode("utf-8").strip() + + def check_pywrangler_version(self) -> str: + """ + Check pywrangler version. + + Returns: + Version string + """ + out = self.execute("pywrangler --version") + return out.decode("utf-8").strip() + + def containers_push(self, tag: str, env: Optional[dict] = None) -> str: + """ + Push a locally-built image to Cloudflare's container registry. + + The image must already exist locally (built by docker_client.images.build). + The manage container shares the host Docker socket, so it can see and push + local images directly. + + Args: + tag: Local image tag (e.g. my-bench-python-312:latest) + env: Environment variables (must include CLOUDFLARE_API_TOKEN and + CLOUDFLARE_ACCOUNT_ID) + + Returns: + Registry URI (registry.cloudflare.com//:) + """ + out = self.execute(f"wrangler containers push {tag}", env=env) + output = out.decode("utf-8") + for line in output.splitlines(): + if "registry.cloudflare.com" in line: + parts = line.split() + for part in parts: + if part.startswith("registry.cloudflare.com"): + return part.strip() + raise RuntimeError( + f"Could not parse registry URI from wrangler containers push output:\n{output}" + ) + + def wrangler_deploy(self, package_dir: str, env: Optional[dict] = None) -> str: + """ + Deploy a worker using wrangler. + + Args: + package_dir: Path to package directory in container + env: Environment variables for deployment + + Returns: + Deployment output + """ + cmd = "cd {} && npm install && wrangler deploy".format(package_dir) + out = self.execute(cmd, env=env) + return out.decode("utf-8") + + def pywrangler_deploy(self, package_dir: str, env: Optional[dict] = None) -> str: + """ + Deploy a Python worker using pywrangler. + + Args: + package_dir: Path to package directory in container + env: Environment variables for deployment + + Returns: + Deployment output + """ + cmd = "cd {} && pywrangler deploy".format(package_dir) + out = self.execute(cmd, env=env) + return out.decode("utf-8") + + @staticmethod + def shutdown_instance(): + """Stop the shared CLI container and clear the singleton. + + Call this once at process teardown, after all parallel benchmarks + have finished. Individual deployment handlers must NOT call this — + they should just drop their local reference. 
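+
+        The shutdown hook is registered exactly once, when ``get_instance``
+        creates the singleton::
+
+            atexit.register(CloudflareCLI.shutdown_instance)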
+ """ + with CloudflareCLI._lock: + instance = CloudflareCLI._instance + CloudflareCLI._instance = None + + if instance is not None and not instance._stopped: + instance._stopped = True + instance.logging.info("Stopping Cloudflare CLI Docker instance") + instance.docker_instance.stop() diff --git a/sebs/cloudflare/cloudflare.py b/sebs/cloudflare/cloudflare.py new file mode 100644 index 00000000..04603c3b --- /dev/null +++ b/sebs/cloudflare/cloudflare.py @@ -0,0 +1,1223 @@ +"""Cloudflare Workers platform implementation for SeBS.""" + +import os +import uuid +import time +from typing import cast, Dict, List, Optional, Tuple, Type + +import docker +import requests + +from sebs.cloudflare.config import CloudflareConfig +from sebs.cloudflare.function import CloudflareWorker +from sebs.cloudflare.resources import CloudflareSystemResources +from sebs.cloudflare.workers import CloudflareWorkersDeployment +from sebs.cloudflare.containers import CloudflareContainersDeployment +from sebs.benchmark import Benchmark +from sebs.cache import Cache +from sebs.config import SeBSConfig +from sebs.utils import LoggingHandlers +from sebs.faas.function import Function, ExecutionResult, Trigger, FunctionConfig +from sebs.experiments.config import SystemVariant +from sebs.faas.system import System +from sebs.faas.config import Resources +from sebs.sebs_types import Language + + +class _CloudflareContainerAdapter: + """Duck-typed adapter that satisfies benchmark.build()'s container_client contract. + + benchmark.build() calls container_client.build_base_image() when + container_deployment=True and asserts the client is not None. Cloudflare + builds its container images inside package_code (via containers.py), not + through a registry-backed DockerContainer, so this adapter bridges the gap + without touching the framework. + """ + + def __init__(self, containers_deployment: CloudflareContainersDeployment): + """Initialize the adapter with the given containers deployment handler.""" + self._containers = containers_deployment + # Populated by build_base_image() so create_function() can find the dir. + self.last_directory: Optional[str] = None + + def build_base_image( + self, + directory: str, + language, # sebs.sebs_types.Language enum + language_version: str, + architecture: str, + benchmark: str, + is_cached: bool, + builder_image: str, + ) -> Tuple[bool, str, float]: + """Delegate to containers.package_code; match benchmark.build() contract. + + Returns (rebuilt, image_tag, size_mb) so that: + _, self._container_uri, self._code_size = container_client.build_base_image(...) + works correctly in benchmark.build(). + """ + dir_result, size_bytes, image_tag = self._containers.package_code( + directory, + language.value, # Language enum → str + language_version, + architecture, + benchmark, + ) + self.last_directory = dir_result + size_mb = size_bytes / 1024.0 / 1024.0 + return (True, image_tag, size_mb) + + def push_to_registry( + self, + benchmark: str, + language_name: str, + language_version: str, + architecture: str, + ) -> str: + """ + Return a local cache label for the container image. + + Cloudflare container workers do not use a conventional image registry. + Instead, `wrangler deploy` reads `./Dockerfile` directly from the + package directory, builds the image, and pushes it to Cloudflare's + managed registry — all in one step. 
SeBS therefore never needs to + push an image to an external registry before deployment; this method + exists only to satisfy the `ContainerSystemInterface` contract and to + provide a stable cache key that `Benchmark` uses to detect whether a + previously-built image is still valid. + + The returned string is a local image tag of the form + ``--:latest``. It is + NOT a pushable URI and is not passed to any registry client. + """ + image_name = ( + f"{benchmark.replace('.', '-')}-{language_name}-" f"{language_version.replace('.', '')}" + ) + return f"{image_name}:latest" + + +class Cloudflare(System): + """ + Cloudflare Workers serverless platform implementation. + + Cloudflare Workers run on Cloudflare's edge network, providing + low-latency serverless execution globally. + """ + + # Benchmarks supported per (language, container_deployment) combination. + # Keys are (language_name, container_deployment). + # A value of None means all benchmarks are supported. + # Benchmark IDs are matched against the numeric prefix of the benchmark name + # (e.g. "110" matches "110.dynamic-html"). + SUPPORTED_BENCHMARKS: Dict[Tuple[str, bool], Optional[List[str]]] = { + ("python", False): ["110", "120", "130", "210", "311", "501", "502", "503"], + ("nodejs", False): ["110", "120", "130", "311"], + ("python", True): None, # all benchmarks supported + ("nodejs", True): ["110", "120", "130", "210", "311"], + } + + _config: CloudflareConfig + + @staticmethod + def name(): + """Return the platform name used in configuration and cache keys.""" + return "cloudflare" + + @staticmethod + def typename(): + """Return the human-readable type name for this platform.""" + return "Cloudflare" + + @staticmethod + def function_type() -> "Type[Function]": + """Return the Function subclass used by this platform.""" + return CloudflareWorker + + @property + def config(self) -> CloudflareConfig: + """Return the Cloudflare-specific platform configuration.""" + return self._config + + def is_benchmark_supported( + self, benchmark_name: str, language: str, container_deployment: bool + ) -> bool: + """Return True if the benchmark is supported for the given language/deployment type. + + Args: + benchmark_name: Full benchmark name, e.g. "110.dynamic-html" + language: Language name, e.g. "python" or "nodejs" + container_deployment: Whether this is a container deployment + + Returns: + True if supported, False otherwise + """ + allowed = self.SUPPORTED_BENCHMARKS.get((language, container_deployment)) + if allowed is None: + # None means all benchmarks are supported + return True + # Match by numeric prefix (the part before the first dot) + prefix = benchmark_name.split(".")[0] + return prefix in allowed + + def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) -> Function: + """Override to validate benchmark support and auto-select cloudflare variant.""" + language = code_package.language_name + container_deployment = code_package.system_variant.is_container + benchmark_name = code_package.benchmark + if not self.is_benchmark_supported(benchmark_name, language, container_deployment): + deployment_type = "container" if container_deployment else "worker" + raise RuntimeError( + f"Benchmark '{benchmark_name}' is not supported for " + f"{language} {deployment_type} deployments on Cloudflare. 
" + "Supported benchmarks: " + f"{self.SUPPORTED_BENCHMARKS.get((language, container_deployment))}" + ) + + # For workers deployments, auto-promote the variant from "default" to + # "cloudflare" when the benchmark's config.json declares a "cloudflare" + # variant. Benchmark.__init__ sets the variant from the experiment config + # (CLI --language-variant flag), which defaults to "default". Promoting + # here ensures copy_code() applies the cloudflare/ source overlay and the + # cache key reflects the correct variant. + if code_package.language_variant == "default" and code_package.benchmark_config.supports( + code_package.language, self.name() + ): + code_package.select_variant(self.name()) + + # The cache stores functions under their formatted name (e.g. + # "container-311-compression-nodejs-18"), but callers pass the + # unformatted default name. Format it here so the cache lookup in + # super().get_function() finds the right entry. + if func_name is not None: + func_name = self.format_function_name(func_name, container_deployment) + + return super().get_function(code_package, func_name) + + def __init__( + self, + sebs_config: SeBSConfig, + config: CloudflareConfig, + cache_client: Cache, + docker_client: docker.client.DockerClient, + logger_handlers: LoggingHandlers, + ): + """Initialize the Cloudflare platform with credentials and deployment handlers.""" + super().__init__( + sebs_config, + cache_client, + docker_client, + CloudflareSystemResources(config, cache_client, docker_client, logger_handlers), + ) + self.logging_handlers = logger_handlers + self._config = config + self._api_base_url = "https://api.cloudflare.com/client/v4" + # cached workers.dev subdomain for the account + # This is different from the account ID and is required to build + # public worker URLs like ..workers.dev + self._workers_dev_subdomain: Optional[str] = None + + # Initialize deployment handlers + self._workers_deployment = CloudflareWorkersDeployment( + self.logging, sebs_config, docker_client, self.system_resources + ) + self._containers_deployment = CloudflareContainersDeployment( + self.logging, sebs_config, docker_client, self.system_resources + ) + # Adapter so benchmark.build() can call container_client.build_base_image() + self._container_adapter = _CloudflareContainerAdapter(self._containers_deployment) + + def initialize( + self, + config: Dict[str, str] = {}, + resource_prefix: Optional[str] = None, + quiet: bool = False, + ): + """ + Initialize the Cloudflare Workers platform. + + Args: + config: Additional configuration parameters + resource_prefix: Prefix for resource naming + """ + # Verify credentials are valid + self._verify_credentials() + self.initialize_resources(select_prefix=resource_prefix) + + def initialize_resources(self, select_prefix: Optional[str] = None, quiet: bool = False): + """ + Initialize Cloudflare resources. + + Overrides the base class method to handle R2 storage gracefully. + Cloudflare Workers can operate without R2 storage for many benchmarks. 
+ + Args: + select_prefix: Optional prefix for resource naming + """ + deployments = self.find_deployments() + + # Check if we have an existing deployment + if deployments: + res_id = deployments[0] + self.config.resources.resources_id = res_id + self.logging.info(f"Using existing resource deployment {res_id}") + return + + # Create new resource ID + if select_prefix is not None: + res_id = f"{select_prefix}-{str(uuid.uuid1())[0:8]}" + else: + res_id = str(uuid.uuid1())[0:8] + + self.config.resources.resources_id = res_id + self.logging.info(f"Generating unique resource name {res_id}") + + # Try to create R2 bucket, but don't fail if R2 is not enabled + try: + self.system_resources.get_storage().get_bucket(Resources.StorageBucketType.BENCHMARKS) + self.logging.info("R2 storage initialized successfully") + except Exception as e: + self.logging.warning( + f"R2 storage initialization failed: {e}. " + "R2 must be enabled in your Cloudflare dashboard " + "to use storage-dependent benchmarks. " + "Continuing without R2 - only benchmarks that don't require storage will work." + ) + + @property + def container_client(self) -> _CloudflareContainerAdapter: # type: ignore[override] + """Return the Cloudflare-specific container build adapter. + + Overrides System.container_client (which returns None) so that + benchmark.build() can drive container image builds via + _CloudflareContainerAdapter.build_base_image() without needing an + external container registry. + """ + return self._container_adapter + + def _verify_credentials(self): + """Verify that the Cloudflare API credentials are valid.""" + # Check if credentials are set + if not self.config.credentials.api_token and not ( + self.config.credentials.email and self.config.credentials.api_key + ): + raise RuntimeError( + "Cloudflare API credentials are not set. Please set CLOUDFLARE_API_TOKEN " + "and CLOUDFLARE_ACCOUNT_ID environment variables." + ) + + if not self.config.credentials.account_id: + raise RuntimeError( + "Cloudflare Account ID is not set. Please set CLOUDFLARE_ACCOUNT_ID " + "environment variable." + ) + + headers = self._get_auth_headers() + + # Log credential type being used (without exposing the actual token) + if self.config.credentials.api_token: + token_preview = ( + self.config.credentials.api_token[:8] + "..." + if len(self.config.credentials.api_token) > 8 + else "***" + ) + self.logging.info(f"Using API Token authentication (starts with: {token_preview})") + else: + self.logging.info( + f"Using Email + API Key authentication (email: {self.config.credentials.email})" + ) + + response = requests.get(f"{self._api_base_url}/user/tokens/verify", headers=headers) + + if response.status_code != 200: + raise RuntimeError( + f"Failed to verify Cloudflare credentials: " + f"{response.status_code} - {response.text}\n" + "Please check that your CLOUDFLARE_API_TOKEN and CLOUDFLARE_ACCOUNT_ID are correct." + ) + + self.logging.info("Cloudflare credentials verified successfully") + + def _get_deployment_handler(self, container_deployment: bool): + """Get the appropriate deployment handler based on deployment type. 
+ + Args: + container_deployment: Whether this is a container deployment + + Returns: + CloudflareWorkersDeployment or CloudflareContainersDeployment + """ + if container_deployment: + return self._containers_deployment + else: + return self._workers_deployment + + def package_code( + self, + directory: str, + language: Language, + language_version: str, + architecture: str, + benchmark: str, + is_cached: bool, + ) -> Tuple[str, int]: + """ + Package code for native Cloudflare Workers deployment using Wrangler. + + Called by benchmark.build() via the non-container path. Container + builds are driven by _CloudflareContainerAdapter.build_base_image() + through the container_client property instead. + + Args: + directory: Path to the code directory + language: Programming language enum + language_version: Programming language version + architecture: Target architecture (not used for Workers) + benchmark: Benchmark name + is_cached: Whether the code is cached + + Returns: + Tuple of (package_path, package_size) + """ + # Native worker deployment flow — always the cloudflare variant. + # workers.py returns a 3-tuple (path, size, ""); drop the unused 3rd element. + pkg_path, pkg_size, _ = self._workers_deployment.package_code( + directory, + language.value, + language_version, + benchmark, + is_cached, + language_variant="cloudflare", + ) + return (pkg_path, pkg_size) + + def _get_auth_headers(self) -> Dict[str, str]: + """Get authentication headers for Cloudflare API requests.""" + if self.config.credentials.api_token: + return { + "Authorization": f"Bearer {self.config.credentials.api_token}", + "Content-Type": "application/json", + } + elif self.config.credentials.email and self.config.credentials.api_key: + return { + "X-Auth-Email": self.config.credentials.email, + "X-Auth-Key": self.config.credentials.api_key, + "Content-Type": "application/json", + } + else: + raise RuntimeError("Invalid Cloudflare credentials configuration") + + def _generate_wrangler_toml( + self, + worker_name: str, + package_dir: str, + language: str, + account_id: str, + benchmark_name: Optional[str] = None, + code_package: Optional[Benchmark] = None, + container_deployment: bool = False, + container_uri: Optional[str] = None, + ) -> str: + """ + Generate wrangler.toml by delegating to the appropriate deployment handler. + + Args: + worker_name: Name of the worker + package_dir: Directory containing the worker code + language: Programming language (nodejs or python) + account_id: Cloudflare account ID + benchmark_name: Optional benchmark name for R2 file path prefix + code_package: Optional benchmark package for nosql configuration + container_deployment: Whether this is a container deployment + container_uri: Container image URI/tag + + Returns: + Path to the generated wrangler.toml file + """ + language_variant = code_package.language_variant if code_package else "cloudflare" + handler = self._get_deployment_handler(container_deployment) + return handler.generate_wrangler_toml( + worker_name, + package_dir, + language, + account_id, + benchmark_name, + code_package, + container_uri, + language_variant, + ) + + def create_function( + self, + code_package: Benchmark, + func_name: str, + system_variant: SystemVariant, + container_uri: str | None, + ) -> CloudflareWorker: + """ + Create a new Cloudflare Worker. + + If a worker with the same name already exists, it will be updated. 
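+
+        The requested name is normalised via format_function_name(): e.g. the
+        default name "130.crud-api-python-39" becomes "130-crud-api-python-39"
+        (illustrative), and a container worker whose name starts with a digit
+        additionally receives a "container-" prefix.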
+ + Args: + code_package: Benchmark containing the function code + func_name: Name of the worker + system_variant: Selected deployment variant + container_uri: URI of container image + + Returns: + CloudflareWorker instance + """ + container_deployment = system_variant.is_container + # For container builds benchmark.build() goes through container_client.build_base_image(), + # which does NOT set code_package._code_location. Fall back in order: + # 1. _CloudflareContainerAdapter.last_directory (set when build actually ran this session) + # 2. code_package._output_dir (the on-disk build directory from a previous session — + # build() leaves it in place when the image cache is valid and the build is skipped) + package = code_package.code_location + if package is None and container_deployment: + package = self._container_adapter.last_directory + if package is None and container_deployment: + output_dir = code_package._output_dir + if os.path.isdir(output_dir): + package = output_dir + self.logging.info( + f"Using existing output directory for {code_package.benchmark}: {package}" + ) + + benchmark = code_package.benchmark + language = code_package.language_name + language_runtime = code_package.language_version + function_cfg = FunctionConfig.from_benchmark(code_package) + + func_name = self.format_function_name(func_name, container_deployment) + account_id = self.config.credentials.account_id + + if not account_id: + raise RuntimeError("Cloudflare account ID is required to create workers") + + # Check if worker already exists + existing_worker = self._get_worker(func_name, account_id) + + if package is None: + raise RuntimeError( + f"Code location is not set for {code_package.benchmark}. " + "The build step may not have completed successfully." + ) + + if existing_worker: + self.logging.info(f"Worker {func_name} already exists, updating it") + worker = CloudflareWorker( + func_name, + code_package.benchmark, + func_name, # script_id is the same as name + code_package.hash, + language_runtime, + function_cfg, + account_id, + ) + self.update_function(worker, code_package, system_variant, container_uri) + worker.updated_code = True + else: + self.logging.info(f"Creating new worker {func_name}") + + # Create the worker with all package files + self._create_or_update_worker( + func_name, + package, + account_id, + language, + benchmark, + code_package, + container_deployment, + container_uri, + ) + + worker = CloudflareWorker( + func_name, + code_package.benchmark, + func_name, + code_package.hash, + language_runtime, + function_cfg, + account_id, + ) + + # Add HTTPTrigger + from sebs.cloudflare.triggers import HTTPTrigger + + # Build worker URL using the account's workers.dev subdomain when possible. + # Falls back to account_id-based host or plain workers.dev with warnings. 
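+        # Illustrative result (hypothetical subdomain): a worker named
+        # "110-dynamic-html-python-39" on the account subdomain "my-team" is
+        # reachable at https://110-dynamic-html-python-39.my-team.workers.dev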
+ worker_url = self._build_workers_dev_url(func_name, account_id) + http_trigger = HTTPTrigger(func_name, worker_url) + http_trigger.logging_handlers = self.logging_handlers + worker.add_trigger(http_trigger) + + return worker + + def _get_worker(self, worker_name: str, account_id: str) -> Optional[dict]: + """Get information about an existing worker.""" + headers = self._get_auth_headers() + url = f"{self._api_base_url}/accounts/{account_id}/workers/scripts/{worker_name}" + + response = requests.get(url, headers=headers) + + if response.status_code == 200: + try: + return response.json().get("result") + except Exception: + return None + elif response.status_code == 404: + return None + else: + self.logging.warning(f"Unexpected response checking worker: {response.status_code}") + return None + + def _create_or_update_worker( + self, + worker_name: str, + package_dir: str, + account_id: str, + language: str, + benchmark_name: Optional[str] = None, + code_package: Optional[Benchmark] = None, + container_deployment: bool = False, + container_uri: str | None = None, + ) -> dict: + """Create or update a Cloudflare Worker using Wrangler CLI in container. + + Args: + worker_name: Name of the worker + package_dir: Directory containing handler and all benchmark files + account_id: Cloudflare account ID + language: Programming language (nodejs or python) + benchmark_name: Optional benchmark name for R2 file path prefix + code_package: Optional benchmark package for nosql configuration + container_deployment: Whether this is a container deployment + container_uri: Container image URI/tag + + Returns: + Worker deployment result + """ + # Set up environment for Wrangler CLI in container + env = {} + + if self.config.credentials.api_token: + env["CLOUDFLARE_API_TOKEN"] = self.config.credentials.api_token + elif self.config.credentials.email and self.config.credentials.api_key: + env["CLOUDFLARE_EMAIL"] = self.config.credentials.email + env["CLOUDFLARE_API_KEY"] = self.config.credentials.api_key + + env["CLOUDFLARE_ACCOUNT_ID"] = account_id + + # Get CLI container instance from appropriate deployment handler + handler = self._get_deployment_handler(container_deployment) + cli = handler._get_cli() + + # Push the locally-built container image to Cloudflare's registry so that + # wrangler deploy can reference it directly instead of rebuilding from the + # Dockerfile. Must happen before generating wrangler.toml so the registry + # URI is written in from the start. + if container_deployment and container_uri: + self.logging.info(f"Pushing container image {container_uri} to Cloudflare registry...") + container_uri = cli.containers_push(container_uri, env=env) + self.logging.info(f"Image pushed to: {container_uri}") + + # Generate wrangler.toml for this worker (uses registry URI if available) + self._generate_wrangler_toml( + worker_name, + package_dir, + language, + account_id, + benchmark_name, + code_package, + container_deployment, + container_uri, + ) + + # Upload package directory to container + container_package_path = f"/tmp/workers/{worker_name}" + self.logging.info(f"Uploading package to container: {container_package_path}") + cli.upload_package(package_dir, container_package_path) + + try: + self.logging.info(f"Deploying worker {worker_name} using Wrangler in container...") + + # pywrangler is used for all native Python workers (packages must be + # synced via pyproject.toml before wrangler uploads the bundle). + # All other cases — nodejs, containers — use wrangler directly. 
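+            # (Python containers also go through wrangler_deploy: their
+            # dependencies are handled by the container image build in
+            # containers.py, so no pyproject.toml sync is needed.)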
+ if not container_deployment and language == "python": + output = cli.pywrangler_deploy(container_package_path, env=env) + else: + output = cli.wrangler_deploy(container_package_path, env=env) + + self.logging.info(f"Worker {worker_name} deployed successfully") + self.logging.debug(f"Wrangler deploy output: {output}") + + # Wait for the worker to become reachable before returning. + account_id_val = env.get("CLOUDFLARE_ACCOUNT_ID") + worker_url = self._build_workers_dev_url(worker_name, account_id_val) + + if container_deployment: + container_name = self._containers_deployment._container_name_from_worker(worker_name) + # Cloudflare compares the newly pushed registry image against the + # image currently running in the container worker. If the image digest + # has changed, wrangler deploy triggers a rollout: Cloudflare pulls the + # new image, replaces the running instances, and sets active_rollout_id + # on the container application record until the rollout finishes. + # If nothing changed (same digest), wrangler reports "no changes" and + # no rollout is started — the container is already on the correct image. + if "no changes" in output.lower(): + self.logging.info( + f"Container {container_name} unchanged, skipping readiness wait." + ) + else: + # A rollout is in progress. Poll the Cloudflare REST API until + # active_rollout_id disappears, which signals that all container + # instances have been replaced and are serving the new image. + self.logging.info("Waiting for container rollout to complete...") + self._wait_for_container_rollout(container_name, account_id) + else: + self._wait_for_worker_ready(worker_name, worker_url) + + return {"success": True, "output": output} + + except RuntimeError as e: + error_msg = f"Wrangler deployment failed for worker {worker_name}: {str(e)}" + self.logging.error(error_msg) + raise RuntimeError(error_msg) + + def _wait_for_worker_ready( + self, worker_name: str, worker_url: str, max_wait_seconds: int = 60, poll_interval: int = 5 + ) -> None: + """Poll a native worker until it responds, confirming edge propagation.""" + self.logging.info( + f"Waiting up to {max_wait_seconds}s for worker {worker_name} to become reachable..." + ) + start = time.time() + while time.time() - start < max_wait_seconds: + try: + resp = requests.get(worker_url, timeout=10) + if resp.status_code not in (502, 503, 522, 524): + self.logging.info( + f"Worker {worker_name} is reachable (HTTP {resp.status_code})." + ) + return + except requests.exceptions.RequestException: + pass + time.sleep(poll_interval) + self.logging.warning( + f"Worker {worker_name} not confirmed reachable after {max_wait_seconds}s; " + "proceeding anyway — invocation retries will handle residual propagation delay." + ) + + def _get_container_id(self, container_name: str, account_id: str) -> Optional[str]: + """Resolve a container name to its UUID via the Cloudflare REST API. + + Lists all container applications for the account and returns the UUID + of the one whose name matches container_name, or None if not found yet. 
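+
+        Illustrative response shape (only the fields read below, inferred from
+        the parsing rather than from the full API reference):
+            {"result": [{"id": "<uuid>", "name": "<worker>-containerworker", ...}]}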
+ """ + url = f"{self._api_base_url}/accounts/{account_id}/containers/applications" + headers = self._get_auth_headers() + try: + resp = requests.get(url, headers=headers, timeout=30) + if resp.status_code != 200: + return None + items = resp.json().get("result", []) + for item in items: + if item.get("name") == container_name: + return item.get("id") + except requests.exceptions.RequestException: + pass + return None + + def _wait_for_container_rollout( + self, + container_name: str, + account_id: str, + max_wait_seconds: int = 900, + poll_interval: int = 20, + ) -> None: + """Poll the Cloudflare API until the container has rolled out and an instance is running. + + This covers two sequential phases using the same + GET /accounts/{id}/containers/applications/{uuid} endpoint: + + Phase 1 — Rollout: Cloudflare pulls the new image and replaces instances. + active_rollout_id is set for the duration. Large containers (e.g. ML inference + images) can take up to 10 minutes. Do not lower max_wait_seconds aggressively. + + Phase 2 — Instance readiness: After the rollout finishes, Cloudflare must start + at least one container instance before it can accept requests. The top-level + `instances` field is the configured/desired count. Runtime state lives under + `health.instances`: `starting` = still booting, `healthy` = passed health check + and ready to serve, `active` = currently handling a request (always 0 until the + first invocation). We wait until `health.instances.healthy >= max_instances`. + Note: the top-level `instances` field equals `max_instances + 1` because + Cloudflare adds one extra Durable Object coordination instance that never + becomes healthy — `max_instances` is the correct readiness threshold. + This avoids the + first benchmark invocation hitting a "no Container instance available" error + from the Durable Object. + + Args: + container_name: Cloudflare container name (e.g. my-worker-containerworker) + account_id: Cloudflare account ID + max_wait_seconds: Maximum seconds to wait (covers both phases) + poll_interval: Seconds between polls + """ + headers = self._get_auth_headers() + start = time.time() + container_id: Optional[str] = None + rollout_complete = False + + while time.time() - start < max_wait_seconds: + elapsed = int(time.time() - start) + try: + if container_id is None: + container_id = self._get_container_id(container_name, account_id) + if container_id is None: + self.logging.info( + f"Container {container_name} not registered yet... ({elapsed}s elapsed)" + ) + time.sleep(poll_interval) + continue + self.logging.info(f"Resolved container ID: {container_id}") + + url = f"{self._api_base_url}/accounts/{account_id}/containers/applications/{container_id}" + resp = requests.get(url, headers=headers, timeout=30) + if resp.status_code == 200: + data = resp.json().get("result", resp.json()) + active_rollout = data.get("active_rollout_id") + + if active_rollout: + self.logging.info( + f"Container {container_name} rollout in progress " + f"(rollout_id={active_rollout}, {elapsed}s elapsed)" + ) + else: + if not rollout_complete: + self.logging.info( + f"Container {container_name} rollout complete, " + "waiting for an instance to start..." + ) + rollout_complete = True + + # Phase 2: wait for at least one healthy instance so the + # first benchmark invocation does not hit a cold Durable Object. + # The top-level `instances` field is the configured/desired count, + # not the runtime state. 
Actual readiness is in health.instances: + # healthy — booted, passed health check, ready to serve (what we need > 0) + # starting — still booting (image pull + firecracker init) + # active — currently handling a request (always 0 until first invocation) + # The top-level `instances` field equals max_instances + 1 in practice: + # Cloudflare appears to count one extra Durable Object coordination + # instance that never appears as healthy. The `health.instances` + # sub-object tracks runtime state per instance (not formally documented + # by Cloudflare at time of writing, derived from observed API responses): + # healthy — passed health check, ready to serve requests + # starting — still booting (image pull + firecracker init) + # active — currently handling a request (0 until first invocation) + # Use max_instances as the readiness threshold since that is the + # configured number of workload instances. + max_instances = data.get("max_instances", 0) + health_instances = data.get("health", {}).get("instances", {}) + healthy = health_instances.get("healthy", 0) + starting = health_instances.get("starting", 0) + self.logging.debug(f"Container {container_name} health: {health_instances}") + if max_instances > 0 and healthy >= max_instances: + self.logging.info( + f"Container {container_name} is ready " + f"({healthy}/{max_instances} instances healthy)." + ) + return + self.logging.info( + f"Container {container_name} awaiting all instances to become healthy " + f"(healthy={healthy}/{max_instances}, starting={starting}, {elapsed}s elapsed)" + ) + else: + self.logging.info( + f"Unexpected API response {resp.status_code} ({elapsed}s elapsed)" + ) + except requests.exceptions.RequestException as e: + self.logging.debug(f"API request failed ({elapsed}s): {e}") + + time.sleep(poll_interval) + + raise RuntimeError( + f"Container {container_name} did not become ready after {max_wait_seconds}s." + ) + + def _get_workers_dev_subdomain(self, account_id: str) -> Optional[str]: + """Fetch the workers.dev subdomain for the given account. + + Cloudflare exposes an endpoint that returns the account-level workers + subdomain (the readable name used in *.workers.dev), e.g. + GET /accounts/{account_id}/workers/subdomain + + Returns the subdomain string or None on failure. + """ + if self._workers_dev_subdomain: + return self._workers_dev_subdomain + + try: + headers = self._get_auth_headers() + url = f"{self._api_base_url}/accounts/{account_id}/workers/subdomain" + resp = requests.get(url, headers=headers) + if resp.status_code == 200: + body = resp.json() + sub = None + # result may contain 'subdomain' or nested structure + if isinstance(body, dict): + sub = body.get("result", {}).get("subdomain") + + if sub: + self._workers_dev_subdomain = sub + return sub + else: + raise RuntimeError( + "Could not find workers.dev subdomain in API response; " + "please enable the workers.dev subdomain in your Cloudflare dashboard." + ) + else: + self.logging.warning( + f"Failed to fetch workers.dev subdomain: {resp.status_code} - {resp.text}" + ) + return None + except Exception as e: + self.logging.warning(f"Error fetching workers.dev subdomain: {e}") + return None + + def _build_workers_dev_url(self, worker_name: str, account_id: Optional[str]) -> str: + """Build a best-effort public URL for a worker. + + Prefer using the account's readable workers.dev subdomain when available + (e.g. ..workers.dev). If we can't obtain that, fall + back to using the account_id as a last resort and log a warning. 
+ """ + if account_id: + sub = self._get_workers_dev_subdomain(account_id) + return f"https://{worker_name}.{sub}.workers.dev" + # Last fallback: plain workers.dev (may not resolve without a subdomain) + self.logging.warning( + "No account ID available; using https://{name}.workers.dev which may not be reachable." + ) + return f"https://{worker_name}.workers.dev" + + def cached_function(self, function: Function): + """ + Handle a function retrieved from cache. + + Refreshes triggers and logging handlers, and verifies the worker still + exists on Cloudflare. If it has been deleted remotely, clear the hash + so the caller's hash-mismatch path triggers a full redeployment. + + Args: + function: The cached function + """ + for trigger in function.triggers(Trigger.TriggerType.HTTP): + trigger.logging_handlers = self.logging_handlers + + worker = cast(CloudflareWorker, function) + account_id = worker.account_id or self.config.credentials.account_id + if account_id and not self._get_worker(worker.name, account_id): + self.logging.info( + f"Cached worker {worker.name} no longer exists on Cloudflare " "— will redeploy." + ) + function.code_package_hash = "" + + def update_function( + self, + function: Function, + code_package: Benchmark, + system_variant: SystemVariant, + container_uri: str | None, + ): + """ + Update an existing Cloudflare Worker. + + Args: + function: Existing function instance to update + code_package: New benchmark containing the function code + system_variant: Selected deployment variant + container_uri: URI of container image + """ + container_deployment = system_variant.is_container + worker = cast(CloudflareWorker, function) + package = code_package.code_location + if package is None and container_deployment: + package = self._container_adapter.last_directory + language = code_package.language_name + benchmark = code_package.benchmark + + # Update the worker with all package files + account_id = worker.account_id or self.config.credentials.account_id + if not account_id: + raise RuntimeError("Account ID is required to update worker") + + if package is None and container_deployment: + output_dir = code_package._output_dir + if os.path.isdir(output_dir): + package = output_dir + if package is None: + raise RuntimeError( + f"Code location is not set for {benchmark}. " + "The build step may not have completed successfully." + ) + self._create_or_update_worker( + worker.name, + package, + account_id, + language, + benchmark, + code_package, + container_deployment, + container_uri, + ) + self.logging.info(f"Updated worker {worker.name}") + + # Update configuration if needed (no-op for containers: no runtime memory changes) + self.update_function_configuration(worker, code_package) + + def update_function_configuration(self, cached_function: Function, benchmark: Benchmark): + """ + Update the configuration of a Cloudflare Worker. + + Note: Cloudflare Workers have limited configuration options compared + to traditional FaaS platforms. Memory and timeout are managed by Cloudflare. 
+ + Args: + cached_function: The function to update + benchmark: The benchmark with new configuration + """ + # Cloudflare Workers have fixed resource limits: + # - CPU time: 50ms (free), 50ms-30s (paid) + # - Memory: 128MB + # Most configuration is handled via wrangler.toml or API settings + + worker = cast(CloudflareWorker, cached_function) + + # For environment variables or KV namespaces, we would use the API here + # For now, we'll just log that configuration update was requested + self.logging.warning( + f"Configuration update requested for worker {worker.name}. " + "Note: Cloudflare Workers have limited runtime configuration options." + ) + + def default_function_name(self, code_package: Benchmark, resources=None) -> str: + """ + Generate a default function name for Cloudflare Workers. + + Args: + code_package: The benchmark package + resources: Optional resources (not used) + + Returns: + Default function name + """ + # Cloudflare Worker names must be lowercase and can contain hyphens + name = ( + f"{code_package.benchmark}-{code_package.language_name}-" + f"{code_package.language_version.replace('.', '')}" + ).lower() + if code_package.language_variant != "default": + name = f"{name}-{code_package.language_variant}" + return name + + @staticmethod + def format_function_name(name: str, container_deployment: bool = False) -> str: + """ + Format a function name to comply with Cloudflare Worker naming rules. + + Worker names must: + - Be lowercase + - Contain only alphanumeric characters and hyphens + - Not start or end with a hyphen + - Not start with a digit + + Args: + name: The original name + container_deployment: Whether this is a container worker + (adds 'w-' prefix if name starts with digit) + + Returns: + Formatted name + """ + # Convert to lowercase and replace invalid characters + formatted = name.lower().replace("_", "-").replace(".", "-") + # Remove any characters that aren't alphanumeric or hyphen + formatted = "".join(c for c in formatted if c.isalnum() or c == "-") + # Remove leading/trailing hyphens + formatted = formatted.strip("-") + # Ensure container worker names don't start with a digit (Cloudflare requirement) + # Only add prefix for container workers to differentiate from native workers + if container_deployment and formatted and formatted[0].isdigit(): + formatted = "container-" + formatted + return formatted + + def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): + """ + Enforce cold start for Cloudflare Workers. + + Note: Cloudflare Workers don't have a traditional cold start mechanism + like AWS Lambda. Workers are instantiated on-demand at edge locations. + We can't force a cold start, but we can update the worker to invalidate caches. + + Args: + functions: List of functions to enforce cold start on + code_package: The benchmark package + """ + raise NotImplementedError( + "Cloudflare Workers do not support forced cold starts. " + "Workers are automatically instantiated on-demand at edge locations." + ) + + def download_metrics( + self, + function_name: str, + start_time: int, + end_time: int, + requests: Dict[str, ExecutionResult], + metrics: dict, + ): + """ + Extract per-invocation metrics from ExecutionResult objects. + + The metrics are extracted from the 'measurement' field in the benchmark + response, which is populated by the Cloudflare Worker handler during execution. + This approach avoids dependency on Analytics Engine and provides immediate, + accurate metrics for each invocation. 
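+
+        Billing fields are derived per invocation. Illustrative calculation:
+        with the fixed 128 MB memory and 20 ms of reported CPU time,
+        gb_seconds is (128 / 1024) GB * 0.020 s = 0.0025 GB-s, stored as
+        2500 micro GB-seconds.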
+ + Args: + function_name: Name of the worker + start_time: Start time (Unix timestamp in seconds) - not used + end_time: End time (Unix timestamp in seconds) - not used + requests: Dict mapping request_id -> ExecutionResult + metrics: Dict to store aggregated metrics + """ + if not requests: + self.logging.warning("No requests to extract metrics from") + return + + self.logging.info( + f"Extracting metrics from {len(requests)} invocations " f"of worker {function_name}" + ) + + # Aggregate statistics from all requests + total_invocations = len(requests) + cold_starts = 0 + warm_starts = 0 + cpu_times = [] + wall_times = [] + memory_values = [] + + for request_id, result in requests.items(): + # Count cold/warm starts + if result.stats.cold_start: + cold_starts += 1 + else: + warm_starts += 1 + + # Collect CPU times + if result.provider_times.execution > 0: + cpu_times.append(result.provider_times.execution) + + # Collect wall times (benchmark times) + if result.times.benchmark > 0: + wall_times.append(result.times.benchmark) + + # Collect memory usage + if result.stats.memory_used is not None and result.stats.memory_used > 0: + memory_values.append(result.stats.memory_used) + + # Set billing info for Cloudflare Workers + # Cloudflare billing: $0.50 per million requests + + # $12.50 per million GB-seconds of CPU time + if result.provider_times.execution > 0: + result.billing.memory = 128 # Cloudflare Workers: fixed 128MB + result.billing.billed_time = result.provider_times.execution # μs + + # GB-seconds calculation: (128MB / 1024MB/GB) * (cpu_time_us / 1000000 us/s) + cpu_time_seconds = result.provider_times.execution / 1_000_000.0 + gb_seconds = (128.0 / 1024.0) * cpu_time_seconds + result.billing.gb_seconds = int(gb_seconds * 1_000_000) # micro GB-seconds + + # Calculate statistics + metrics["cloudflare"] = { + "total_invocations": total_invocations, + "cold_starts": cold_starts, + "warm_starts": warm_starts, + "data_source": "response_measurements", + "note": "Per-invocation metrics extracted from benchmark response", + } + + if cpu_times: + metrics["cloudflare"]["avg_cpu_time_us"] = sum(cpu_times) // len(cpu_times) + metrics["cloudflare"]["min_cpu_time_us"] = min(cpu_times) + metrics["cloudflare"]["max_cpu_time_us"] = max(cpu_times) + metrics["cloudflare"]["cpu_time_measurements"] = len(cpu_times) + + if wall_times: + metrics["cloudflare"]["avg_wall_time_us"] = sum(wall_times) // len(wall_times) + metrics["cloudflare"]["min_wall_time_us"] = min(wall_times) + metrics["cloudflare"]["max_wall_time_us"] = max(wall_times) + metrics["cloudflare"]["wall_time_measurements"] = len(wall_times) + + if memory_values: + metrics["cloudflare"]["avg_memory_mb"] = sum(memory_values) / len(memory_values) + metrics["cloudflare"]["min_memory_mb"] = min(memory_values) + metrics["cloudflare"]["max_memory_mb"] = max(memory_values) + metrics["cloudflare"]["memory_measurements"] = len(memory_values) + + self.logging.info( + f"Extracted metrics from {total_invocations} invocations: " + f"{cold_starts} cold starts, {warm_starts} warm starts" + ) + + if cpu_times: + avg_cpu_ms = sum(cpu_times) / len(cpu_times) / 1000.0 + self.logging.info(f"Average CPU time: {avg_cpu_ms:.2f} ms") + + if wall_times: + avg_wall_ms = sum(wall_times) / len(wall_times) / 1000.0 + self.logging.info(f"Average wall time: {avg_wall_ms:.2f} ms") + + def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: + """ + Create a trigger for a Cloudflare Worker. 
+ + Args: + function: The function to create a trigger for + trigger_type: Type of trigger to create + + Returns: + The created trigger + """ + from sebs.cloudflare.triggers import HTTPTrigger + + worker = cast(CloudflareWorker, function) + + if trigger_type == Trigger.TriggerType.HTTP: + account_id = worker.account_id or self.config.credentials.account_id + worker_url = self._build_workers_dev_url(worker.name, account_id) + trigger = HTTPTrigger(worker.name, worker_url) + trigger.logging_handlers = self.logging_handlers + return trigger + else: + raise NotImplementedError( + f"Trigger type {trigger_type} is not supported for Cloudflare Workers" + ) + + def shutdown(self) -> None: + """ + Shutdown the Cloudflare system. + + Saves configuration to cache and shuts down deployment handler CLI containers. + """ + try: + self.cache_client.lock() + self.config.update_cache(self.cache_client) + finally: + self.cache_client.unlock() + + self._workers_deployment.shutdown() + self._containers_deployment.shutdown() diff --git a/sebs/cloudflare/config.py b/sebs/cloudflare/config.py new file mode 100644 index 00000000..c8a7a3dd --- /dev/null +++ b/sebs/cloudflare/config.py @@ -0,0 +1,313 @@ +"""Configuration classes for the Cloudflare Workers platform.""" + +import os +from typing import Optional, cast + +from sebs.cache import Cache +from sebs.faas.config import Config, Credentials, Resources +from sebs.utils import LoggingHandlers + + +class CloudflareCredentials(Credentials): + """ + Cloudflare API credentials. + + Two mutually exclusive authentication methods are supported; both are + functionally equivalent for every SeBS operation (API calls, R2, KV, + wrangler): + + - **API Token** (recommended): a scoped, revocable token created in the + Cloudflare dashboard. Env: ``CLOUDFLARE_API_TOKEN``. + - **Email + Global API Key** (legacy): the account email plus the + Global API Key. Grants broad account access; use only when scoped + tokens are not available. Env: ``CLOUDFLARE_EMAIL`` + + ``CLOUDFLARE_API_KEY``. + + Both methods additionally require ``CLOUDFLARE_ACCOUNT_ID``. + Optional R2 S3-compatible credentials (``CLOUDFLARE_R2_ACCESS_KEY_ID``, + ``CLOUDFLARE_R2_SECRET_ACCESS_KEY``) are needed for file uploads. + + See ``docs/platforms.md`` (Cloudflare Workers → Credentials) for full + setup instructions. 
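+
+    When credentials are supplied through the user config instead of the
+    environment, ``initialize()`` reads the keys ``api_token`` (or ``email``
+    plus ``api_key``), ``account_id``, and optionally ``r2_access_key_id``
+    and ``r2_secret_access_key``.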
+ """ + + def __init__( + self, + api_token: Optional[str] = None, + email: Optional[str] = None, + api_key: Optional[str] = None, + account_id: Optional[str] = None, + r2_access_key_id: Optional[str] = None, + r2_secret_access_key: Optional[str] = None, + ): + """Store Cloudflare API credentials supplied at construction time.""" + super().__init__() + + self._api_token = api_token + self._email = email + self._api_key = api_key + self._account_id = account_id + self._r2_access_key_id = r2_access_key_id + self._r2_secret_access_key = r2_secret_access_key + + @staticmethod + def typename() -> str: + """Return the canonical type name for this credentials class.""" + return "Cloudflare.Credentials" + + @property + def api_token(self) -> Optional[str]: + """Scoped API token for Cloudflare authentication.""" + return self._api_token + + @property + def email(self) -> Optional[str]: + """Account email used with the Global API Key authentication method.""" + return self._email + + @property + def api_key(self) -> Optional[str]: + """Global API Key used with the email authentication method.""" + return self._api_key + + @property + def account_id(self) -> Optional[str]: + """Cloudflare account ID required for all API operations.""" + return self._account_id + + @property + def r2_access_key_id(self) -> Optional[str]: + """S3-compatible access key ID for R2 bucket operations.""" + return self._r2_access_key_id + + @property + def r2_secret_access_key(self) -> Optional[str]: + """S3-compatible secret access key for R2 bucket operations.""" + return self._r2_secret_access_key + + @staticmethod + def initialize(dct: dict) -> "CloudflareCredentials": + """Build a CloudflareCredentials instance from a plain dictionary.""" + return CloudflareCredentials( + dct.get("api_token"), + dct.get("email"), + dct.get("api_key"), + dct.get("account_id"), + dct.get("r2_access_key_id"), + dct.get("r2_secret_access_key"), + ) + + @staticmethod + def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: + """Load credentials from config dict, falling back to environment variables.""" + cached_config = cache.get_config("cloudflare") + ret: CloudflareCredentials + account_id: Optional[str] = None + + # Load cached values + if cached_config and "credentials" in cached_config: + account_id = cached_config["credentials"].get("account_id") + + # Check for new config + if "credentials" in config: + ret = CloudflareCredentials.initialize(config["credentials"]) + elif "CLOUDFLARE_API_TOKEN" in os.environ: + ret = CloudflareCredentials( + api_token=os.environ["CLOUDFLARE_API_TOKEN"], + account_id=os.environ.get("CLOUDFLARE_ACCOUNT_ID"), + r2_access_key_id=os.environ.get("CLOUDFLARE_R2_ACCESS_KEY_ID"), + r2_secret_access_key=os.environ.get("CLOUDFLARE_R2_SECRET_ACCESS_KEY"), + ) + elif "CLOUDFLARE_EMAIL" in os.environ and "CLOUDFLARE_API_KEY" in os.environ: + ret = CloudflareCredentials( + email=os.environ["CLOUDFLARE_EMAIL"], + api_key=os.environ["CLOUDFLARE_API_KEY"], + account_id=os.environ.get("CLOUDFLARE_ACCOUNT_ID"), + r2_access_key_id=os.environ.get("CLOUDFLARE_R2_ACCESS_KEY_ID"), + r2_secret_access_key=os.environ.get("CLOUDFLARE_R2_SECRET_ACCESS_KEY"), + ) + else: + raise RuntimeError( + "Cloudflare login credentials are missing! 
Please set " + "up environmental variables CLOUDFLARE_API_TOKEN and CLOUDFLARE_ACCOUNT_ID, " + "or CLOUDFLARE_EMAIL, CLOUDFLARE_API_KEY, and CLOUDFLARE_ACCOUNT_ID" + ) + + if account_id is not None and ret.account_id is not None and account_id != ret.account_id: + ret.logging.error( + f"The account id {ret.account_id} from provided credentials is different " + f"from the account id {account_id} found in the cache! Please change " + "your cache directory or create a new one!" + ) + raise RuntimeError( + f"Cloudflare login credentials do not match the account {account_id} in cache!" + ) + + ret.logging_handlers = handlers + return ret + + def update_cache(self, cache: Cache): + """Persist the account ID to the local cache.""" + if self._account_id: + cache.update_config( + val=self._account_id, keys=["cloudflare", "credentials", "account_id"] + ) + + def serialize(self) -> dict: + """Return a serializable dict of non-secret credential fields.""" + out = {} + if self._account_id: + out["account_id"] = self._account_id + return out + + +class CloudflareResources(Resources): + """ + Resources for Cloudflare Workers deployment. + """ + + def __init__(self): + """Initialize Cloudflare resources with no namespace ID assigned.""" + super().__init__(name="cloudflare") + self._namespace_id: Optional[str] = None + + @staticmethod + def typename() -> str: + """Return the canonical type name for this resources class.""" + return "Cloudflare.Resources" + + @property + def namespace_id(self) -> Optional[str]: + """KV namespace ID associated with this resource deployment.""" + return self._namespace_id + + @namespace_id.setter + def namespace_id(self, value: str): + """Set the KV namespace ID for this resource deployment.""" + self._namespace_id = value + + @staticmethod + def initialize(res: Resources, dct: dict): + """Populate a CloudflareResources instance from a config dictionary.""" + ret = cast(CloudflareResources, res) + super(CloudflareResources, CloudflareResources).initialize(ret, dct) + + if "namespace_id" in dct: + ret._namespace_id = dct["namespace_id"] + + return ret + + def serialize(self) -> dict: + """Return a serializable dict of Cloudflare resource fields.""" + out = {**super().serialize()} + if self._namespace_id: + out["namespace_id"] = self._namespace_id + return out + + def update_cache(self, cache: Cache): + """Persist resource IDs to the local cache.""" + super().update_cache(cache) + if self._namespace_id: + cache.update_config( + val=self._namespace_id, keys=["cloudflare", "resources", "namespace_id"] + ) + + @staticmethod + def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resources: + """Load resources from cached or user-provided configuration.""" + ret = CloudflareResources() + cached_config = cache.get_config("cloudflare") + + # Load cached values + if cached_config and "resources" in cached_config: + CloudflareResources.initialize(ret, cached_config["resources"]) + ret.logging_handlers = handlers + ret.logging.info("Using cached resources for Cloudflare") + else: + # Check for new config + if "resources" in config: + CloudflareResources.initialize(ret, config["resources"]) + ret.logging_handlers = handlers + ret.logging.info( + "No cached resources for Cloudflare found, using user configuration." 
+ ) + else: + CloudflareResources.initialize(ret, {}) + ret.logging_handlers = handlers + ret.logging.info("No resources for Cloudflare found, initialize!") + + return ret + + +class CloudflareConfig(Config): + """ + Configuration for Cloudflare Workers platform. + """ + + def __init__(self, credentials: CloudflareCredentials, resources: CloudflareResources): + """Initialize configuration with the given credentials and resources.""" + super().__init__(name="cloudflare") + self._credentials = credentials + self._resources = resources + + @staticmethod + def typename() -> str: + """Return the canonical type name for this configuration class.""" + return "Cloudflare.Config" + + @property + def credentials(self) -> CloudflareCredentials: + """Cloudflare API credentials for this configuration.""" + return self._credentials + + @property + def resources(self) -> CloudflareResources: + """Cloudflare resource identifiers for this deployment.""" + return self._resources + + @staticmethod + def initialize(cfg: Config, dct: dict): + """Apply region and other fields from a config dictionary to an existing instance.""" + config = cast(CloudflareConfig, cfg) + # Cloudflare Workers are globally distributed, no region needed + config._region = dct.get("region", "global") + + @staticmethod + def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config: + """Build a CloudflareConfig from user config and cache, resolving credentials.""" + cached_config = cache.get_config("cloudflare") + credentials = cast( + CloudflareCredentials, CloudflareCredentials.deserialize(config, cache, handlers) + ) + resources = cast( + CloudflareResources, CloudflareResources.deserialize(config, cache, handlers) + ) + config_obj = CloudflareConfig(credentials, resources) + config_obj.logging_handlers = handlers + + # Load cached values + if cached_config: + config_obj.logging.info("Using cached config for Cloudflare") + CloudflareConfig.initialize(config_obj, cached_config) + else: + config_obj.logging.info("Using user-provided config for Cloudflare") + CloudflareConfig.initialize(config_obj, config) + + resources.region = config_obj.region + return config_obj + + def update_cache(self, cache: Cache): + """Persist region, credentials, and resources to the local cache.""" + cache.update_config(val=self.region, keys=["cloudflare", "region"]) + self.credentials.update_cache(cache) + self.resources.update_cache(cache) + + def serialize(self) -> dict: + """Return a serializable dict of the full Cloudflare configuration.""" + out = { + "name": "cloudflare", + "region": self._region, + "credentials": self._credentials.serialize(), + "resources": self._resources.serialize(), + } + return out diff --git a/sebs/cloudflare/containers.py b/sebs/cloudflare/containers.py new file mode 100644 index 00000000..d4e67b29 --- /dev/null +++ b/sebs/cloudflare/containers.py @@ -0,0 +1,388 @@ +""" +Cloudflare Container Workers deployment implementation. + +Handles packaging, Docker image building, and deployment of containerized +Cloudflare Workers using @cloudflare/containers. 
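+
+The overall flow, as implemented below: package_code() stages the
+container-specific wrapper files and builds the image locally with
+``docker buildx``; at deploy time the image is pushed to Cloudflare's managed
+registry and the generated wrangler.toml references it, so ``wrangler deploy``
+does not need to rebuild from the Dockerfile.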
+""" + +import os +import shutil +import json +import subprocess + +import time +from importlib.resources import files + +try: + import tomllib # type: ignore[import-not-found] # Python 3.11+ +except ImportError: + import tomli as tomllib # type: ignore[no-redef, import-not-found] # Fallback for older Python +try: + import tomli_w +except ImportError: + import toml as tomli_w # type: ignore[no-redef, import-untyped] +from typing import Optional, Tuple + + +from sebs.benchmark import Benchmark +from sebs.cloudflare.cli import CloudflareCLI +from sebs.utils import get_resource_path + + +class CloudflareContainersDeployment: + """Handles Cloudflare container worker deployment operations.""" + + def __init__(self, logging, system_config, docker_client, system_resources): + """ + Initialize CloudflareContainersDeployment. + + Args: + logging: Logger instance + system_config: System configuration + docker_client: Docker client instance + system_resources: System resources manager + """ + self.logging = logging + self.system_config = system_config + self.docker_client = docker_client + self.system_resources = system_resources + self._base_image: Optional[str] = None + self._cli: Optional[CloudflareCLI] = None + + def _get_cli(self) -> CloudflareCLI: + """Get or initialize the Cloudflare CLI container.""" + if self._cli is None: + self._cli = CloudflareCLI.get_instance(self.system_config, self.docker_client) + # Verify wrangler is available + version = self._cli.check_wrangler_version() + self.logging.info(f"Cloudflare CLI container ready: {version}") + return self._cli + + def generate_wrangler_toml( + self, + worker_name: str, + package_dir: str, + language: str, + account_id: str, + benchmark_name: Optional[str] = None, + code_package: Optional[Benchmark] = None, + container_uri: Optional[str] = None, + language_variant: str = "default", + ) -> str: + """ + Generate a wrangler.toml configuration file for container workers. + + Args: + worker_name: Name of the worker + package_dir: Directory containing the worker code + language: Programming language (nodejs or python) + account_id: Cloudflare account ID + benchmark_name: Optional benchmark name for R2 file path prefix + code_package: Optional benchmark package for nosql configuration + container_uri: Container image URI/tag + + Returns: + Path to the generated wrangler.toml file + """ + # Load template + template_path = ( + files("sebs.cloudflare").joinpath("templates").joinpath("wrangler-container.toml") + ) + with template_path.open("rb") as f: + config = tomllib.load(f) + + # Update basic configuration + config["name"] = worker_name + config["account_id"] = account_id + + if container_uri and container_uri.startswith("registry.cloudflare.com"): + # Pre-built image already pushed to Cloudflare registry — point wrangler + # at it directly so it skips the Docker build step entirely. + config["containers"][0]["image"] = container_uri + else: + # Fallback: let wrangler build from the local Dockerfile. 
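+            # BASE_IMAGE is forwarded as a Docker build argument; the copied
+            # Dockerfile.function is assumed to consume it via `ARG BASE_IMAGE`
+            # so the base image matches the version pinned in systems.json.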
+ if self._base_image: + config["containers"][0]["build_args"] = {"BASE_IMAGE": self._base_image} + + # Update container configuration with instance type if needed + if benchmark_name and ( + "411.image-recognition" in benchmark_name + or "311.compression" in benchmark_name + or "504.dna-visualisation" in benchmark_name + ): + self.logging.warning("Using standard-4 instance type for high resource benchmark") + config["containers"][0]["instance_type"] = "standard-4" + + # Add nosql KV namespace bindings if benchmark uses them + if code_package and code_package.uses_nosql: + # Get registered nosql tables for this benchmark + nosql_storage = self.system_resources.get_nosql_storage() + benchmark_for_nosql = benchmark_name or code_package.benchmark + if nosql_storage.retrieve_cache(benchmark_for_nosql): + nosql_tables = nosql_storage.get_tables(benchmark_for_nosql) + if nosql_tables: + config["kv_namespaces"] = config.get("kv_namespaces", []) + for table_name, namespace_id in nosql_tables.items(): + config["kv_namespaces"].append( + { + "binding": table_name, + "id": namespace_id, + } + ) + + # Add environment variables + if benchmark_name or (code_package and code_package.uses_nosql): + config["vars"] = {} + if benchmark_name: + config["vars"]["BENCHMARK_NAME"] = benchmark_name + if code_package and code_package.uses_nosql: + config["vars"]["NOSQL_STORAGE_DATABASE"] = "kvstore" + + # Add R2 bucket binding + from sebs.faas.config import Resources + + storage = self.system_resources.get_storage() + bucket_name = storage.get_bucket(Resources.StorageBucketType.BENCHMARKS) + if not bucket_name: + raise RuntimeError( + "R2 bucket binding not configured: benchmarks bucket name is empty. " + "Benchmarks requiring file access will not work properly." + ) + config["r2_buckets"] = [{"binding": "R2", "bucket_name": bucket_name}] + self.logging.info(f"R2 bucket '{bucket_name}' will be bound to worker as 'R2'") + + # Write wrangler.toml to package directory + toml_path = os.path.join(package_dir, "wrangler.toml") + try: + # Try tomli_w (writes binary) + with open(toml_path, "wb") as f: + tomli_w.dump(config, f) + except TypeError: + # Fallback to toml library (writes text) + with open(toml_path, "w") as f: + f.write(tomli_w.dumps(config)) + + self.logging.info(f"Generated wrangler.toml at {toml_path}") + return toml_path + + def package_code( + self, + directory: str, + language_name: str, + language_version: str, + architecture: str, + benchmark: str, + ) -> Tuple[str, int, str]: + """ + Package code for Cloudflare container worker deployment. + + Builds a Docker image and returns the image tag for deployment. + + Args: + directory: Path to the code directory + language_name: Programming language name + language_version: Programming language version + architecture: Target architecture + benchmark: Benchmark name + + Returns: + Tuple of (package_path, package_size, container_uri) + """ + self.logging.info(f"Packaging container for {language_name} {language_version}") + + # Get wrapper directory for container files + wrapper_base = str(get_resource_path("benchmarks", "wrappers", "cloudflare")) + wrapper_container_dir = os.path.join(wrapper_base, language_name, "container") + + if not os.path.exists(wrapper_container_dir): + raise RuntimeError(f"Container wrapper directory not found: {wrapper_container_dir}") + + # Overwrite the wrapper files staged by add_deployment_files() with the + # container-specific versions before doing anything else. 
+ if language_name == "python": + for f in ["handler.py", "storage.py", "nosql.py"]: + src = os.path.join(wrapper_container_dir, f) + if os.path.exists(src): + shutil.copy2(src, os.path.join(directory, f)) + elif language_name == "nodejs": + # add_deployment_files() stages the ESM workers variants of + # storage.js and nosql.js; replace them with the CJS container + # versions so the Node.js HTTP server can require() them. + for f in ["storage.js", "nosql.js"]: + src = os.path.join(wrapper_container_dir, f) + if os.path.exists(src): + shutil.copy2(src, os.path.join(directory, f)) + self.logging.info(f"Replaced {f} with container-specific version") + + # For Python: move benchmark code into function/ so that relative imports + # work natively, matching the workers and AWS layout. + # handler.py and requirements.txt* stay at the top level. + if language_name == "python": + func_dir = os.path.join(directory, "function") + os.makedirs(func_dir, exist_ok=True) + open(os.path.join(func_dir, "__init__.py"), "w").close() + dont_move = {"function", "handler.py"} + for item in os.listdir(directory): + if item in dont_move or item.startswith("requirements"): + continue + shutil.move(os.path.join(directory, item), os.path.join(func_dir, item)) + self.logging.info(f"Moved {item} into function/ package") + + # Copy Dockerfile.function from dockerfiles/cloudflare/{language}/ + dockerfile_src = str( + get_resource_path("dockerfiles", "cloudflare", language_name, "Dockerfile.function") + ) + dockerfile_dest = os.path.join(directory, "Dockerfile") + if os.path.exists(dockerfile_src): + # Get base image from systems.json for container deployments + container_images = self.system_config.benchmark_container_images( + "cloudflare", language_name, architecture + ) + base_image = container_images.get(language_version) + if not base_image: + raise RuntimeError( + f"No container base image found in systems.json for " + f"{language_name} {language_version} on {architecture}" + ) + self._base_image = base_image + + shutil.copy2(dockerfile_src, dockerfile_dest) + self.logging.info(f"Copied Dockerfile from {dockerfile_src}") + + # For nodejs, copy the container handler (no function/ subdir for nodejs). + if language_name == "nodejs": + handler_file = "handler.js" + shutil.copy2( + os.path.join(wrapper_container_dir, handler_file), + os.path.join(directory, handler_file), + ) + self.logging.info(f"Copied container {handler_file}") + + nodejs_wrapper_dir = os.path.join(wrapper_base, "nodejs", "container") + worker_js_src = os.path.join(nodejs_wrapper_dir, "worker.js") + if os.path.exists(worker_js_src): + shutil.copy2(worker_js_src, os.path.join(directory, "worker.js")) + self.logging.info("Copied worker.js orchestration file from nodejs/container") + + # Copy init.sh if the benchmark needs it (e.g. video-processing downloads ffmpeg) + from sebs.utils import find_benchmark + + benchmark_path = find_benchmark(benchmark, "benchmarks") + if benchmark_path: + for path in [benchmark_path, os.path.join(benchmark_path, language_name)]: + init_sh = os.path.join(path, "init.sh") + if os.path.exists(init_sh): + shutil.copy2(init_sh, os.path.join(directory, "init.sh")) + self.logging.info(f"Copied init.sh from {path}") + break + + # ALL containers need @cloudflare/containers for worker.js orchestration. + # For nodejs benchmarks, preserve the existing package.json and add the + # dependency. For Python, create a minimal package.json with just the dep. 
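+        # For a Python benchmark the file written below therefore reduces to
+        # (illustrative): {"dependencies": {"@cloudflare/containers": "*"}}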
+ package_json_path = os.path.join(directory, "package.json") + if language_name == "nodejs": + if not os.path.exists(package_json_path): + raise RuntimeError( + f"package.json not found at {package_json_path} " + f"for nodejs benchmark '{benchmark}'" + ) + with open(package_json_path, "r") as pkg_r: + package_json = json.load(pkg_r) + else: + package_json = {} + package_json.setdefault("dependencies", {})["@cloudflare/containers"] = "*" + with open(package_json_path, "w") as pkg_w: + json.dump(package_json, pkg_w, indent=2) + + # For Python containers, promote the versioned requirements.txt to requirements.txt + if language_name == "python": + requirements_file = os.path.join(directory, "requirements.txt") + versioned_requirements = os.path.join(directory, f"requirements.txt.{language_version}") + if os.path.exists(versioned_requirements): + shutil.copy2(versioned_requirements, requirements_file) + self.logging.info(f"Copied requirements.txt.{language_version} to requirements.txt") + elif not os.path.exists(requirements_file): + open(requirements_file, "w").close() + self.logging.info("Created empty requirements.txt") + + # Build the image locally. cache.py requires docker_client.images.get() to + # succeed for container deployments, and the local image is what we push to + # Cloudflare's registry during deploy (wrangler containers push). + image_tag = self._build_container_image_local( + directory, benchmark, language_name, language_version + ) + + # Calculate package size (approximate, as it's a source directory) + total_size = 0 + for dirpath, dirnames, filenames in os.walk(directory): + for filename in filenames: + filepath = os.path.join(dirpath, filename) + total_size += os.path.getsize(filepath) + + self.logging.info(f"Container package prepared (image tag: {image_tag})") + + return (directory, total_size, image_tag) + + def _build_container_image_local( + self, + directory: str, + benchmark: str, + language_name: str, + language_version: str, + ) -> str: + """ + Build the container image locally. + + The local image is pushed to Cloudflare's registry via + `wrangler containers push` during deployment, so wrangler deploy can + reference it directly without rebuilding from the Dockerfile. + + Returns the local image tag. + """ + # Generate image tag + image_name = ( + f"{benchmark.replace('.', '-')}-{language_name}-{language_version.replace('.', '')}" + ) + version_tag = time.strftime("%Y%m%d-%H%M%S") + image_tag = f"{image_name}:{version_tag}" + + self.logging.info(f"Building container image {image_tag} for linux/amd64...") + + result = subprocess.run( + [ + "docker", + "buildx", + "build", + "--platform", + "linux/amd64", + "--load", + "--no-cache", + "-t", + image_tag, + directory, + ], + capture_output=True, + text=True, + ) + if result.returncode != 0: + self.logging.error(result.stderr) + raise RuntimeError(f"Docker build failed for {image_tag}:\n{result.stderr}") + + self.logging.info(f"Container image built: {image_tag}") + return image_tag + + @staticmethod + def _container_name_from_worker(worker_name: str) -> str: + """Return the Cloudflare container name for a given worker name. + + Cloudflare appends the Durable Object class name (lowercased) to the worker + name to form the container name, e.g.: + worker: container-311-compression-nodejs-18 + container: container-311-compression-nodejs-18-containerworker + """ + return f"{worker_name}-containerworker" + + def shutdown(self): + """Drop the local CLI reference. 
The shared container is owned by CloudflareCLI;
+        call CloudflareCLI.shutdown_instance() once at process teardown."""
+        self._cli = None
diff --git a/sebs/cloudflare/function.py b/sebs/cloudflare/function.py
new file mode 100644
index 00000000..203aa341
--- /dev/null
+++ b/sebs/cloudflare/function.py
@@ -0,0 +1,66 @@
+"""Cloudflare Workers function and trigger definitions."""
+
+from typing import Optional
+
+from sebs.faas.function import Function, FunctionConfig
+
+
+class CloudflareWorker(Function):
+    """
+    Cloudflare Workers function implementation.
+
+    A Cloudflare Worker is a serverless function that runs on Cloudflare's edge network.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        benchmark: str,
+        script_id: str,
+        code_package_hash: str,
+        runtime: str,
+        cfg: FunctionConfig,
+        account_id: Optional[str] = None,
+    ):
+        """Create a CloudflareWorker with the given script ID, runtime, and account."""
+        super().__init__(benchmark, name, code_package_hash, cfg)
+        self.script_id = script_id
+        self.runtime = runtime
+        self.account_id = account_id
+
+    @staticmethod
+    def typename() -> str:
+        """Return the canonical type name for this function class."""
+        return "Cloudflare.Worker"
+
+    def serialize(self) -> dict:
+        """Return a serializable dict including script ID, runtime, and account."""
+        return {
+            **super().serialize(),
+            "script_id": self.script_id,
+            "runtime": self.runtime,
+            "account_id": self.account_id,
+        }
+
+    @staticmethod
+    def deserialize(cached_config: dict) -> "CloudflareWorker":
+        """Reconstruct a CloudflareWorker from a cached configuration dict."""
+        from sebs.cloudflare.triggers import HTTPTrigger
+
+        cfg = FunctionConfig.deserialize(cached_config["config"])
+        ret = CloudflareWorker(
+            cached_config["name"],
+            cached_config["benchmark"],
+            cached_config["script_id"],
+            cached_config["hash"],
+            cached_config["runtime"],
+            cfg,
+            cached_config.get("account_id"),
+        )
+
+        for trigger in cached_config["triggers"]:
+            trigger_type = HTTPTrigger if trigger["type"] == HTTPTrigger.typename() else None
+            assert trigger_type, "Unknown trigger type {}".format(trigger["type"])
+            ret.add_trigger(trigger_type.deserialize(trigger))
+
+        return ret
diff --git a/sebs/cloudflare/kvstore.py b/sebs/cloudflare/kvstore.py
new file mode 100644
index 00000000..163df968
--- /dev/null
+++ b/sebs/cloudflare/kvstore.py
@@ -0,0 +1,441 @@
+"""Cloudflare KV namespace-backed NoSQL storage implementation."""
+
+import hashlib
+import json
+import re
+from collections import defaultdict
+from typing import Dict, List, Optional, Tuple
+from urllib.parse import quote
+
+import requests
+
+from sebs.cache import Cache
+from sebs.cloudflare.config import CloudflareCredentials
+from sebs.faas.config import Resources
+from sebs.faas.nosql import NoSQLStorage
+
+
+class KVStore(NoSQLStorage):
+    """
+    Cloudflare KV-backed NoSQL storage for SeBS.
+
+    Cloudflare KV is a flat key-value store: there are no tables, schemas, or
+    secondary indexes. The SeBS NoSQL abstraction (modelled after DynamoDB /
+    Cosmos DB / Datastore) is therefore layered on top of KV as follows.
+
+    Table -> namespace mapping
+    --------------------------
+    Each (benchmark, logical table) pair is mapped to exactly one KV namespace
+    -- the coarsest isolation unit KV offers. Namespaces are titled
+
+        sebs-nosql-<resources_id>-<benchmark>-<table>
+
+    with each component sanitized to ``[A-Za-z0-9_-]`` and a SHA1 suffix
+    appended when the title would exceed Cloudflare's 100-character limit
+    (see ``_namespace_title``).
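+    For example (illustrative values), benchmark ``130.crud-api`` with table
+    ``users`` under resources id ``abc123`` maps to the title
+    ``sebs-nosql-abc123-130-crud-api-users``.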
+    A one-namespace-per-table layout is used
+    instead of packing multiple tables into a shared namespace because:
+
+    * Workers bind namespaces by id, so one binding per table is the natural
+      way to expose the logical table to the benchmark code.
+    * ``cleanup_tables`` / ``remove_table`` can drop a whole table by deleting
+      its namespace -- KV has no bulk-delete-by-prefix primitive.
+    * Key collisions between benchmarks or logical tables are impossible.
+
+    Key mapping
+    -----------
+    Items are stored as JSON values under composite keys:
+
+        <primary>#<secondary>  (when a secondary key exists)
+        <primary>              (otherwise)
+
+    The primary and secondary key fields are also written back into the JSON
+    value so that clients reading an item do not have to re-parse the key.
+
+    Secondary-key indices
+    ---------------------
+    KV exposes a ``list`` API, but from inside a Worker it is paginated,
+    eventually consistent, and scales with the total namespace size -- not
+    with the number of items under a given primary key. DynamoDB-style query
+    patterns ("give me every item with primary key = X") would therefore be
+    prohibitively expensive if implemented via ``list``.
+
+    To support those queries with point reads only, ``write_to_table``
+    additionally maintains a per-primary-key index entry:
+
+        __sebs_idx__<primary> -> JSON array of secondary-key values
+
+    A query then becomes one ``GET`` for the index followed by one ``GET`` per
+    secondary value. The index is only written when a secondary key is
+    supplied; tables without a secondary key do not need it. The matching
+    read path lives in ``benchmarks/wrappers/cloudflare/*/nosql.*``.
+    """
+
+    NAMESPACE_ID_PATTERN = re.compile(r"^[a-fA-F0-9]{32}$")
+
+    @staticmethod
+    def typename() -> str:
+        """Return the canonical type name for this storage class."""
+        return "Cloudflare.KVStore"
+
+    @staticmethod
+    def deployment_name() -> str:
+        """Return the deployment platform name."""
+        return "cloudflare"
+
+    def __init__(
+        self,
+        region: str,
+        cache_client: Cache,
+        resources: Resources,
+        credentials: CloudflareCredentials,
+    ):
+        """Initialize KV storage with Cloudflare credentials."""
+        super().__init__(region, cache_client, resources)
+        self._credentials = credentials
+        # benchmark -> logical table name -> KV namespace id
+        self._tables: Dict[str, Dict[str, str]] = defaultdict(dict)
+
+    def _account_id(self) -> str:
+        """Return the account ID, raising if not configured."""
+        account_id = self._credentials.account_id
+        if not account_id:
+            raise RuntimeError("Cloudflare account ID is required for KV operations")
+        return account_id
+
+    def _kv_api_base(self) -> str:
+        """Return the base URL for the Cloudflare KV namespace API."""
+        account = self._account_id()
+        return f"https://api.cloudflare.com/client/v4/accounts/{account}/storage/kv/namespaces"
+
+    def _get_auth_headers(self, content_type: str = "application/json") -> dict[str, str]:
+        """Get authentication headers for Cloudflare API requests."""
+        if self._credentials.api_token:
+            return {
+                "Authorization": f"Bearer {self._credentials.api_token}",
+                "Content-Type": content_type,
+            }
+        elif self._credentials.email and self._credentials.api_key:
+            return {
+                "X-Auth-Email": self._credentials.email,
+                "X-Auth-Key": self._credentials.api_key,
+                "Content-Type": content_type,
+            }
+        else:
+            raise RuntimeError("Invalid Cloudflare credentials configuration")
+
+    @classmethod
+    def _is_namespace_id(cls, value: str) -> bool:
+        """Return True if value matches the 32-character hex namespace ID pattern."""
+        return
bool(cls.NAMESPACE_ID_PATTERN.fullmatch(value)) + + def _resource_id(self) -> str: + """Return the resource prefix used in namespace titles.""" + if self._cloud_resources.has_resources_id: + return self._cloud_resources.resources_id + return "default" + + @staticmethod + def _sanitize_component(value: str) -> str: + """Replace characters not allowed in KV namespace titles with hyphens.""" + sanitized = re.sub(r"[^A-Za-z0-9_-]", "-", value) + return sanitized.strip("-") or "default" + + def _namespace_title(self, benchmark: str, table: str) -> str: + """Build a deterministic KV namespace title for the given benchmark and table.""" + title = ( + f"sebs-nosql-{self._sanitize_component(self._resource_id())}-" + f"{self._sanitize_component(benchmark)}-{self._sanitize_component(table)}" + ) + # KV namespace title has length constraints; keep a deterministic suffix if truncated. + max_len = 100 + if len(title) > max_len: + digest = hashlib.sha1(title.encode("utf-8")).hexdigest()[:12] + title = f"{title[: max_len - 13]}-{digest}" + return title + + def _list_namespaces(self) -> List[dict]: + """Fetch all KV namespaces for the account, following pagination.""" + namespaces: List[dict] = [] + page = 1 + per_page = 100 + + while True: + response = requests.get( + self._kv_api_base(), + params={"page": page, "per_page": per_page}, + headers=self._get_auth_headers(), + ) + response.raise_for_status() + payload = response.json() + + if not payload.get("success"): + raise RuntimeError(f"Failed to list KV namespaces: {payload.get('errors')}") + + page_items = payload.get("result", []) + namespaces.extend(page_items) + + page_info = payload.get("result_info", {}) or {} + total_pages = int(page_info.get("total_pages", 1)) + if page >= total_pages: + break + page += 1 + + return namespaces + + def _find_namespace_id_by_title(self, title: str) -> Optional[str]: + """Return the namespace ID whose title matches, or None if not found.""" + for namespace in self._list_namespaces(): + if namespace.get("title") == title: + return namespace.get("id") + return None + + def _delete_namespace(self, namespace_id: str) -> None: + """Delete the KV namespace with the given ID, ignoring 404 responses.""" + response = requests.delete( + f"{self._kv_api_base()}/{namespace_id}", + headers=self._get_auth_headers(), + ) + if response.status_code == 404: + return + response.raise_for_status() + + if response.content: + payload = response.json() + if not payload.get("success"): + raise RuntimeError( + f"Failed to delete KV namespace {namespace_id}: {payload.get('errors')}" + ) + + @staticmethod + def _compose_key( + primary_key: Tuple[str, str], secondary_key: Optional[Tuple[str, str]] = None + ) -> str: + """Build the KV storage key from primary and optional secondary key tuples.""" + if secondary_key is None: + return str(primary_key[1]) + return f"{primary_key[1]}#{secondary_key[1]}" + + @staticmethod + def _index_key(primary_value: str) -> str: + """Return the KV key used to store the secondary-key index for a primary value.""" + return f"__sebs_idx__{primary_value}" + + def _read_index(self, namespace_id: str, primary_value: str) -> List[str]: + """Fetch the list of secondary-key values stored in the index for primary_value.""" + index_key = quote(self._index_key(primary_value), safe="") + response = requests.get( + f"{self._kv_api_base()}/{namespace_id}/values/{index_key}", + headers=self._get_auth_headers(), + ) + if response.status_code == 404: + return [] + response.raise_for_status() + + raw = response.text + if not raw: 
+ return [] + + try: + parsed = json.loads(raw) + except Exception: + return [] + + if not isinstance(parsed, list): + return [] + + return [str(v) for v in parsed] + + def _write_index(self, namespace_id: str, primary_value: str, values: List[str]) -> None: + """Persist the secondary-key index for primary_value to KV storage.""" + index_key = quote(self._index_key(primary_value), safe="") + response = requests.put( + f"{self._kv_api_base()}/{namespace_id}/values/{index_key}", + data=json.dumps(values, separators=(",", ":")).encode("utf-8"), + headers=self._get_auth_headers(content_type="text/plain;charset=UTF-8"), + ) + response.raise_for_status() + + def _get_tables(self) -> Dict[str, List[str]]: + """Return all cached table names grouped by benchmark.""" + tables = self.cache_client.get_nosql_configs(self.deployment_name()) + return {benchmark: list(v.values()) for benchmark, v in tables.items()} + + def get_tables(self, benchmark: str) -> Dict[str, str]: + """Return the table-name-to-namespace-ID mapping for the given benchmark.""" + return self._tables[benchmark] + + def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: + """Return the namespace ID for the given benchmark and logical table name, or None.""" + if benchmark not in self._tables: + return None + if table not in self._tables[benchmark]: + return None + return self._tables[benchmark][table] + + def retrieve_cache(self, benchmark: str) -> bool: + """Load cached KV namespace mappings for a benchmark; return True if found.""" + if benchmark in self._tables: + return True + + cached_storage = self.cache_client.get_nosql_config(self.deployment_name(), benchmark) + if cached_storage is None: + return False + + cached_tables = cached_storage.get("tables", {}) + if not isinstance(cached_tables, dict): + return False + + # Ignore legacy Durable Objects cache entries (table -> table name). + if cached_tables and not all( + isinstance(v, str) and self._is_namespace_id(v) for v in cached_tables.values() + ): + self.logging.warning( + f"Ignoring legacy/non-KV cache for benchmark {benchmark}; creating KV namespaces." + ) + return False + + self._tables[benchmark] = cached_tables + self.logging.info(f"Retrieved cached KV namespace mappings for {benchmark}") + return True + + def update_cache(self, benchmark: str): + """Persist the current KV namespace mappings for a benchmark to the cache.""" + self.cache_client.update_nosql( + self.deployment_name(), + benchmark, + {"tables": self._tables[benchmark]}, + ) + self.logging.info(f"Updated cache for KV namespace mappings for {benchmark}") + + def create_table( + self, benchmark: str, name: str, primary_key: str, secondary_key: Optional[str] = None + ) -> str: + """Create or reuse a KV namespace for the given benchmark and table name.""" + # Unused in KV namespace allocation, kept for interface compatibility + _ = primary_key, secondary_key + + existing = self._get_table_name(benchmark, name) + if existing: + return existing + + namespace_title = self._namespace_title(benchmark, name) + + existing_namespace_id = self._find_namespace_id_by_title(namespace_title) + if existing_namespace_id: + self._tables[benchmark][name] = existing_namespace_id + self.logging.info( + f"Reusing existing KV namespace '{namespace_title}' ({existing_namespace_id})" + ) + return existing_namespace_id + + response = requests.post( + self._kv_api_base(), + json={"title": namespace_title}, + headers=self._get_auth_headers(), + ) + + # A concurrent run may have created it after our lookup. 
+ if response.status_code >= 400: + existing_namespace_id = self._find_namespace_id_by_title(namespace_title) + if existing_namespace_id: + self._tables[benchmark][name] = existing_namespace_id + return existing_namespace_id + response.raise_for_status() + + payload = response.json() + if not payload.get("success"): + raise RuntimeError( + f"Failed to create KV namespace {namespace_title}: {payload.get('errors')}" + ) + + namespace_id = payload.get("result", {}).get("id") + if not namespace_id: + raise RuntimeError( + f"Cloudflare KV API did not return namespace id for {namespace_title}" + ) + + self._tables[benchmark][name] = namespace_id + self.logging.info( + f"Created KV namespace '{namespace_title}' ({namespace_id}) for benchmark {benchmark}" + ) + return namespace_id + + def write_to_table( + self, + benchmark: str, + table: str, + data: dict, + primary_key: Tuple[str, str], + secondary_key: Optional[Tuple[str, str]] = None, + ): + """Write a record to the KV namespace, updating the secondary-key index if needed.""" + namespace_id = self._get_table_name(benchmark, table) + if not namespace_id: + raise ValueError(f"Table {table} not found for benchmark {benchmark}") + + record = dict(data) + for key in (primary_key, secondary_key): + if key is not None: + record[key[0]] = key[1] + + composite_key = self._compose_key(primary_key, secondary_key) + value = json.dumps(record, separators=(",", ":"), default=str) + + response = requests.put( + f"{self._kv_api_base()}/{namespace_id}/values/{quote(composite_key, safe='')}", + data=value.encode("utf-8"), + headers=self._get_auth_headers(content_type="text/plain;charset=UTF-8"), + ) + response.raise_for_status() + + if secondary_key is not None: + primary_value = str(primary_key[1]) + secondary_value = str(secondary_key[1]) + index_values = self._read_index(namespace_id, primary_value) + if secondary_value not in index_values: + index_values.append(secondary_value) + self._write_index(namespace_id, primary_value, index_values) + + def clear_table(self, name: str) -> str: + """Log a warning; KV does not support bulk clear — use remove_table + create_table.""" + self.logging.warning( + "clear_table is not implemented for Cloudflare KV. " + "Use remove_table() + create_table() instead." + ) + return name + + def remove_table(self, name: str) -> str: + """Delete the KV namespace identified by logical name or namespace ID.""" + benchmark_to_modify: Optional[str] = None + logical_name_to_delete: Optional[str] = None + namespace_id_to_delete: Optional[str] = None + + for benchmark, tables in list(self._tables.items()): + for logical_name, namespace_id in list(tables.items()): + if name == logical_name or name == namespace_id: + benchmark_to_modify = benchmark + logical_name_to_delete = logical_name + namespace_id_to_delete = namespace_id + break + if namespace_id_to_delete: + break + + # Also allow direct removal by namespace id when not present in local mapping. 
+ if namespace_id_to_delete is None and self._is_namespace_id(name): + namespace_id_to_delete = name + + if namespace_id_to_delete is None: + self.logging.warning(f"KV table '{name}' not found in local mapping.") + return name + + self._delete_namespace(namespace_id_to_delete) + + if benchmark_to_modify is not None and logical_name_to_delete is not None: + del self._tables[benchmark_to_modify][logical_name_to_delete] + + self.logging.info(f"Removed KV namespace {namespace_id_to_delete}") + return name + + def envs(self) -> dict: + """Return environment variables required by benchmarks to access KV storage.""" + return {"NOSQL_STORAGE_DATABASE": "kvstore"} diff --git a/sebs/cloudflare/pyodide_packages.py b/sebs/cloudflare/pyodide_packages.py new file mode 100644 index 00000000..d4e89320 --- /dev/null +++ b/sebs/cloudflare/pyodide_packages.py @@ -0,0 +1,304 @@ +""" +Pyodide packages supported by Cloudflare Python Workers. + +See https://developers.cloudflare.com/workers/languages/python/packages/ for the +authoritative list. Names use the canonical PyPI distribution casing so the +generated pyproject.toml mirrors what pyodide publishes. +""" + +from typing import FrozenSet, Optional + + +SUPPORTED_PYODIDE_PACKAGES: FrozenSet[str] = frozenset( + { + "affine", + "aiohappyeyeballs", + "aiohttp", + "aiosignal", + "altair", + "annotated-types", + "anyio", + "apsw", + "argon2-cffi", + "argon2-cffi-bindings", + "asciitree", + "astropy", + "astropy_iers_data", + "asttokens", + "async-timeout", + "atomicwrites", + "attrs", + "audioop-lts", + "autograd", + "awkward-cpp", + "b2d", + "bcrypt", + "beautifulsoup4", + "bilby.cython", + "biopython", + "bitarray", + "bitstring", + "bleach", + "blosc2", + "bokeh", + "boost-histogram", + "brotli", + "cachetools", + "casadi", + "cbor-diag", + "certifi", + "cffi", + "cffi_example", + "cftime", + "charset-normalizer", + "clarabel", + "click", + "cligj", + "clingo", + "cloudpickle", + "cmyt", + "cobs", + "colorspacious", + "contourpy", + "coolprop", + "coverage", + "cramjam", + "crc32c", + "cryptography", + "css-inline", + "cssselect", + "cvxpy-base", + "cycler", + "cysignals", + "cytoolz", + "decorator", + "demes", + "deprecation", + "diskcache", + "distlib", + "distro", + "docutils", + "donfig", + "ewah_bool_utils", + "exceptiongroup", + "executing", + "fastapi", + "fastcan", + "fastparquet", + "fiona", + "fonttools", + "freesasa", + "frozenlist", + "fsspec", + "future", + "galpy", + "gmpy2", + "gsw", + "h11", + "h3", + "h5py", + "highspy", + "html5lib", + "httpcore", + "httpx", + "idna", + "igraph", + "imageio", + "imgui-bundle", + "iminuit", + "iniconfig", + "inspice", + "ipython", + "jedi", + "Jinja2", + "jiter", + "joblib", + "jsonpatch", + "jsonpointer", + "jsonschema", + "jsonschema_specifications", + "kiwisolver", + "lakers-python", + "lazy_loader", + "lazy-object-proxy", + "libcst", + "lightgbm", + "logbook", + "lxml", + "lz4", + "MarkupSafe", + "matplotlib", + "matplotlib-inline", + "memory-allocator", + "micropip", + "mmh3", + "more-itertools", + "mpmath", + "msgpack", + "msgspec", + "msprime", + "multidict", + "munch", + "mypy", + "narwhals", + "ndindex", + "netcdf4", + "networkx", + "newick", + "nh3", + "nlopt", + "nltk", + "numcodecs", + "numpy", + "openai", + "opencv-python", + "optlang", + "orjson", + "packaging", + "pandas", + "parso", + "patsy", + "pcodec", + "peewee", + "pi-heif", + "Pillow", + "pillow-heif", + "pkgconfig", + "platformdirs", + "pluggy", + "ply", + "pplpy", + "primecountpy", + "prompt_toolkit", + "propcache", + "protobuf", + 
"pure-eval", + "py", + "pyclipper", + "pycparser", + "pycryptodome", + "pydantic", + "pydantic_core", + "pyerfa", + "pygame-ce", + "Pygments", + "pyheif", + "pyiceberg", + "pyinstrument", + "pylimer-tools", + "PyMuPDF", + "pynacl", + "pyodide-http", + "pyodide-unix-timezones", + "pyparsing", + "pyrsistent", + "pysam", + "pyshp", + "pytaglib", + "pytest", + "pytest-asyncio", + "pytest-benchmark", + "pytest_httpx", + "python-calamine", + "python-dateutil", + "python-flint", + "python-magic", + "python-sat", + "python-solvespace", + "pytz", + "pywavelets", + "pyxel", + "pyxirr", + "pyyaml", + "rasterio", + "rateslib", + "rebound", + "reboundx", + "referencing", + "regex", + "requests", + "retrying", + "rich", + "river", + "RobotRaconteur", + "rpds-py", + "ruamel.yaml", + "rustworkx", + "scikit-image", + "scikit-learn", + "scipy", + "screed", + "setuptools", + "shapely", + "simplejson", + "sisl", + "six", + "smart-open", + "sniffio", + "sortedcontainers", + "soundfile", + "soupsieve", + "sourmash", + "soxr", + "sparseqr", + "sqlalchemy", + "stack-data", + "starlette", + "statsmodels", + "strictyaml", + "svgwrite", + "swiglpk", + "sympy", + "tblib", + "termcolor", + "texttable", + "texture2ddecoder", + "threadpoolctl", + "tiktoken", + "tomli", + "tomli-w", + "toolz", + "tqdm", + "traitlets", + "traits", + "tree-sitter", + "tree-sitter-go", + "tree-sitter-java", + "tree-sitter-python", + "tskit", + "typing-extensions", + "tzdata", + "ujson", + "uncertainties", + "unyt", + "urllib3", + "vega-datasets", + "vrplib", + "wcwidth", + "webencodings", + "wordcloud", + "wrapt", + "xarray", + "xgboost", + "xlrd", + "xxhash", + "xyzservices", + "yarl", + "yt", + "zengl", + "zfpy", + "zstandard", + } +) + + +_CANONICAL_BY_LOWER = {name.lower(): name for name in SUPPORTED_PYODIDE_PACKAGES} + + +def get_canonical_pyodide_name(name: str) -> Optional[str]: + """Return the canonical Pyodide package name for ``name`` (O(1) lookup). + + Matching is case-insensitive. Returns ``None`` if the package is not + supported by the Cloudflare Python Workers runtime. 
+ """ + return _CANONICAL_BY_LOWER.get(name.lower()) diff --git a/sebs/cloudflare/r2.py b/sebs/cloudflare/r2.py new file mode 100644 index 00000000..4e0d6579 --- /dev/null +++ b/sebs/cloudflare/r2.py @@ -0,0 +1,380 @@ +"""Cloudflare R2 object storage implementation.""" + +import os + +import requests +from sebs.cloudflare.config import CloudflareCredentials +from sebs.faas.storage import PersistentStorage +from sebs.faas.config import Resources +from sebs.cache import Cache + +from typing import List, Optional + + +class R2(PersistentStorage): + """Cloudflare R2 object storage backend for SeBS benchmarks.""" + + @staticmethod + def typename() -> str: + """Return the canonical type name for this storage class.""" + return "Cloudflare.R2" + + @staticmethod + def deployment_name() -> str: + """Return the deployment platform name.""" + return "cloudflare" + + @property + def replace_existing(self) -> bool: + """Whether existing objects should be overwritten on upload.""" + return self._replace_existing + + @replace_existing.setter + def replace_existing(self, val: bool): + """Set whether existing objects should be overwritten on upload.""" + self._replace_existing = val + + def __init__( + self, + region: str, + cache_client: Cache, + resources: Resources, + replace_existing: bool, + credentials: CloudflareCredentials, + ): + """Initialize R2 storage with Cloudflare credentials.""" + super().__init__(region, cache_client, resources, replace_existing) + self._credentials = credentials + self._s3_client = None + + def _get_auth_headers(self) -> dict[str, str]: + """Get authentication headers for Cloudflare API requests.""" + if self._credentials.api_token: + return { + "Authorization": f"Bearer {self._credentials.api_token}", + "Content-Type": "application/json", + } + elif self._credentials.email and self._credentials.api_key: + return { + "X-Auth-Email": self._credentials.email, + "X-Auth-Key": self._credentials.api_key, + "Content-Type": "application/json", + } + else: + raise RuntimeError("Invalid Cloudflare credentials configuration") + + def _get_s3_client(self): + """ + Get or initialize the S3-compatible client for R2 operations. + + :return: boto3 S3 client or None if credentials not available + """ + if self._s3_client is not None: + return self._s3_client + + # Check if we have S3-compatible credentials + if not self._credentials.r2_access_key_id or not self._credentials.r2_secret_access_key: + self.logging.warning( + "R2 S3-compatible API credentials not configured. " + "Set CLOUDFLARE_R2_ACCESS_KEY_ID and " + "CLOUDFLARE_R2_SECRET_ACCESS_KEY environment variables." + ) + return None + + try: + import boto3 + from botocore.config import Config + + account_id = self._credentials.account_id + + self._s3_client = boto3.client( + "s3", + endpoint_url=f"https://{account_id}.r2.cloudflarestorage.com", + aws_access_key_id=self._credentials.r2_access_key_id, + aws_secret_access_key=self._credentials.r2_secret_access_key, + config=Config(signature_version="s3v4"), + region_name="auto", + ) + + return self._s3_client + + except ImportError: + self.logging.warning("boto3 not available. 
Install with: pip install boto3") + return None + + def correct_name(self, name: str) -> str: + """Return the bucket name unchanged; R2 does not require name transformations.""" + return name + + def _create_bucket( + self, name: str, buckets: Optional[List[str]] = None, randomize_name: bool = False + ) -> str: + """Create an R2 bucket, reusing an existing one if the name is already present.""" + for bucket_name in buckets or []: + if name in bucket_name: + self.logging.info( + "Bucket {} for {} already exists, skipping.".format(bucket_name, name) + ) + return bucket_name + + account_id = self._credentials.account_id + + create_bucket_uri = f"https://api.cloudflare.com/client/v4/accounts/{account_id}/r2/buckets" + + # R2 API only accepts "name" parameter - locationHint is optional and must be one of: + # "apac", "eeur", "enam", "weur", "wnam" + # WARNING: locationHint is not currently supported by SeBS. Buckets are created + # with Cloudflare's automatic location selection. + params = {"name": name} + + self.logging.warning( + f"Creating R2 bucket '{name}' without locationHint. " + "Geographic location is determined automatically by Cloudflare." + ) + + try: + create_bucket_response = requests.post( + create_bucket_uri, json=params, headers=self._get_auth_headers() + ) + + # Log the response for debugging + if create_bucket_response.status_code >= 400: + try: + error_data = create_bucket_response.json() + self.logging.error( + f"R2 bucket creation failed. Status: {create_bucket_response.status_code}, " + f"Response: {error_data}" + ) + except Exception: + self.logging.error( + f"R2 bucket creation failed. Status: {create_bucket_response.status_code}, " + f"Response: {create_bucket_response.text}" + ) + + create_bucket_response.raise_for_status() + + bucket_info_json = create_bucket_response.json() + + if not bucket_info_json.get("success"): + self.logging.error(f"Failed to create R2 bucket: {bucket_info_json.get('errors')}") + raise RuntimeError(f"Failed to create R2 bucket {name}") + + bucket_name = bucket_info_json.get("result", {}).get("name", name) + self.logging.info(f"Created R2 bucket {bucket_name}") + return bucket_name + + except requests.exceptions.RequestException as e: + self.logging.error(f"Error creating R2 bucket {name}: {e}") + raise + + def download(self, bucket_name: str, key: str, filepath: str) -> None: + """ + Download a file from a bucket. + + :param bucket_name: + :param key: storage source filepath + :param filepath: local destination filepath + """ + s3_client = self._get_s3_client() + if s3_client is None: + self.logging.warning(f"Cannot download {key} from R2 - S3 client not available") + return + + try: + dirname = os.path.dirname(filepath) + if dirname: + os.makedirs(dirname, exist_ok=True) + s3_client.download_file(bucket_name, key, filepath) + self.logging.debug(f"Downloaded {key} from R2 bucket {bucket_name} to {filepath}") + except Exception as e: + self.logging.warning(f"Failed to download {key} from R2: {e}") + + def upload(self, bucket_name: str, filepath: str, key: str): + """ + Upload a file to R2 bucket using the S3-compatible API. + + Requires S3 credentials to be configured for the R2 bucket. 
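+
+        Illustrative call (bucket name and paths are made up):
+            storage.upload("sebs-benchmarks-abc123", "/tmp/input.csv",
+                           "110.dynamic-html/input.csv")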
+ + :param bucket_name: R2 bucket name + :param filepath: local source filepath + :param key: R2 destination key/path + """ + s3_client = self._get_s3_client() + if s3_client is None: + self.logging.warning(f"Cannot upload {filepath} to R2 - S3 client not available") + return + + try: + with open(filepath, "rb") as f: + s3_client.put_object(Bucket=bucket_name, Key=key, Body=f) + + self.logging.debug(f"Uploaded {filepath} to R2 bucket {bucket_name} as {key}") + + except Exception as e: + self.logging.warning(f"Failed to upload {filepath} to R2: {e}") + + def upload_bytes(self, bucket_name: str, key: str, data: bytes): + """ + Upload bytes directly to R2 bucket using the S3-compatible API. + + :param bucket_name: R2 bucket name + :param key: R2 destination key/path + :param data: bytes to upload + """ + s3_client = self._get_s3_client() + if s3_client is None: + self.logging.warning("Cannot upload bytes to R2 - S3 client not available") + return + + try: + s3_client.put_object(Bucket=bucket_name, Key=key, Body=data) + + self.logging.debug(f"Uploaded {len(data)} bytes to R2 bucket {bucket_name} as {key}") + + except Exception as e: + self.logging.warning(f"Failed to upload bytes to R2: {e}") + + def list_bucket(self, bucket_name: str, prefix: str = "") -> List[str]: + """ + Retrieves list of files in a bucket using S3-compatible API. + + :param bucket_name: + :param prefix: optional prefix filter + :return: list of files in a given bucket + """ + s3_client = self._get_s3_client() + if s3_client is None: + raise RuntimeError( + f"Cannot list R2 bucket {bucket_name} - S3 client not available. " + "Ensure CLOUDFLARE_R2_ACCESS_KEY_ID and CLOUDFLARE_R2_SECRET_ACCESS_KEY are set." + ) + + try: + paginator = s3_client.get_paginator("list_objects_v2") + page_iterator = paginator.paginate(Bucket=bucket_name, Prefix=prefix) + + files = [] + for page in page_iterator: + if "Contents" in page: + for obj in page["Contents"]: + files.append(obj["Key"]) + + return files + + except Exception as e: + raise RuntimeError(f"Failed to list R2 bucket {bucket_name}: {str(e)}") from e + + def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: + """ + List all R2 buckets in the account. + + :param bucket_name: optional filter (not used for R2) + :return: list of bucket names + """ + account_id = self._credentials.account_id + + list_buckets_uri = f"https://api.cloudflare.com/client/v4/accounts/{account_id}/r2/buckets" + + try: + response = requests.get(list_buckets_uri, headers=self._get_auth_headers()) + + if response.status_code == 403: + try: + error_data = response.json() + detail = f"Response: {error_data}. " + except ValueError: + detail = "" + raise RuntimeError( + f"403 Forbidden accessing R2 buckets. {detail}" + "Your API token may need 'R2 Read and Write' permissions." + ) + + response.raise_for_status() + + data = response.json() + + if not data.get("success"): + raise RuntimeError(f"Failed to list R2 buckets: {data.get('errors')}") + + buckets = data.get("result", {}).get("buckets", []) + bucket_names = [bucket["name"] for bucket in buckets] + + self.logging.info(f"Found {len(bucket_names)} R2 buckets") + return bucket_names + + except requests.exceptions.RequestException as e: + raise RuntimeError(f"Error listing R2 buckets: {e}") from e + + def exists_bucket(self, bucket_name: str) -> bool: + """ + Check if a bucket exists. 
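+
+        Implemented via list_buckets(), i.e. through the account-level R2 REST
+        API rather than the S3-compatible endpoint.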
+ + :param bucket_name: + :return: True if bucket exists + """ + buckets = self.list_buckets() + return bucket_name in buckets + + def clean_bucket(self, bucket_name: str): + """ + Remove all objects from a bucket. + + :param bucket_name: + """ + self.logging.warning(f"clean_bucket not fully implemented for R2 bucket {bucket_name}") + pass + + def remove_bucket(self, bucket: str): + """ + Delete a bucket. + + :param bucket: + """ + account_id = self._credentials.account_id + + delete_bucket_uri = ( + f"https://api.cloudflare.com/client/v4/accounts/{account_id}/r2/buckets/{bucket}" + ) + + try: + response = requests.delete(delete_bucket_uri, headers=self._get_auth_headers()) + response.raise_for_status() + + data = response.json() + + if data.get("success"): + self.logging.info(f"Successfully deleted R2 bucket {bucket}") + else: + self.logging.error(f"Failed to delete R2 bucket {bucket}: {data.get('errors')}") + + except requests.exceptions.RequestException as e: + self.logging.error(f"Error deleting R2 bucket {bucket}: {e}") + + def uploader_func(self, bucket_idx: int, file: str, filepath: str) -> None: + """ + Upload a file to a bucket (used for parallel uploads). + + :param bucket_idx: index of the bucket/prefix to upload to + :param file: destination file name/key + :param filepath: source file path + """ + # Skip upload when using cached buckets and not updating storage + if self.cached and not self.replace_existing: + return + + # Build the key with the input prefix + key = os.path.join(self.input_prefixes[bucket_idx], file) + + bucket_name = self.get_bucket(Resources.StorageBucketType.BENCHMARKS) + + # Check if file already exists (if not replacing existing files) + if not self.replace_existing: + for f in self.input_prefixes_files[bucket_idx]: + if key == f: + self.logging.info( + f"Skipping upload of {filepath} to {bucket_name} (already exists)" + ) + return + + # Upload the file + self.upload(bucket_name, filepath, key) diff --git a/sebs/cloudflare/resources.py b/sebs/cloudflare/resources.py new file mode 100644 index 00000000..77e2988a --- /dev/null +++ b/sebs/cloudflare/resources.py @@ -0,0 +1,97 @@ +"""Cloudflare system resources manager.""" + +import docker +from typing import Optional, cast + +from sebs.cache import Cache +from sebs.cloudflare.config import CloudflareConfig, CloudflareCredentials +from sebs.cloudflare.r2 import R2 +from sebs.cloudflare.kvstore import KVStore +from sebs.faas.resources import SystemResources +from sebs.faas.storage import PersistentStorage +from sebs.faas.nosql import NoSQLStorage +from sebs.utils import LoggingHandlers + + +class CloudflareSystemResources(SystemResources): + """ + System resources for Cloudflare Workers. + + Cloudflare Workers have a different resource model compared to + traditional cloud platforms. This class handles Cloudflare-specific + resources like KV namespaces and R2 storage. 
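+
+    Typical use (illustrative; the variable names are not from SeBS itself)::
+
+        storage = resources.get_storage(replace_existing=False)
+        nosql = resources.get_nosql_storage()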
+ """ + + def __init__( + self, + config: CloudflareConfig, + cache_client: Cache, + docker_client: docker.client.DockerClient, + logging_handlers: LoggingHandlers, + ): + """Initialize Cloudflare system resources with config and logging handlers.""" + super().__init__(config, cache_client, docker_client) + self._config = config + self.logging_handlers = logging_handlers + + @property + def config(self) -> CloudflareConfig: + """Return the Cloudflare-specific platform configuration.""" + return cast(CloudflareConfig, self._config) + + def _get_auth_headers(self) -> dict[str, str]: + """Get authentication headers for Cloudflare API requests.""" + credentials = cast(CloudflareCredentials, self._config.credentials) + if credentials.api_token: + return { + "Authorization": f"Bearer {credentials.api_token}", + "Content-Type": "application/json", + } + elif credentials.email and credentials.api_key: + return { + "X-Auth-Email": credentials.email, + "X-Auth-Key": credentials.api_key, + "Content-Type": "application/json", + } + else: + raise RuntimeError("Invalid Cloudflare credentials configuration") + + def get_storage(self, replace_existing: Optional[bool] = None) -> PersistentStorage: + """ + Get Cloudflare R2 storage instance. + + R2 is Cloudflare's S3-compatible object storage service. + This method will create a client for managing benchmark input/output data. + + Args: + replace_existing: Whether to replace existing files in storage + + Returns: + R2 storage instance + """ + if replace_existing is None: + replace_existing = False + + return R2( + region=self._config.region, + cache_client=self._cache_client, + resources=self._config.resources, + replace_existing=replace_existing, + credentials=cast(CloudflareCredentials, self._config.credentials), + ) + + def get_nosql_storage(self) -> NoSQLStorage: + """ + Get Cloudflare KV storage instance. + + KV namespaces provide key-value storage for Workers. 
+ + Returns: + KVStore storage instance + """ + return KVStore( + region=self._config.region, + cache_client=self._cache_client, + resources=self._config.resources, + credentials=cast(CloudflareCredentials, self._config.credentials), + ) diff --git a/sebs/cloudflare/templates/wrangler-container.toml b/sebs/cloudflare/templates/wrangler-container.toml new file mode 100644 index 00000000..d8e08fe3 --- /dev/null +++ b/sebs/cloudflare/templates/wrangler-container.toml @@ -0,0 +1,25 @@ +# Template for Cloudflare Container Workers +# This file is read and modified by the deployment system + +name = "PLACEHOLDER_WORKER_NAME" +main = "worker.js" +compatibility_date = "2025-11-18" +account_id = "PLACEHOLDER_ACCOUNT_ID" +compatibility_flags = ["nodejs_compat"] + +[observability] +enabled = true + +[[containers]] +max_instances = 10 +class_name = "ContainerWorker" +image = "./Dockerfile" + +# Durable Object binding for Container class (required by @cloudflare/containers) +[[durable_objects.bindings]] +name = "CONTAINER_WORKER" +class_name = "ContainerWorker" + +[[migrations]] +tag = "v1" +new_sqlite_classes = ["ContainerWorker"] diff --git a/sebs/cloudflare/templates/wrangler-worker.toml b/sebs/cloudflare/templates/wrangler-worker.toml new file mode 100644 index 00000000..fd36127a --- /dev/null +++ b/sebs/cloudflare/templates/wrangler-worker.toml @@ -0,0 +1,7 @@ +# Template for native Cloudflare Workers +# This file is read and modified by the deployment system + +name = "PLACEHOLDER_WORKER_NAME" +main = "PLACEHOLDER_MAIN_FILE" +compatibility_date = "2025-11-18" +account_id = "PLACEHOLDER_ACCOUNT_ID" diff --git a/sebs/cloudflare/triggers.py b/sebs/cloudflare/triggers.py new file mode 100644 index 00000000..5b7a208c --- /dev/null +++ b/sebs/cloudflare/triggers.py @@ -0,0 +1,236 @@ +"""HTTP trigger implementation for Cloudflare Workers.""" + +from typing import Optional +import concurrent.futures +import json +import time +from datetime import datetime +from io import BytesIO + +from sebs.faas.function import Trigger, ExecutionResult + + +class ContainerProvisioningError(RuntimeError): + """Raised when Cloudflare reports the container is still provisioning.""" + + pass + + +class HTTPTrigger(Trigger): + """ + HTTP trigger for Cloudflare Workers. + Workers are automatically accessible via HTTPS endpoints. + """ + + def __init__(self, worker_name: str, url: Optional[str] = None): + """Initialize the HTTP trigger with the worker name and optional URL.""" + super().__init__() + self.worker_name = worker_name + self._url = url + + @staticmethod + def typename() -> str: + """Return the canonical type name for this trigger class.""" + return "Cloudflare.HTTPTrigger" + + @staticmethod + def trigger_type() -> Trigger.TriggerType: + """Return the trigger type enum value.""" + return Trigger.TriggerType.HTTP + + @property + def url(self) -> str: + """HTTPS endpoint URL for invoking the worker.""" + assert self._url is not None, "HTTP trigger URL has not been set" + return self._url + + @url.setter + def url(self, url: str): + """Set the HTTPS endpoint URL for the worker.""" + self._url = url + + def _http_invoke(self, payload: dict, url: str, verify_ssl: bool = True) -> ExecutionResult: + """ + Invoke a Cloudflare Worker via HTTP POST. + + Overrides the base implementation to add a browser-like User-Agent header. + Cloudflare's bot-protection returns HTTP 1010 for requests that look like + automated tools (empty or libcurl User-Agent), so we must set one explicitly. 
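+
+        Responses that indicate the backing worker or container is not ready
+        yet (HTTP 502, Cloudflare error 1042, or a "container is not running"
+        message) are raised as ContainerProvisioningError so that sync_invoke
+        can wait and retry.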
+ """ + import pycurl + + c = pycurl.Curl() + c.setopt( + pycurl.HTTPHEADER, + [ + "Content-Type: application/json", + # Cloudflare bot-protection (error 1010) blocks requests with no/tool UA. + "User-Agent: Mozilla/5.0 (compatible; SeBS/1.0; " + "+https://github.com/spcl/serverless-benchmarks)", + ], + ) + c.setopt(pycurl.POST, 1) + c.setopt(pycurl.URL, url) + if not verify_ssl: + c.setopt(pycurl.SSL_VERIFYHOST, 0) + c.setopt(pycurl.SSL_VERIFYPEER, 0) + data = BytesIO() + c.setopt(pycurl.WRITEFUNCTION, data.write) + + c.setopt(pycurl.POSTFIELDS, json.dumps(payload)) + begin = datetime.now() + c.perform() + end = datetime.now() + status_code = c.getinfo(pycurl.RESPONSE_CODE) + conn_time = c.getinfo(pycurl.PRETRANSFER_TIME) + receive_time = c.getinfo(pycurl.STARTTRANSFER_TIME) + c.close() + + try: + output = json.loads(data.getvalue()) + if "body" in output: + if isinstance(output["body"], dict): + output = output["body"] + else: + output = json.loads(output["body"]) + + if status_code == 502: + self.logging.info("Container returned 502 (still starting?), will retry...") + raise ContainerProvisioningError("502 gateway error from container worker") + + # Check for Cloudflare error code 1042 (CPU time limit / worker not ready) + # Output may be a plain string like "error code: 1042" rather than a dict. + output_str = str(output) + if "1042" in output_str and "error code" in output_str: + self.logging.info("Worker returned error 1042 (CPU time limit), will retry...") + raise ContainerProvisioningError(f"Error 1042 from worker: {output_str}") + + container_not_ready_phrases = ( + "The container is not running", + "Failed to start container", + ) + if any(p in output_str for p in container_not_ready_phrases): + self.logging.info("Container not yet running, will retry...") + raise ContainerProvisioningError(f"Container startup error: {output_str[:200]}") + + if status_code != 200: + self.logging.error(f"Invocation on URL {url} failed!") + self.logging.error(f"Output: {output}") + raise RuntimeError(f"Failed invocation of function! Output: {output}") + + self.logging.debug("Invoke of function was successful") + result = ExecutionResult.from_times(begin, end) + result.times.http_startup = conn_time + result.times.http_first_byte_return = receive_time + if "request_id" not in output: + raise RuntimeError(f"Cannot process allocation with output: {output}") + result.request_id = output["request_id"] + result.parse_benchmark_output(output) + return result + except json.decoder.JSONDecodeError: + raw = data.getvalue() + raw_text = raw.decode() if raw else "" + provisioning_phrases = ( + "no Container instance available", + "provisioning the Container", + "currently provisioning", + "The container is not running", + "Failed to start container", + ) + if "1042" in raw_text and "error code" in raw_text: + self.logging.info("Worker returned error 1042 (CPU time limit), will retry...") + raise ContainerProvisioningError(f"Error 1042 from worker: {raw_text[:200]}") + if status_code == 502 or any( + p.lower() in raw_text.lower() for p in provisioning_phrases + ): + self.logging.info(f"Container still provisioning (URL {url}): {raw_text[:120]}") + raise ContainerProvisioningError(f"Container not yet available: {raw_text[:200]}") + self.logging.error(f"Invocation on URL {url} failed!") + if raw_text: + self.logging.error(f"Output: {raw_text}") + else: + self.logging.error("No output provided!") + raise RuntimeError(f"Failed invocation of function! 
Output: {raw_text}") + + def sync_invoke(self, payload: dict) -> ExecutionResult: + """ + Synchronously invoke a Cloudflare Worker via HTTP. + + For container workers, the deployment path already waits until an instance + is running before returning, so provisioning retries here are a last-resort + safety net only (e.g. the instance was recycled between deployment and the + first invocation). + """ + self.logging.debug(f"Invoke function {self.url}") + max_provisioning_retries = 2 + provisioning_retry_wait = 15 # seconds between retries + for attempt in range(max_provisioning_retries + 1): + try: + result = self._http_invoke(payload, self.url) + break + except ContainerProvisioningError: + if attempt < max_provisioning_retries: + self.logging.info( + f"Container not yet ready, waiting {provisioning_retry_wait}s " + f"before retry (attempt {attempt + 1}/{max_provisioning_retries})..." + ) + time.sleep(provisioning_retry_wait) + else: + raise + + # Extract measurement data from the response if available + if result.output and "result" in result.output: # type: ignore[union-attr] + result_data = result.output["result"] + if isinstance(result_data, dict) and "measurement" in result_data: + measurement = result_data["measurement"] + + # Extract timing metrics if provided by the benchmark + if isinstance(measurement, dict): + # CPU time in microseconds + if "cpu_time_us" in measurement: + result.provider_times.execution = measurement["cpu_time_us"] + elif "cpu_time_ms" in measurement: + result.provider_times.execution = int(measurement["cpu_time_ms"] * 1000) + + # Wall time in microseconds + if "wall_time_us" in measurement: + result.times.benchmark = measurement["wall_time_us"] + elif "wall_time_ms" in measurement: + result.times.benchmark = int(measurement["wall_time_ms"] * 1000) + + # Cold/warm start detection + if "is_cold" in measurement: + result.stats.cold_start = measurement["is_cold"] + + # Memory usage if available + if "memory_used_mb" in measurement: + result.stats.memory_used = measurement["memory_used_mb"] + + # Store the full measurement for later analysis + result.output["measurement"] = measurement + + self.logging.debug(f"Extracted measurements: {measurement}") + + return result + + def async_invoke(self, payload: dict) -> concurrent.futures.Future: + """ + Asynchronously invoke a Cloudflare Worker via HTTP. + """ + pool = concurrent.futures.ThreadPoolExecutor() + fut = pool.submit(self.sync_invoke, payload) + return fut + + def serialize(self) -> dict: + """Return a serializable dict with the trigger type, worker name, and URL.""" + return { + "type": self.typename(), + "worker_name": self.worker_name, + "url": self._url, + } + + @staticmethod + def deserialize(obj: dict) -> "HTTPTrigger": + """Reconstruct an HTTPTrigger from a serialized dict.""" + trigger = HTTPTrigger(obj["worker_name"], obj.get("url")) + return trigger diff --git a/sebs/cloudflare/workers.py b/sebs/cloudflare/workers.py new file mode 100644 index 00000000..4add1ccc --- /dev/null +++ b/sebs/cloudflare/workers.py @@ -0,0 +1,312 @@ +""" +Cloudflare Workers native deployment implementation. + +Handles packaging, deployment, and management of native Cloudflare Workers +(non-container deployments using JavaScript/Python runtime). 
+""" + +import os +import re +import shutil +from importlib.resources import files + +try: + import tomllib # type: ignore[import-not-found] # Python 3.11+ +except ImportError: + import tomli as tomllib # type: ignore[no-redef, import-not-found] # Fallback for older Python +try: + import tomli_w +except ImportError: + import toml as tomli_w # type: ignore[no-redef, import-untyped] +from typing import Optional, Tuple + +from sebs.benchmark import Benchmark +from sebs.cloudflare.cli import CloudflareCLI +from sebs.cloudflare.pyodide_packages import get_canonical_pyodide_name + + +class CloudflareWorkersDeployment: + """Handles native Cloudflare Workers deployment operations.""" + + def __init__(self, logging, system_config, docker_client, system_resources): + """ + Initialize CloudflareWorkersDeployment. + + Args: + logging: Logger instance + system_config: System configuration + docker_client: Docker client instance + system_resources: System resources manager + """ + self.logging = logging + self.system_config = system_config + self.docker_client = docker_client + self.system_resources = system_resources + self._cli: Optional[CloudflareCLI] = None + + def _get_cli(self) -> CloudflareCLI: + """Get or initialize the Cloudflare CLI container.""" + if self._cli is None: + self._cli = CloudflareCLI.get_instance(self.system_config, self.docker_client) + # Verify wrangler is available + version = self._cli.check_wrangler_version() + self.logging.info(f"Cloudflare CLI container ready: {version}") + return self._cli + + def generate_wrangler_toml( + self, + worker_name: str, + package_dir: str, + language: str, + account_id: str, + benchmark_name: Optional[str] = None, + code_package: Optional[Benchmark] = None, + container_uri: Optional[str] = None, + language_variant: str = "cloudflare", + ) -> str: + """ + Generate a wrangler.toml configuration file for native workers. + + Args: + worker_name: Name of the worker + package_dir: Directory containing the worker code + language: Programming language (nodejs or python) + account_id: Cloudflare account ID + benchmark_name: Optional benchmark name for R2 file path prefix + code_package: Optional benchmark package for nosql configuration + + Returns: + Path to the generated wrangler.toml file + """ + # Load template + template_path = ( + files("sebs.cloudflare").joinpath("templates").joinpath("wrangler-worker.toml") + ) + with template_path.open("rb") as f: + config = tomllib.load(f) + + # Update basic configuration + config["name"] = worker_name + config["account_id"] = account_id + + # Add language- and variant-specific configuration. + # For Node.js workers, we always bundle through build.js into dist/, + # regardless of language variant (default/cloudflare), because the + # wrangler entrypoint points to dist/handler.js. 
+ if language == "nodejs": + config["main"] = "dist/handler.js" + config["compatibility_flags"] = ["nodejs_compat"] + config["no_bundle"] = True + config["rules"] = [ + {"type": "ESModule", "globs": ["**/*.js"], "fallthrough": True}, + {"type": "Text", "globs": ["**/*.html"], "fallthrough": True}, + ] + elif language == "python": + config["main"] = "handler.py" + config["compatibility_flags"] = ["python_workers"] + else: + config["main"] = "dist/handler.js" if language == "nodejs" else "handler.py" + + # Add NoSQL KV namespace bindings if benchmark uses them + if code_package and code_package.uses_nosql: + benchmark_for_nosql = benchmark_name or code_package.benchmark + nosql_storage = self.system_resources.get_nosql_storage() + if nosql_storage.retrieve_cache(benchmark_for_nosql): + nosql_tables = nosql_storage.get_tables(benchmark_for_nosql) + if nosql_tables: + config["kv_namespaces"] = [] + for table_name, namespace_id in nosql_tables.items(): + config["kv_namespaces"].append( + { + "binding": table_name, + "id": namespace_id, + } + ) + + # Add environment variables + if benchmark_name or (code_package and code_package.uses_nosql): + config["vars"] = {} + if benchmark_name: + config["vars"]["BENCHMARK_NAME"] = benchmark_name + if code_package and code_package.uses_nosql: + config["vars"]["NOSQL_STORAGE_DATABASE"] = "kvstore" + + # Add R2 bucket binding + try: + from sebs.faas.config import Resources + + storage = self.system_resources.get_storage() + bucket_name = storage.get_bucket(Resources.StorageBucketType.BENCHMARKS) + if bucket_name: + config["r2_buckets"] = [{"binding": "R2", "bucket_name": bucket_name}] + self.logging.info(f"R2 bucket '{bucket_name}' will be bound to worker as 'R2'") + except Exception as e: + self.logging.warning( + f"R2 bucket binding not configured: {e}. " + f"Benchmarks requiring file access will not work properly." + ) + + # Write wrangler.toml to package directory + toml_path = os.path.join(package_dir, "wrangler.toml") + os.makedirs(package_dir, exist_ok=True) + try: + # Try tomli_w (writes binary) + with open(toml_path, "wb") as f: + tomli_w.dump(config, f) + except TypeError: + # Fallback to toml library (writes text) + with open(toml_path, "w") as f: + f.write(tomli_w.dumps(config)) + + self.logging.info(f"Generated wrangler.toml at {toml_path}") + return toml_path + + def package_code( + self, + directory: str, + language_name: str, + language_version: str, + benchmark: str, + is_cached: bool, + language_variant: str = "cloudflare", + ) -> Tuple[str, int, str]: + """ + Package code for native Cloudflare Workers deployment. + + Args: + directory: Path to the code directory + language_name: Programming language name + language_version: Programming language version + benchmark: Benchmark name + is_cached: Whether the code is cached + + Returns: + Tuple of (package_path, package_size, container_uri) + """ + # Install dependencies and bundle. + # Dependency installation (npm install / pip install) is handled by + # Benchmark.install_dependencies() via the canonical SeBS build-image + # pipeline (bind-mount + /sebs/installer.sh). package_code only needs + # to do the language-specific file preparation that happens before or + # after that step. 
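+        # Illustrative Python case handled below: a requirements.txt containing
+        #   requests==2.31.0
+        #   numpy
+        # becomes pyproject.toml dependencies ["requests", "numpy"], while a
+        # package that is not in the Pyodide list (e.g. flask) aborts packaging
+        # with a RuntimeError.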
+ if language_name == "nodejs": + pass # install_dependencies handles npm install + esbuild bundle + + elif language_name == "python": + requirements_file = os.path.join(directory, "requirements.txt") + if os.path.exists(f"{requirements_file}.{language_version}"): + src = f"{requirements_file}.{language_version}" + dest = requirements_file + shutil.move(src, dest) + self.logging.info(f"move {src} to {dest}") + + if language_variant in ("cloudflare", "default"): + needed_pkg = [] + if os.path.exists(requirements_file): + with open(requirements_file, "r") as reqf: + reqtext = reqf.read() + unsupported = [] + seen = set() + for raw_line in reqtext.splitlines(): + line = raw_line.split("#", 1)[0].strip() + if not line: + continue + name = re.split(r"[<>=!~;\s\[]", line, maxsplit=1)[0].strip() + if not name: + continue + canonical = get_canonical_pyodide_name(name) + if canonical is None: + unsupported.append(name) + continue + if canonical not in seen: + needed_pkg.append(canonical) + seen.add(canonical) + if unsupported: + raise RuntimeError( + "The following packages from requirements.txt are not " + "supported by the Cloudflare Python Workers (Pyodide) " + f"runtime: {', '.join(unsupported)}. See " + "https://developers.cloudflare.com/workers/languages/python/packages/ " + "for the list of supported packages." + ) + + project_file = os.path.join(directory, "pyproject.toml") + pyproject_config = { + "project": { + "name": f"{benchmark.replace('.', '-')}-python-" + f"{language_version.replace('.', '')}", + "version": "0.1.0", + "description": "dummy description", + "requires-python": f">={language_version}", + "dependencies": needed_pkg, + }, + "dependency-groups": { + "dev": ["workers-py", "workers-runtime-sdk"], + }, + } + try: + with open(project_file, "wb") as pf: + tomli_w.dump(pyproject_config, pf) + except TypeError: + with open(project_file, "w") as pf: + pf.write(tomli_w.dumps(pyproject_config)) + # Pyodide Workers require all function files in a function/ subdir + funcdir = os.path.join(directory, "function") + if not os.path.exists(funcdir): + os.makedirs(funcdir) + + dont_move = ["handler.py", "function", "python_modules", "pyproject.toml"] + for thing in os.listdir(directory): + if thing not in dont_move: + src = os.path.join(directory, thing) + dest = os.path.join(directory, "function", thing) + shutil.move(src, dest) + + # Validation (pyproject.toml parse + pywrangler check) is + # performed by install_dependencies via cloudflare_python_installer.sh. + + # Create package structure + CONFIG_FILES = { + "nodejs": ["handler.js", "package.json", "node_modules"], + "python": ["handler.py", "requirements.txt", "python_modules"], + } + + if language_name not in CONFIG_FILES: + raise NotImplementedError( + f"Language {language_name} is not yet supported for Cloudflare Workers" + ) + + # Verify the handler exists + handler_file = "handler.js" if language_name == "nodejs" else "handler.py" + package_path = os.path.join(directory, handler_file) + + if not os.path.exists(package_path): + if not os.path.exists(directory): + raise RuntimeError( + f"Package directory {directory} does not exist. " + "The benchmark build process may have failed to create the deployment package." + ) + raise RuntimeError( + f"Handler file {handler_file} not found in {directory}. 
" + f"Available files: " + f"{', '.join(os.listdir(directory)) if os.path.exists(directory) else 'none'}" + ) + + # Calculate total size of the package directory + total_size = 0 + for dirpath, dirnames, filenames in os.walk(directory): + for filename in filenames: + filepath = os.path.join(dirpath, filename) + total_size += os.path.getsize(filepath) + + mbytes = total_size / 1024.0 / 1024.0 + self.logging.info( + f"Worker package size: {mbytes:.2f} MB (Python: missing vendored modules)" + ) + + return (directory, total_size, "") + + def shutdown(self): + """Drop the local CLI reference. The shared container is owned by CloudflareCLI; + call CloudflareCLI.shutdown_instance() once at process teardown.""" + self._cli = None diff --git a/sebs/config.py b/sebs/config.py index e23d460f..6014d9b1 100644 --- a/sebs/config.py +++ b/sebs/config.py @@ -203,6 +203,16 @@ def benchmark_base_images( architecture ] + def benchmark_container_images( + self, deployment_name: str, language_name: str, architecture: str + ) -> Dict[str, str]: + """Get container base images for container deployments.""" + return ( + self._system_config[deployment_name]["languages"][language_name] + .get("container_images", {}) + .get(architecture, {}) + ) + def version(self) -> str: """Get the SeBS framework version. diff --git a/sebs/experiments/config.py b/sebs/experiments/config.py index d88ee6ea..dd881f99 100644 --- a/sebs/experiments/config.py +++ b/sebs/experiments/config.py @@ -34,6 +34,8 @@ class SystemVariant: # GCP specific "function-gen1", "function-gen2", + # Cloudflare worker deployment + "workers", ] def __init__(self, value: str): diff --git a/sebs/faas/config.py b/sebs/faas/config.py index 8e4e6784..7da30433 100644 --- a/sebs/faas/config.py +++ b/sebs/faas/config.py @@ -431,6 +431,15 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Confi from sebs.openwhisk.config import OpenWhiskConfig implementations["openwhisk"] = OpenWhiskConfig.deserialize + + # Cloudflare is available by default (like local) + try: + from sebs.cloudflare.config import CloudflareConfig + + implementations["cloudflare"] = CloudflareConfig.deserialize + except ImportError: + pass + func = implementations.get(name) assert func, "Unknown config type!" return func(config[name] if name in config else config, cache, handlers) diff --git a/sebs/faas/function.py b/sebs/faas/function.py index 0d62642c..4e1d3c8d 100644 --- a/sebs/faas/function.py +++ b/sebs/faas/function.py @@ -547,6 +547,7 @@ class Python(Enum): DEFAULT = "default" PYPY = "pypy" + CLOUDFLARE = "cloudflare" class NodeJS(Enum): """Node.js runtime variants.""" @@ -554,6 +555,7 @@ class NodeJS(Enum): DEFAULT = "default" BUN = "bun" LLRT = "llrt" + CLOUDFLARE = "cloudflare" class Java(Enum): """Java runtime variants. 
diff --git a/sebs/regression.py b/sebs/regression.py index 58920c79..f6f414c6 100644 --- a/sebs/regression.py +++ b/sebs/regression.py @@ -84,8 +84,13 @@ architectures_openwhisk = ["x64"] deployments_openwhisk = ["container"] +# Cloudflare-specific configurations +architectures_cloudflare = ["x64"] + # User-defined config passed during initialization, set in regression_suite() cloud_config: Optional[dict] = None +# Input size for benchmark test data ("test" | "small" | "large"), set in regression_suite() +benchmark_input_size: str = "test" RESOURCE_PREFIX = "regr" LOGGING_REDACTED = False @@ -242,7 +247,7 @@ def test(self): # Prepare input data for the benchmark input_config = benchmark.prepare_input( deployment_client.system_resources, - size="test", + size=benchmark_input_size, replace_existing=experiment_config.update_storage, ) @@ -1188,6 +1193,130 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): return deployment_client +class CloudflareTestSequencePythonWorkers( + unittest.TestCase, + metaclass=TestSequenceMeta, + benchmarks=benchmarks_python, + architectures=architectures_cloudflare, + deployments=["workers"], + deployment_name="cloudflare", + triggers=[Trigger.TriggerType.HTTP], +): + """Test suite for Python benchmarks on Cloudflare Workers.""" + + def get_deployment(self, benchmark_name, architecture, deployment_type): + """Return an initialized Cloudflare deployment client for Python workers.""" + deployment_name = "cloudflare" + assert cloud_config, "Cloud configuration is required" + + config_copy = copy.deepcopy(cloud_config) + config_copy["experiments"]["architecture"] = architecture + config_copy["experiments"]["container_deployment"] = False + + f = f"regression_{deployment_name}_{benchmark_name}_{architecture}_{deployment_type}.log" + deployment_client = self.client.get_deployment( + config_copy, + logging_filename=os.path.join(self.client.output_dir, f), + ) + + with CloudflareTestSequencePythonWorkers.lock: + deployment_client.initialize(resource_prefix="regr") + return deployment_client + + +class CloudflareTestSequencePythonContainers( + unittest.TestCase, + metaclass=TestSequenceMeta, + benchmarks=benchmarks_python, + architectures=architectures_cloudflare, + deployments=["container"], + deployment_name="cloudflare", + triggers=[Trigger.TriggerType.HTTP], +): + """Test suite for Python benchmarks on Cloudflare Containers.""" + + def get_deployment(self, benchmark_name, architecture, deployment_type): + """Return an initialized Cloudflare deployment client for Python containers.""" + deployment_name = "cloudflare" + assert cloud_config, "Cloud configuration is required" + + config_copy = copy.deepcopy(cloud_config) + config_copy["experiments"]["architecture"] = architecture + config_copy["experiments"]["container_deployment"] = True + + f = f"regression_{deployment_name}_{benchmark_name}_{architecture}_{deployment_type}.log" + deployment_client = self.client.get_deployment( + config_copy, + logging_filename=os.path.join(self.client.output_dir, f), + ) + + with CloudflareTestSequencePythonContainers.lock: + deployment_client.initialize(resource_prefix="regr") + return deployment_client + + +class CloudflareTestSequenceNodejsWorkers( + unittest.TestCase, + metaclass=TestSequenceMeta, + benchmarks=benchmarks_nodejs, + architectures=architectures_cloudflare, + deployments=["workers"], + deployment_name="cloudflare", + triggers=[Trigger.TriggerType.HTTP], +): + """Test suite for Node.js benchmarks on Cloudflare Workers.""" + + def 
get_deployment(self, benchmark_name, architecture, deployment_type): + """Return an initialized Cloudflare deployment client for Node.js workers.""" + deployment_name = "cloudflare" + assert cloud_config, "Cloud configuration is required" + + config_copy = copy.deepcopy(cloud_config) + config_copy["experiments"]["architecture"] = architecture + config_copy["experiments"]["container_deployment"] = False + + f = f"regression_{deployment_name}_{benchmark_name}_{architecture}_{deployment_type}.log" + deployment_client = self.client.get_deployment( + config_copy, + logging_filename=os.path.join(self.client.output_dir, f), + ) + + with CloudflareTestSequenceNodejsWorkers.lock: + deployment_client.initialize(resource_prefix="regr") + return deployment_client + + +class CloudflareTestSequenceNodejsContainers( + unittest.TestCase, + metaclass=TestSequenceMeta, + benchmarks=benchmarks_nodejs, + architectures=architectures_cloudflare, + deployments=["container"], + deployment_name="cloudflare", + triggers=[Trigger.TriggerType.HTTP], +): + """Test suite for Node.js benchmarks on Cloudflare Containers.""" + + def get_deployment(self, benchmark_name, architecture, deployment_type): + """Return an initialized Cloudflare deployment client for Node.js containers.""" + deployment_name = "cloudflare" + assert cloud_config, "Cloud configuration is required" + + config_copy = copy.deepcopy(cloud_config) + config_copy["experiments"]["architecture"] = architecture + config_copy["experiments"]["container_deployment"] = True + + f = f"regression_{deployment_name}_{benchmark_name}_{architecture}_{deployment_type}.log" + deployment_client = self.client.get_deployment( + config_copy, + logging_filename=os.path.join(self.client.output_dir, f), + ) + + with CloudflareTestSequenceNodejsContainers.lock: + deployment_client.initialize(resource_prefix="regr") + return deployment_client + + # Stream result handler for concurrent test execution # Based on https://stackoverflow.com/questions/22484805/ # a-simple-working-example-for-testtools-concurrentstreamtestsuite @@ -1306,6 +1435,21 @@ def filter_out_benchmarks( and language_version in ["3.8", "3.9", "3.10", "3.11", "3.12"] and deployment_type == "function-gen1"): return "411.image-recognition" not in benchmark + + # Cloudflare: only certain benchmarks are supported per language/deployment-type. + # None means all benchmarks are supported for that combination. + if deployment_name == "cloudflare": + from sebs.cloudflare.cloudflare import Cloudflare + is_container = deployment_type == "container" + allowed = Cloudflare.SUPPORTED_BENCHMARKS.get((language, is_container)) + if allowed is not None: + # benchmark is the test method name, e.g. "test_cloudflare_120.uploader_x64_workers" + # Extract the numeric benchmark prefix (e.g. "120") from before the first "." + if "." 
in benchmark: + benchmark_id = benchmark.split(".")[-2].split("_")[-1] + else: + benchmark_id = benchmark.split("_")[-1] + return benchmark_id in allowed # fmt: on # All other benchmarks are supported @@ -1319,6 +1463,8 @@ def regression_suite( deployment_config: dict, resource_prefix: str | None = None, benchmark_name: Optional[str] = None, + deployment_type: Optional[str] = None, + input_size: str = "test", selected_architecture: str | None = None, filter_output: bool = False, ): @@ -1354,9 +1500,10 @@ def regression_suite( # Create the test suite suite = unittest.TestSuite() - # Make cloud_config available to test classes - global cloud_config + # Make cloud_config and input size available to test classes + global cloud_config, benchmark_input_size cloud_config = deployment_config + benchmark_input_size = input_size # Extract runtime configuration language = experiment_config["runtime"]["language"] @@ -1417,6 +1564,38 @@ def regression_suite( unittest.defaultTestLoader.loadTestsFromTestCase(OpenWhiskTestSequenceJava) ) + # Add Cloudflare tests if requested + if "cloudflare" in providers: + assert ( + "cloudflare" in cloud_config["deployment"] + ), "Cloudflare provider requested but not in deployment config" + if language == "python": + if deployment_type != "containers": + suite.addTest( + unittest.defaultTestLoader.loadTestsFromTestCase( + CloudflareTestSequencePythonWorkers + ) + ) + if deployment_type != "functions": + suite.addTest( + unittest.defaultTestLoader.loadTestsFromTestCase( + CloudflareTestSequencePythonContainers + ) + ) + elif language == "nodejs": + if deployment_type != "containers": + suite.addTest( + unittest.defaultTestLoader.loadTestsFromTestCase( + CloudflareTestSequenceNodejsWorkers + ) + ) + if deployment_type != "functions": + suite.addTest( + unittest.defaultTestLoader.loadTestsFromTestCase( + CloudflareTestSequenceNodejsContainers + ) + ) + # Prepare the list of tests to run tests: List[unittest.TestCase] = [] # mypy is confused here about the type diff --git a/sebs/sebs.py b/sebs/sebs.py index d99aac4c..6beb042d 100644 --- a/sebs/sebs.py +++ b/sebs/sebs.py @@ -214,6 +214,10 @@ def get_deployment( from sebs.openwhisk import OpenWhisk implementations["openwhisk"] = OpenWhisk + if has_platform("cloudflare"): + from sebs.cloudflare import Cloudflare + + implementations["cloudflare"] = Cloudflare # Validate deployment platform if name not in implementations: diff --git a/sebs/utils.py b/sebs/utils.py index 4ebdcb82..043fd2c3 100644 --- a/sebs/utils.py +++ b/sebs/utils.py @@ -689,7 +689,7 @@ def has_platform(name: str) -> bool: import google.cloud.devtools # noqa: F401 return True - elif name in ("local", "openwhisk"): + elif name in ("local", "openwhisk", "cloudflare"): # these don't have specific dependencies return True else: