From e8f780af413964ff92ab4fdb64aa381836b16b98 Mon Sep 17 00:00:00 2001 From: Mohammad Bagher Abiyat <37929992+Aslemammad@users.noreply.github.com> Date: Sun, 5 Apr 2026 07:55:42 +0330 Subject: [PATCH 1/2] feat: add registry proxy and sumdb prototype --- REGISTRY_PROXY_PLAN.md | 175 +++++++ apps/demo/package.json | 12 + apps/demo/src/generate-keys.ts | 14 + apps/demo/src/integration.test.ts | 398 +++++++++++++++ apps/demo/src/run-demo.ts | 70 +++ apps/demo/src/verify-lib.ts | 116 +++++ apps/demo/src/verify.ts | 37 ++ apps/demo/tsconfig.json | 14 + apps/registry-core/package.json | 9 + apps/registry-core/src/crypto.ts | 102 ++++ apps/registry-core/src/index.ts | 4 + apps/registry-core/src/merkle.test.ts | 89 ++++ apps/registry-core/src/merkle.ts | 148 ++++++ apps/registry-core/src/protocol.ts | 184 +++++++ apps/registry-core/src/registry-discovery.ts | 60 +++ apps/registry-core/tsconfig.json | 14 + apps/registry-proxy/package.json | 9 + apps/registry-proxy/src/cli.ts | 41 ++ apps/registry-proxy/src/server.ts | 492 +++++++++++++++++++ apps/registry-proxy/tsconfig.json | 14 + apps/sumdb/package.json | 9 + apps/sumdb/src/cli.ts | 30 ++ apps/sumdb/src/server.ts | 167 +++++++ apps/sumdb/src/store.ts | 251 ++++++++++ apps/sumdb/tsconfig.json | 14 + config/registries.ts | 18 + package.json | 7 + pnpm-lock.yaml | 8 + pnpm-workspace.yaml | 1 + 29 files changed, 2507 insertions(+) create mode 100644 REGISTRY_PROXY_PLAN.md create mode 100644 apps/demo/package.json create mode 100644 apps/demo/src/generate-keys.ts create mode 100644 apps/demo/src/integration.test.ts create mode 100644 apps/demo/src/run-demo.ts create mode 100644 apps/demo/src/verify-lib.ts create mode 100644 apps/demo/src/verify.ts create mode 100644 apps/demo/tsconfig.json create mode 100644 apps/registry-core/package.json create mode 100644 apps/registry-core/src/crypto.ts create mode 100644 apps/registry-core/src/index.ts create mode 100644 apps/registry-core/src/merkle.test.ts create mode 100644 
apps/registry-core/src/merkle.ts create mode 100644 apps/registry-core/src/protocol.ts create mode 100644 apps/registry-core/src/registry-discovery.ts create mode 100644 apps/registry-core/tsconfig.json create mode 100644 apps/registry-proxy/package.json create mode 100644 apps/registry-proxy/src/cli.ts create mode 100644 apps/registry-proxy/src/server.ts create mode 100644 apps/registry-proxy/tsconfig.json create mode 100644 apps/sumdb/package.json create mode 100644 apps/sumdb/src/cli.ts create mode 100644 apps/sumdb/src/server.ts create mode 100644 apps/sumdb/src/store.ts create mode 100644 apps/sumdb/tsconfig.json create mode 100644 config/registries.ts diff --git a/REGISTRY_PROXY_PLAN.md b/REGISTRY_PROXY_PLAN.md new file mode 100644 index 0000000000..cc10ae0c82 --- /dev/null +++ b/REGISTRY_PROXY_PLAN.md @@ -0,0 +1,175 @@ +# Registry Proxy + SumDB Status + +## Summary + +This file now reflects the current implemented state. + +What exists today: + +- `registry.npmx.dev` is an npm-compatible proxy surface +- `sum.npmx.dev` is a minimal transparency log for immutable tarball assertions +- sumdb logs only tarball records +- every logged record includes: + - `keyId` + - `name` + - `version` + - `type` + - `digest` + - `size` + - `url` + - `integrity` + - `signature` +- `keyId` is the actual package-signing key from npm-style `dist.signatures` +- the proxy verifies the upstream package signature before ingest +- sumdb verifies the same logged signature again using trusted public keys loaded at startup +- sumdb does not know registry base URLs, labels, or proxy-specific routes +- the proxy no longer exposes custom routes like `/registries` +- package-pattern routing has been removed; the proxy fetches from the first configured source registry + +## Implemented Behavior + +### 1. 
Registry catalog and fetch source + +The checked-in registry catalog in [config/registries.ts](config/registries.ts) is now just an ordered list of source registries: + +- `label` +- `registryBaseUrl` + +Current behavior: + +- source-registry keys are fetched from each registry’s `/-/npm/v1/keys` +- the full catalog is used to collect trusted public keys for startup/bootstrap +- the proxy uses the first configured registry as its fetch source +- there are no per-package or per-scope routing rules anymore + +### 2. Proxy behavior + +Implemented in [apps/registry-proxy/src/server.ts](apps/registry-proxy/src/server.ts): + +- supports npm-compatible packument routes +- supports npm-compatible tarball passthrough routes +- exposes `GET /-/npm/v1/keys` +- preserves source-registry tarball URLs in packuments so lockfiles do not point at the proxy +- caches packuments on disk +- verifies upstream `dist.signatures` before ingesting a tarball into sumdb +- sends the minimal tarball record directly to sumdb + +Not present anymore: + +- `/registries` +- proxy-side registry registration behavior +- package-pattern routing logic +- witness/envelope signing between proxy and sumdb + +### 3. 
SumDB behavior + +Implemented in [apps/sumdb/src/server.ts](apps/sumdb/src/server.ts) and [apps/sumdb/src/store.ts](apps/sumdb/src/store.ts): + +- accepts only tarball ingest records +- requires `integrity` +- verifies the logged `signature` against the trusted responsible public key for `keyId` +- rejects unknown key IDs +- stores only the minimal logged record in the Merkle tree +- serves lookup, checkpoint, inclusion proof, consistency proof, and tile endpoints +- signs checkpoints with its own npm-style P-256 keypair + +Startup model: + +- trusted responsible public keys are fetched outside the sumdb core +- the sumdb CLI bootstraps them from the checked-in registry catalog before creating the server +- the server/store itself remains registry-URL-agnostic + +### 4. Logged record and canonical leaf + +Implemented in [apps/registry-core/src/protocol.ts](apps/registry-core/src/protocol.ts): + +Logged record fields: + +- `keyId` +- `name` +- `version` +- `type` +- `digest` +- `size` +- `url` +- `integrity` +- `signature` + +The Merkle leaf is built from exactly those fields and nothing else. 
+ +Why packuments are not logged: + +- they are mutable registry metadata, not immutable versioned artifacts +- they do not have one stable package-version integrity value like tarballs do +- dropping them keeps sumdb closer to a Go-sumdb-style artifact log + +## Public Interfaces + +### Ingest API + +`POST /ingest` currently accepts: + +- `keyId` +- `name` +- `version` +- `type` +- `digest` +- `size` +- `url` +- `integrity` +- `signature` + +Validation: + +- `type` must be `tarball` +- `integrity` must be present +- `signature` must verify for `${name}@${version}:${integrity}` + +### Proxy API surface + +Currently exposed: + +- `GET /:package` +- `GET /@scope/:package` +- tarball passthrough routes +- `GET /-/npm/v1/keys` + +Currently not exposed: + +- `/registries` +- proxy-specific registry inspection endpoints + +### SumDB API surface + +Currently exposed: + +- `GET /lookup/:keyId/:packageName/:version` +- `GET /latest-checkpoint` +- `GET /checkpoint/:treeSize` +- `GET /proof/inclusion/:leafIndex` +- `GET /proof/consistency/:from/:to` +- `GET /tile/...` +- `POST /ingest` + +## Verification And Tests + +Verified today: + +- minimal-leaf and Merkle tests pass +- end-to-end proxy + sumdb install flow passes with real installs +- lockfile tarball URLs point to the source registry, not the proxy +- proxy no longer exposes `/registries` +- logged records include the upstream package signature +- sumdb verifies the logged signature using startup-loaded trusted public keys + +Main test entrypoints: + +- `node --test --experimental-strip-types apps/registry-core/src/*.test.ts` +- `node --test --experimental-strip-types apps/demo/src/integration.test.ts` + +## Assumptions + +- trust is per published signing key, not per registry hostname +- the registry catalog is the source of truth for source-registry order and external key fetching +- the first configured registry is the active fetch source for the proxy +- duplicate tarball ingests are deduplicated by canonical leaf diff --git 
a/apps/demo/package.json b/apps/demo/package.json new file mode 100644 index 0000000000..e7b13834c7 --- /dev/null +++ b/apps/demo/package.json @@ -0,0 +1,12 @@ +{ + "name": "@npmx/registry-demo", + "version": "0.0.0", + "private": true, + "type": "module", + "scripts": { + "keys": "node --experimental-strip-types src/generate-keys.ts", + "run": "node --experimental-strip-types src/run-demo.ts", + "test": "node --test --experimental-strip-types src/integration.test.ts", + "verify": "node --experimental-strip-types src/verify.ts" + } +} diff --git a/apps/demo/src/generate-keys.ts b/apps/demo/src/generate-keys.ts new file mode 100644 index 0000000000..237e32d859 --- /dev/null +++ b/apps/demo/src/generate-keys.ts @@ -0,0 +1,14 @@ +import { generateRegistryKeyPair } from '../../registry-core/src/index.ts' + +const sumDb = generateRegistryKeyPair() +const registry = generateRegistryKeyPair() + +console.log('# SumDB') +console.log(`SUMDB_PUBLIC_KEY<<'EOF'\n${sumDb.publicKeyPem}EOF`) +console.log(`SUMDB_PRIVATE_KEY<<'EOF'\n${sumDb.privateKeyPem}EOF`) +console.log(`SUMDB_KEY_ID=${sumDb.keyId}`) +console.log('') +console.log('# Registry proxy') +console.log(`REGISTRY_PUBLIC_KEY<<'EOF'\n${registry.publicKeyPem}EOF`) +console.log(`REGISTRY_PRIVATE_KEY<<'EOF'\n${registry.privateKeyPem}EOF`) +console.log(`REGISTRY_KEY_ID=${registry.keyId}`) diff --git a/apps/demo/src/integration.test.ts b/apps/demo/src/integration.test.ts new file mode 100644 index 0000000000..0e48479e32 --- /dev/null +++ b/apps/demo/src/integration.test.ts @@ -0,0 +1,398 @@ +import assert from 'node:assert/strict' +import { execFile } from 'node:child_process' +import { once } from 'node:events' +import { mkdir, mkdtemp, readFile, rm } from 'node:fs/promises' +import net from 'node:net' +import { tmpdir } from 'node:os' +import path from 'node:path' +import test from 'node:test' +import { promisify } from 'node:util' +import { registryCatalog } from '../../../config/registries.ts' +import { + 
collectPublishedKeys, + createPackageSignatureText, + fetchRegistryKeys, + generateRegistryKeyPair, + hydrateSourceRegistries, + npmKeyToPublicKeyPem, + resolveSourceRegistry, +} from '../../registry-core/src/index.ts' +import { createRegistryProxyServer } from '../../registry-proxy/src/server.ts' +import { createSumDbServer } from '../../sumdb/src/server.ts' +import { verifyPackageFromSumDb } from './verify-lib.ts' +import crypto from 'node:crypto' + +const execFileAsync = promisify(execFile) + +function logStep(message: string) { + console.log(`[e2e] ${message}`) +} + +async function getFreePort() { + const server = net.createServer() + server.listen(0, '127.0.0.1') + await once(server, 'listening') + const address = server.address() + server.close() + await once(server, 'close') + + if (!address || typeof address === 'string') { + throw new Error('Unable to determine a free port') + } + + return address.port +} + +async function startServer(server: net.Server, port: number) { + server.listen(port, '127.0.0.1') + await once(server, 'listening') +} + +async function stopServer(server: net.Server) { + if ('closeIdleConnections' in server && typeof server.closeIdleConnections === 'function') { + server.closeIdleConnections() + } + if ('closeAllConnections' in server && typeof server.closeAllConnections === 'function') { + server.closeAllConnections() + } + server.close() + await once(server, 'close') +} + +async function npmInstall(input: { + directory: string + registryBaseUrl: string + packageName: string +}) { + await execFileAsync('npm', ['init', '-y'], { + cwd: input.directory, + }) + + await execFileAsync( + 'npm', + [ + 'install', + `--registry=${input.registryBaseUrl}`, + '--ignore-scripts', + '--no-audit', + '--no-fund', + input.packageName, + ], + { + cwd: input.directory, + env: { + ...process.env, + npm_config_cache: path.join(input.directory, '.npm-cache'), + }, + }, + ) +} + +async function fetchResolvedTarballPath(projectDir: string, packagePath: 
string) { + const lock = JSON.parse(await readFile(path.join(projectDir, 'package-lock.json'), 'utf8')) as { + packages: Record + } + const entry = lock.packages[packagePath] + if (!entry) { + throw new Error(`Missing package lock entry for ${packagePath}`) + } + + const targetPath = path.join(projectDir, `${packagePath.replaceAll('/', '__')}-${entry.version}.tgz`) + await execFileAsync('curl', ['-sSLo', targetPath, entry.resolved]) + return { + resolved: entry.resolved, + version: entry.version, + tarballPath: targetPath, + } +} + +async function fetchLookup(sumDbBaseUrl: string, keyId: string, packageName: string, version: string) { + const response = await fetch( + `${sumDbBaseUrl}/lookup/${encodeURIComponent(keyId)}/${packageName + .split('/') + .map(segment => encodeURIComponent(segment)) + .join('/')}/${encodeURIComponent(version)}`, + ) + return (await response.json()) as { + records: Array<{ + type: string + digest: string + integrity: string + signature: string + keyId: string + leafIndex: number + }> + } +} + +async function fetchPackumentFromRegistry(registryBaseUrl: string, packageName: string) { + const encodedPackageName = packageName.startsWith('@') + ? packageName.replace('/', '%2f') + : packageName + const response = await fetch(`${registryBaseUrl}/${encodedPackageName}`) + if (!response.ok) { + throw new Error(`Unable to fetch packument for ${packageName} from ${registryBaseUrl}`) + } + return (await response.json()) as Record +} + +function getVerifiedSigningKeyId(input: { + packageName: string + version: string + packument: Record + candidateKeys: NpmKey[] +}) { + const versions = input.packument.versions as Record | undefined + const versionMetadata = versions?.[input.version] as Record | undefined + const dist = versionMetadata?.dist as Record | undefined + const integrity = dist?.integrity + const signatures = Array.isArray(dist?.signatures) ? 
(dist?.signatures as Array>) : [] + + if (typeof integrity !== 'string' || !integrity || signatures.length === 0) { + throw new Error(`Missing integrity or signatures for ${input.packageName}@${input.version}`) + } + + for (const signature of signatures) { + const keyId = typeof signature.keyid === 'string' ? signature.keyid : undefined + const sig = typeof signature.sig === 'string' ? signature.sig : undefined + if (!keyId || !sig) { + continue + } + + const matchingKey = input.candidateKeys.find(key => key.keyid === keyId) + if (!matchingKey) { + continue + } + + const verified = crypto.verify( + 'sha256', + Buffer.from(createPackageSignatureText(input.packageName, input.version, integrity)), + npmKeyToPublicKeyPem(matchingKey), + Buffer.from(sig, 'base64'), + ) + if (verified) { + return { + keyId, + integrity, + } + } + } + + throw new Error(`No verified signing key found for ${input.packageName}@${input.version}`) +} + +async function findRecordedKeyId(input: { + sumDbBaseUrl: string + keyIds: string[] + packageName: string + version: string +}) { + for (const keyId of input.keyIds) { + const lookup = await fetchLookup(input.sumDbBaseUrl, keyId, input.packageName, input.version) + if (lookup.records.length > 0) { + return { + keyId, + lookup, + } + } + } + + throw new Error(`No sumdb record found for ${input.packageName}@${input.version}`) +} + +test( + 'proxy + sumdb E2E install flow verifies minimal tarball records with logged upstream signatures', + { timeout: 120_000 }, + async () => { + logStep('allocating temp workspace and ports') + const tempRoot = await mkdtemp(path.join(tmpdir(), 'npmx-registry-e2e-')) + const sumDbPort = await getFreePort() + const proxyPort = await getFreePort() + const sumDbBaseUrl = `http://127.0.0.1:${sumDbPort}` + const proxyBaseUrl = `http://127.0.0.1:${proxyPort}` + const sumDbKeys = generateRegistryKeyPair() + const proxyKeys = generateRegistryKeyPair() + + logStep('hydrating source registries and collecting trusted 
package-signing keys') + const hydratedRegistries = await hydrateSourceRegistries(registryCatalog) + const trustedResponsibleKeys = collectPublishedKeys(hydratedRegistries) + + const sumDb = await createSumDbServer({ + port: sumDbPort, + dataDir: path.join(tempRoot, 'sumdb'), + sumDbPrivateKey: sumDbKeys.privateKeyPem, + sumDbPublicKey: sumDbKeys.publicKeyPem, + allowedRegistryKeys: null, + trustedResponsibleKeys, + }) + + logStep(`starting sumdb on ${sumDbBaseUrl}`) + await startServer(sumDb.server, sumDbPort) + + logStep('creating proxy with registry catalog and runtime-fetched npm keys') + const proxy = await createRegistryProxyServer({ + port: proxyPort, + baseUrl: proxyBaseUrl, + sourceRegistries: registryCatalog, + cacheDir: path.join(tempRoot, 'proxy-cache'), + sumDbBaseUrl, + registryPrivateKey: proxyKeys.privateKeyPem, + registryPublicKey: proxyKeys.publicKeyPem, + }) + + logStep(`starting proxy on ${proxyBaseUrl}`) + await startServer(proxy.server, proxyPort) + + try { + const firstProject = path.join(tempRoot, 'is-number-project') + const secondProject = path.join(tempRoot, 'scoped-project') + + await mkdir(firstProject, { recursive: true }) + await mkdir(secondProject, { recursive: true }) + + logStep('installing is-number through the proxy') + await npmInstall({ + directory: firstProject, + registryBaseUrl: proxyBaseUrl, + packageName: 'is-number', + }) + logStep('installing @jridgewell/resolve-uri through the proxy') + await npmInstall({ + directory: secondProject, + registryBaseUrl: proxyBaseUrl, + packageName: '@jridgewell/resolve-uri', + }) + + logStep('reading resolved tarball URLs from package-lock.json') + const firstTarball = await fetchResolvedTarballPath(firstProject, 'node_modules/is-number') + const secondTarball = await fetchResolvedTarballPath( + secondProject, + 'node_modules/@jridgewell/resolve-uri', + ) + logStep(`resolved is-number tarball: ${firstTarball.resolved}`) + logStep(`resolved @jridgewell/resolve-uri tarball: 
${secondTarball.resolved}`) + + assert.ok(firstTarball.resolved.startsWith('https://registry.npmjs.org/')) + assert.ok(secondTarball.resolved.startsWith('https://registry.npmjs.org/')) + assert.equal(firstTarball.resolved.startsWith(proxyBaseUrl), false) + assert.equal(secondTarball.resolved.startsWith(proxyBaseUrl), false) + + logStep('fetching source-registry keys and verifying the actual package-signing key IDs') + const yarnKeys = await fetchRegistryKeys('https://registry.yarnpkg.com') + const npmKeys = await fetchRegistryKeys('https://registry.npmjs.org') + const firstPackument = await fetchPackumentFromRegistry('https://registry.yarnpkg.com', 'is-number') + const secondPackument = await fetchPackumentFromRegistry('https://registry.npmjs.org', '@jridgewell/resolve-uri') + const firstSigning = getVerifiedSigningKeyId({ + packageName: 'is-number', + version: firstTarball.version, + packument: firstPackument, + candidateKeys: yarnKeys, + }) + const secondSigning = getVerifiedSigningKeyId({ + packageName: '@jridgewell/resolve-uri', + version: secondTarball.version, + packument: secondPackument, + candidateKeys: npmKeys, + }) + logStep(`is-number signed by ${firstSigning.keyId}`) + logStep(`@jridgewell/resolve-uri signed by ${secondSigning.keyId}`) + + logStep('verifying tarballs against sumdb checkpoints and proofs') + const firstVerification = await verifyPackageFromSumDb({ + sumDbBaseUrl, + registryKeyId: firstSigning.keyId, + packageName: 'is-number', + version: firstTarball.version, + tarballPath: firstTarball.tarballPath, + }) + const secondVerification = await verifyPackageFromSumDb({ + sumDbBaseUrl, + registryKeyId: secondSigning.keyId, + packageName: '@jridgewell/resolve-uri', + version: secondTarball.version, + tarballPath: secondTarball.tarballPath, + }) + assert.equal(firstVerification.ok, true) + assert.equal(secondVerification.ok, true) + logStep(`verified is-number under ${firstSigning.keyId}`) + logStep(`verified @jridgewell/resolve-uri under 
${secondSigning.keyId}`) + + logStep('checking the sumdb stores only minimal tarball records') + const firstLookup = await fetchLookup(sumDbBaseUrl, firstSigning.keyId, 'is-number', firstTarball.version) + const firstRecord = firstLookup.records.find(record => record.type === 'tarball') + assert.ok(firstRecord) + assert.equal(firstRecord!.keyId, firstSigning.keyId) + assert.equal(firstRecord!.integrity, firstSigning.integrity) + assert.ok(typeof firstRecord!.digest === 'string' && firstRecord!.digest.startsWith('sha512-')) + assert.ok(typeof firstRecord!.signature === 'string' && firstRecord!.signature.length > 0) + assert.ok(typeof firstRecord!.leafIndex === 'number') + + const secondLookup = await fetchLookup( + sumDbBaseUrl, + secondSigning.keyId, + '@jridgewell/resolve-uri', + secondTarball.version, + ) + const secondRecord = secondLookup.records.find(record => record.type === 'tarball') + assert.ok(secondRecord) + assert.equal(secondRecord!.keyId, secondSigning.keyId) + assert.equal(secondRecord!.integrity, secondSigning.integrity) + assert.ok(typeof secondRecord!.signature === 'string' && secondRecord!.signature.length > 0) + + logStep('verifying the proxy stays npm-compatible and does not expose custom registry routes') + const proxyKeysResponse = await fetch(`${proxyBaseUrl}/-/npm/v1/keys`) + const proxyKeysPayload = (await proxyKeysResponse.json()) as { + keys: Array<{ keyid: string; keytype: string; scheme: string; key: string }> + } + assert.equal(proxyKeysPayload.keys.length, 1) + assert.match(proxyKeysPayload.keys[0]!.keyid, /^SHA256:/) + assert.equal(proxyKeysPayload.keys[0]!.keytype, 'ecdsa-sha2-nistp256') + assert.equal(proxyKeysPayload.keys[0]!.scheme, 'ecdsa-sha2-nistp256') + + const proxyRootResponse = await fetch(`${proxyBaseUrl}/`) + assert.equal(proxyRootResponse.status, 404) + logStep('custom proxy introspection routes are gone; only npm-compatible paths remain') + + logStep('confirming there are no alternate key IDs recorded for the same 
package versions') + const recordedFirst = await findRecordedKeyId({ + sumDbBaseUrl, + keyIds: trustedResponsibleKeys.map(key => key.keyid), + packageName: 'is-number', + version: firstTarball.version, + }) + const recordedSecond = await findRecordedKeyId({ + sumDbBaseUrl, + keyIds: trustedResponsibleKeys.map(key => key.keyid), + packageName: '@jridgewell/resolve-uri', + version: secondTarball.version, + }) + assert.equal(recordedFirst.keyId, firstSigning.keyId) + assert.equal(recordedSecond.keyId, secondSigning.keyId) + } finally { + logStep('shutting down proxy and sumdb') + await stopServer(proxy.server) + await stopServer(sumDb.server) + await rm(tempRoot, { recursive: true, force: true }) + } + }, +) + +test('source registry resolution uses the first configured registry', () => { + const npmKeys = generateRegistryKeyPair().npmKey + const registries = [ + { + label: 'first', + registryBaseUrl: 'https://registry.first.example', + keysEndpoint: 'https://registry.first.example/-/npm/v1/keys', + npmKeys: [npmKeys], + }, + { + label: 'second', + registryBaseUrl: 'https://registry.second.example', + keysEndpoint: 'https://registry.second.example/-/npm/v1/keys', + npmKeys: [npmKeys], + }, + ] + + assert.equal(resolveSourceRegistry(registries).label, 'first') +}) diff --git a/apps/demo/src/run-demo.ts b/apps/demo/src/run-demo.ts new file mode 100644 index 0000000000..c6b4973b00 --- /dev/null +++ b/apps/demo/src/run-demo.ts @@ -0,0 +1,70 @@ +import path from 'node:path' +import process from 'node:process' +import { spawn } from 'node:child_process' +import { generateRegistryKeyPair } from '../../registry-core/src/index.ts' + +function run(command: string, args: string[], env: NodeJS.ProcessEnv) { + return spawn(command, args, { + stdio: 'inherit', + env, + }) +} + +const cwd = process.cwd() +const sumDbPort = process.env.SUMDB_PORT ?? '4318' +const proxyPort = process.env.PROXY_PORT ?? 
'4317' +const sumDbKeys = + process.env.SUMDB_PRIVATE_KEY && process.env.SUMDB_PUBLIC_KEY + ? { + privateKeyPem: process.env.SUMDB_PRIVATE_KEY, + publicKeyPem: process.env.SUMDB_PUBLIC_KEY, + } + : generateRegistryKeyPair() +const proxyKeys = + process.env.REGISTRY_PRIVATE_KEY && process.env.REGISTRY_PUBLIC_KEY + ? { + privateKeyPem: process.env.REGISTRY_PRIVATE_KEY, + publicKeyPem: process.env.REGISTRY_PUBLIC_KEY, + } + : generateRegistryKeyPair() + +console.log('Starting sumdb and registry proxy for the demo.') +console.log(`sumdb: http://127.0.0.1:${sumDbPort}`) +console.log(`proxy: http://127.0.0.1:${proxyPort}`) +console.log('') +console.log('Provide SUMDB_PRIVATE_KEY / SUMDB_PUBLIC_KEY and REGISTRY_PRIVATE_KEY / REGISTRY_PUBLIC_KEY to keep stable identities across restarts.') + +const sumDb = run( + process.execPath, + ['--experimental-strip-types', path.resolve(cwd, 'apps/sumdb/src/cli.ts')], + { + ...process.env, + PORT: sumDbPort, + SUMDB_PRIVATE_KEY: sumDbKeys.privateKeyPem, + SUMDB_PUBLIC_KEY: sumDbKeys.publicKeyPem, + REGISTRY_PUBLIC_KEY: proxyKeys.publicKeyPem, + }, +) + +const proxy = run( + process.execPath, + ['--experimental-strip-types', path.resolve(cwd, 'apps/registry-proxy/src/cli.ts')], + { + ...process.env, + PORT: proxyPort, + PROXY_BASE_URL: `http://127.0.0.1:${proxyPort}`, + SUMDB_BASE_URL: `http://127.0.0.1:${sumDbPort}`, + REGISTRY_PRIVATE_KEY: proxyKeys.privateKeyPem, + REGISTRY_PUBLIC_KEY: proxyKeys.publicKeyPem, + }, +) + +process.on('SIGINT', () => { + sumDb.kill('SIGINT') + proxy.kill('SIGINT') + process.exit(0) +}) + +console.log('') +console.log(`Next step:`) +console.log(`npm install --registry=http://127.0.0.1:${proxyPort} is-number`) diff --git a/apps/demo/src/verify-lib.ts b/apps/demo/src/verify-lib.ts new file mode 100644 index 0000000000..7b47fe7090 --- /dev/null +++ b/apps/demo/src/verify-lib.ts @@ -0,0 +1,116 @@ +import fs from 'node:fs/promises' +import { + createArtifactDigest, + verifyCheckpoint, + 
verifyConsistencyProof, + verifyInclusionProof, + type ConsistencyProofResponse, + type InclusionProofResponse, + type SignedCheckpoint, + type SumDbLeafRecord, +} from '../../registry-core/src/index.ts' + +export interface VerifyPackageInput { + sumDbBaseUrl: string + registryKeyId: string + packageName: string + version: string + tarballPath?: string +} + +export interface VerifyPackageResult { + ok: true + packageName: string + version: string + registryKeyId: string + leafIndex: number + checkpointTreeSize: number +} + +export async function verifyPackageFromSumDb(input: VerifyPackageInput): Promise { + // Verification walks the same chain a third party would: lookup the recorded leaf, + // verify the latest checkpoint signature, then verify inclusion/consistency proofs. + const lookupResponse = await fetch( + `${input.sumDbBaseUrl}/lookup/${encodeURIComponent(input.registryKeyId)}/${input.packageName + .split('/') + .map(segment => encodeURIComponent(segment)) + .join('/')}/${encodeURIComponent(input.version)}`, + ) + + if (!lookupResponse.ok) { + throw new Error(`Lookup failed with ${lookupResponse.status}`) + } + + const lookup = (await lookupResponse.json()) as { + treeSize: number + records: Array + } + + const tarballRecord = lookup.records.find(record => record.type === 'tarball') + if (!tarballRecord) { + throw new Error(`No tarball record found for ${input.packageName}@${input.version}`) + } + + const checkpointResponse = await fetch(`${input.sumDbBaseUrl}/latest-checkpoint`) + const checkpoint = (await checkpointResponse.json()) as SignedCheckpoint + + if (!verifyCheckpoint(checkpoint)) { + throw new Error('Checkpoint signature verification failed') + } + + const inclusionResponse = await fetch( + `${input.sumDbBaseUrl}/proof/inclusion/${tarballRecord.leafIndex}?treeSize=${lookup.treeSize}`, + ) + const inclusion = (await inclusionResponse.json()) as InclusionProofResponse + const inclusionVerified = verifyInclusionProof({ + leaf: 
tarballRecord.canonicalLeaf, + leafIndex: tarballRecord.leafIndex, + treeSize: inclusion.treeSize, + proof: inclusion.hashes, + expectedRoot: checkpoint.rootHash, + }) + + if (!inclusionVerified) { + throw new Error('Inclusion proof verification failed') + } + + if (lookup.treeSize > 1) { + const consistencyResponse = await fetch( + `${input.sumDbBaseUrl}/proof/consistency/${lookup.treeSize - 1}/${lookup.treeSize}`, + ) + const consistency = (await consistencyResponse.json()) as ConsistencyProofResponse + const previousCheckpointResponse = await fetch(`${input.sumDbBaseUrl}/checkpoint/${lookup.treeSize - 1}`) + const previousCheckpoint = (await previousCheckpointResponse.json()) as SignedCheckpoint + const consistencyVerified = verifyConsistencyProof({ + fromTreeSize: consistency.fromTreeSize, + toTreeSize: consistency.toTreeSize, + oldRoot: previousCheckpoint.rootHash, + newRoot: checkpoint.rootHash, + proof: consistency.hashes, + }) + if (!consistencyVerified) { + throw new Error('Consistency proof verification failed') + } + } + + if (input.tarballPath) { + // When a local tarball is provided, we also confirm the checkpointed digest matches the + // bytes that were actually downloaded during the install flow. 
+ const tarballBytes = await fs.readFile(input.tarballPath) + const digest = createArtifactDigest(tarballBytes) + if (digest !== tarballRecord.digest) { + throw new Error( + `Local tarball digest mismatch: expected ${tarballRecord.digest} got ${digest}`, + ) + } + } + + return { + ok: true, + packageName: input.packageName, + version: input.version, + registryKeyId: input.registryKeyId, + leafIndex: tarballRecord.leafIndex, + checkpointTreeSize: checkpoint.treeSize, + } +} diff --git a/apps/demo/src/verify.ts b/apps/demo/src/verify.ts new file mode 100644 index 0000000000..d42161da61 --- /dev/null +++ b/apps/demo/src/verify.ts @@ -0,0 +1,37 @@ +import process from 'node:process' +import { verifyPackageFromSumDb } from './verify-lib.ts' + +function parseArgs(argv: string[]) { + const args = new Map() + for (let index = 0; index < argv.length; index += 2) { + const key = argv[index] + const value = argv[index + 1] + if (!key?.startsWith('--') || !value) continue + args.set(key.slice(2), value) + } + return args +} + +const args = parseArgs(process.argv.slice(2)) +const sumDbBaseUrl = args.get('sumdb-base-url') ?? 
'http://127.0.0.1:4318' +const registryKeyId = args.get('registry-key-id') +const packageName = args.get('package') +const version = args.get('version') +const tarballPath = args.get('tarball-path') + +if (!registryKeyId || !packageName || !version) { + console.error( + 'Usage: node --experimental-strip-types apps/demo/src/verify.ts --registry-key-id --package --version [--sumdb-base-url ] [--tarball-path ]', + ) + process.exit(1) +} + +const result = await verifyPackageFromSumDb({ + sumDbBaseUrl, + registryKeyId, + packageName, + version, + tarballPath, +}) + +console.log(JSON.stringify(result, null, 2)) diff --git a/apps/demo/tsconfig.json b/apps/demo/tsconfig.json new file mode 100644 index 0000000000..26e8ddf779 --- /dev/null +++ b/apps/demo/tsconfig.json @@ -0,0 +1,14 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "nodenext", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "noEmit": true, + "allowImportingTsExtensions": true, + "types": ["node"] + }, + "include": ["src/**/*.ts"], + "exclude": ["node_modules", "dist"] +} diff --git a/apps/registry-core/package.json b/apps/registry-core/package.json new file mode 100644 index 0000000000..26578f2666 --- /dev/null +++ b/apps/registry-core/package.json @@ -0,0 +1,9 @@ +{ + "name": "@npmx/registry-core", + "version": "0.0.0", + "private": true, + "type": "module", + "scripts": { + "test": "node --test --experimental-strip-types src/*.test.ts" + } +} diff --git a/apps/registry-core/src/crypto.ts b/apps/registry-core/src/crypto.ts new file mode 100644 index 0000000000..df149ceb82 --- /dev/null +++ b/apps/registry-core/src/crypto.ts @@ -0,0 +1,102 @@ +import crypto, { type BinaryLike, createPrivateKey, createPublicKey, KeyObject } from 'node:crypto' + +function normalizeKeyObject(key: string | KeyObject): KeyObject { + return typeof key === 'string' ? 
createPublicKey(key) : key +} + +export function toBase64Url(input: BinaryLike): string { + return Buffer.from(input).toString('base64url') +} + +export function fromBase64Url(input: string): Buffer { + return Buffer.from(input, 'base64url') +} + +export function sha256(input: BinaryLike): Buffer { + return crypto.createHash('sha256').update(input).digest() +} + +export function sha512(input: BinaryLike): Buffer { + return crypto.createHash('sha512').update(input).digest() +} + +export function sha512Base64(input: BinaryLike): string { + return sha512(input).toString('base64') +} + +export function sha256Hex(input: BinaryLike): string { + return sha256(input).toString('hex') +} + +export type NpmKey = { + expires: string | null + keyid: string + keytype: 'ecdsa-sha2-nistp256' + scheme: 'ecdsa-sha2-nistp256' + key: string +} + +export function deriveKeyId(publicKey: string | KeyObject): string { + const key = normalizeKeyObject(publicKey) + const der = key.export({ format: 'der', type: 'spki' }) + // npm uses a SHA256 fingerprint over the SPKI public key bytes as the stable key identity. 
+ return `SHA256:${sha256(der).toString('base64')}` +} + +export function signText(privateKeyPem: string, text: string): string { + const privateKey = createPrivateKey(privateKeyPem) + return crypto.sign('sha256', Buffer.from(text), privateKey).toString('base64') +} + +export function verifyText(publicKeyPem: string, text: string, signature: string): boolean { + const publicKey = createPublicKey(publicKeyPem) + return crypto.verify('sha256', Buffer.from(text), publicKey, Buffer.from(signature, 'base64')) +} + +export function exportPublicKeyBase64(publicKey: string | KeyObject): string { + const key = normalizeKeyObject(publicKey) + return key.export({ format: 'der', type: 'spki' }).toString('base64') +} + +export function npmKeyToPublicKeyPem(key: NpmKey): string { + return createPublicKey({ + key: Buffer.from(key.key, 'base64'), + format: 'der', + type: 'spki', + }) + .export({ format: 'pem', type: 'spki' }) + .toString() +} + +export function publicKeyPemToNpmKey(publicKeyPem: string, expires: string | null = null): NpmKey { + // We publish our own keys in the same shape npm serves from /-/npm/v1/keys so + // signature verification logic can treat upstream and local registries uniformly. + return { + expires, + keyid: deriveKeyId(publicKeyPem), + keytype: 'ecdsa-sha2-nistp256', + scheme: 'ecdsa-sha2-nistp256', + key: exportPublicKeyBase64(publicKeyPem), + } +} + +export function generateRegistryKeyPair(): { + privateKeyPem: string + publicKeyPem: string + keyId: string + npmKey: NpmKey +} { + // P-256 keeps our local registry and sumdb keys compatible with npm's published key format. 
+ const { privateKey, publicKey } = crypto.generateKeyPairSync('ec', { + namedCurve: 'prime256v1', + }) + const privateKeyPem = privateKey.export({ format: 'pem', type: 'pkcs8' }).toString() + const publicKeyPem = publicKey.export({ format: 'pem', type: 'spki' }).toString() + + return { + privateKeyPem, + publicKeyPem, + keyId: deriveKeyId(publicKeyPem), + npmKey: publicKeyPemToNpmKey(publicKeyPem), + } +} diff --git a/apps/registry-core/src/index.ts b/apps/registry-core/src/index.ts new file mode 100644 index 0000000000..696e7abe73 --- /dev/null +++ b/apps/registry-core/src/index.ts @@ -0,0 +1,4 @@ +export * from './crypto.ts' +export * from './merkle.ts' +export * from './protocol.ts' +export * from './registry-discovery.ts' diff --git a/apps/registry-core/src/merkle.test.ts b/apps/registry-core/src/merkle.test.ts new file mode 100644 index 0000000000..864c2122a6 --- /dev/null +++ b/apps/registry-core/src/merkle.test.ts @@ -0,0 +1,89 @@ +import test from 'node:test' +import assert from 'node:assert/strict' +import { + getConsistencyProof, + getInclusionProof, + merkleRoot, + verifyConsistencyProof, + verifyInclusionProof, +} from './merkle.ts' +import { + createArtifactDigest, + createCheckpointText, + createLeafRecord, + deriveKeyId, + generateRegistryKeyPair, + signCheckpoint, + verifyCheckpoint, +} from './index.ts' + +function makeLeaf(index: number, keyId: string) { + return createLeafRecord({ + keyId, + name: index % 2 === 0 ? 'is-number' : '@scope/demo', + version: index % 2 === 0 ? 
'1.0.0' : '2.0.0', + type: 'tarball', + digest: createArtifactDigest(Buffer.from(`leaf-${index}`)), + size: index + 1, + url: `https://registry.example.test/artifact/${index}`, + integrity: createArtifactDigest(Buffer.from(`integrity-${index}`)), + }).canonicalLeaf +} + +test('inclusion proofs verify against the calculated root', () => { + const { keyId } = generateRegistryKeyPair() + const leaves = Array.from({ length: 8 }, (_, index) => makeLeaf(index, keyId)) + const root = merkleRoot(leaves) + + for (const [index, leaf] of leaves.entries()) { + const proof = getInclusionProof(leaves, index) + assert.equal( + verifyInclusionProof({ + leaf, + leafIndex: index, + treeSize: leaves.length, + proof, + expectedRoot: root, + }), + true, + ) + } +}) + +test('consistency proofs verify tree growth', () => { + const { keyId } = generateRegistryKeyPair() + const leaves = Array.from({ length: 9 }, (_, index) => makeLeaf(index, keyId)) + + for (let fromSize = 1; fromSize < leaves.length; fromSize++) { + const proof = getConsistencyProof(leaves, fromSize, leaves.length) + assert.equal( + verifyConsistencyProof({ + fromTreeSize: fromSize, + toTreeSize: leaves.length, + oldRoot: merkleRoot(leaves.slice(0, fromSize)), + newRoot: merkleRoot(leaves), + proof, + }), + true, + `expected proof for ${fromSize} -> ${leaves.length} to verify`, + ) + } +}) + +test('checkpoint signing and key ids are deterministic', () => { + const { privateKeyPem, publicKeyPem, keyId } = generateRegistryKeyPair() + const checkpoint = signCheckpoint( + { + treeSize: 3, + rootHash: merkleRoot([makeLeaf(1, keyId)]), + issuedAt: '2026-01-01T00:00:00.000Z', + keyId, + }, + privateKeyPem, + publicKeyPem, + ) + + assert.equal(verifyCheckpoint(checkpoint), true) + assert.equal(checkpoint.keyId, deriveKeyId(publicKeyPem)) + assert.match(createCheckpointText(checkpoint), /^npmx-sumdb-checkpoint-v1/m) +}) diff --git a/apps/registry-core/src/merkle.ts b/apps/registry-core/src/merkle.ts new file mode 100644 index 
0000000000..9e83ce83e1 --- /dev/null +++ b/apps/registry-core/src/merkle.ts @@ -0,0 +1,148 @@ +import { sha256, toBase64Url, fromBase64Url } from './crypto.ts' + +function hashEmpty(): Buffer { + return sha256(Buffer.alloc(0)) +} + +function hashLeafBytes(data: Buffer): Buffer { + return sha256(Buffer.concat([Buffer.from([0]), data])) +} + +function hashNodeBytes(left: Buffer, right: Buffer): Buffer { + return sha256(Buffer.concat([Buffer.from([1]), left, right])) +} + +function largestPowerOfTwoLessThan(n: number): number { + let power = 1 + while (power * 2 < n) { + power *= 2 + } + return power +} + +function buildLeafHashes(leaves: string[]): Buffer[] { + return leaves.map(leaf => hashLeafBytes(Buffer.from(leaf))) +} + +function merkleRootFromHashedLeaves(leaves: Buffer[]): Buffer { + if (leaves.length === 0) { + return hashEmpty() + } + + if (leaves.length === 1) { + return leaves[0]! + } + + const split = largestPowerOfTwoLessThan(leaves.length) + return hashNodeBytes( + merkleRootFromHashedLeaves(leaves.slice(0, split)), + merkleRootFromHashedLeaves(leaves.slice(split)), + ) +} + +function inclusionProofBuffers(leaves: Buffer[], index: number): Buffer[] { + if (index < 0 || index >= leaves.length) { + throw new Error(`Leaf index ${index} is outside the tree of size ${leaves.length}`) + } + + if (leaves.length <= 1) { + return [] + } + + const split = largestPowerOfTwoLessThan(leaves.length) + if (index < split) { + return [...inclusionProofBuffers(leaves.slice(0, split), index), merkleRootFromHashedLeaves(leaves.slice(split))] + } + + return [ + ...inclusionProofBuffers(leaves.slice(split), index - split), + merkleRootFromHashedLeaves(leaves.slice(0, split)), + ] +} + +export function hashLeaf(leaf: string): string { + return toBase64Url(hashLeafBytes(Buffer.from(leaf))) +} + +export function hashNode(left: string, right: string): string { + return toBase64Url(hashNodeBytes(fromBase64Url(left), fromBase64Url(right))) +} + +export function merkleRoot(leaves: 
string[]): string { + return toBase64Url(merkleRootFromHashedLeaves(buildLeafHashes(leaves))) +} + +export function merkleRootFromLeafHashes(leafHashes: string[]): string { + return toBase64Url(merkleRootFromHashedLeaves(leafHashes.map(hash => fromBase64Url(hash)))) +} + +export function getLeafHashes(leaves: string[]): string[] { + return buildLeafHashes(leaves).map(hash => toBase64Url(hash)) +} + +export function getInclusionProof(leaves: string[], index: number, treeSize = leaves.length): string[] { + return inclusionProofBuffers(buildLeafHashes(leaves.slice(0, treeSize)), index).map(hash => toBase64Url(hash)) +} + +export function getConsistencyProof(leaves: string[], fromTreeSize: number, toTreeSize = leaves.length): string[] { + if (fromTreeSize < 1 || fromTreeSize > toTreeSize) { + throw new Error(`Invalid consistency proof range ${fromTreeSize} -> ${toTreeSize}`) + } + + if (fromTreeSize === toTreeSize) { + return [] + } + + return getLeafHashes(leaves.slice(0, toTreeSize)) +} + +export function verifyInclusionProof(input: { + leaf: string + leafIndex: number + treeSize: number + proof: string[] + expectedRoot: string +}): boolean { + let index = input.leafIndex + let size = input.treeSize + let hash = hashLeafBytes(Buffer.from(input.leaf)) + + for (const item of input.proof) { + const sibling = fromBase64Url(item) + if (index % 2 === 1 || index === size - 1) { + hash = hashNodeBytes(sibling, hash) + } else { + hash = hashNodeBytes(hash, sibling) + } + + index = Math.floor(index / 2) + size = Math.ceil(size / 2) + } + + return toBase64Url(hash) === input.expectedRoot +} + +export function verifyConsistencyProof(input: { + fromTreeSize: number + toTreeSize: number + oldRoot: string + newRoot: string + proof: string[] +}): boolean { + if (input.fromTreeSize === input.toTreeSize) { + return input.oldRoot === input.newRoot && input.proof.length === 0 + } + + if (input.fromTreeSize < 1 || input.fromTreeSize > input.toTreeSize) { + return false + } + + if 
(input.proof.length !== input.toTreeSize) { + return false + } + + return ( + merkleRootFromLeafHashes(input.proof.slice(0, input.fromTreeSize)) === input.oldRoot && + merkleRootFromLeafHashes(input.proof) === input.newRoot + ) +} diff --git a/apps/registry-core/src/protocol.ts b/apps/registry-core/src/protocol.ts new file mode 100644 index 0000000000..0dad8ad38f --- /dev/null +++ b/apps/registry-core/src/protocol.ts @@ -0,0 +1,184 @@ +import type { NpmKey } from './crypto.ts' +import { deriveKeyId, sha256Hex, sha512Base64, signText, verifyText } from './crypto.ts' + +export type ArtifactType = 'tarball' + +export interface RegistryIdentity { + keyId: string + publicKey: string + baseUrl: string + upstreamBaseUrl: string +} + +export interface SourceRegistry { + label: string + registryBaseUrl: string + keysEndpoint: string + npmKeys: NpmKey[] +} + +export interface IngestRecord { + keyId: string + name: string + version: string + type: ArtifactType + digest: string + size: number + url: string + integrity: string + signature: string +} + +export interface SumDbLeafRecord extends IngestRecord { + canonicalLeaf: string + leafHash: string +} + +export interface CheckpointPayload { + treeSize: number + rootHash: string + issuedAt: string + keyId: string +} + +export interface SignedCheckpoint extends CheckpointPayload { + signature: string + publicKey: string +} + +export interface InclusionProofResponse { + leafHash: string + leafIndex: number + treeSize: number + hashes: string[] +} + +export interface ConsistencyProofResponse { + fromTreeSize: number + toTreeSize: number + hashes: string[] +} + +export function stableStringify(value: unknown): string { + if (value === undefined) { + return 'null' + } + + if (value === null || typeof value !== 'object') { + return JSON.stringify(value) + } + + if (Array.isArray(value)) { + return `[${value.map(item => stableStringify(item)).join(',')}]` + } + + const entries = Object.entries(value as Record).sort(([left], [right]) 
=> + left.localeCompare(right), + ) + + // The sumdb and proxy use this when hashing or persisting records so object key order + // never changes the signed/checkpointed bytes. + return `{${entries + .filter(([, item]) => item !== undefined) + .map(([key, item]) => `${JSON.stringify(key)}:${stableStringify(item)}`) + .join(',')}}` +} + +export function createRegistryIdentity(input: { + publicKey: string + baseUrl: string + upstreamBaseUrl: string +}): RegistryIdentity { + return { + keyId: deriveKeyId(input.publicKey), + publicKey: input.publicKey, + baseUrl: input.baseUrl, + upstreamBaseUrl: input.upstreamBaseUrl, + } +} + +export function resolveSourceRegistry(registries: SourceRegistry[]): SourceRegistry { + const matched = registries[0] + if (!matched) { + throw new Error('No source registry configured') + } + + // With package-level routing removed, the first configured registry is the fetch source. + return matched +} + +export function createArtifactDigest(input: Buffer | string): string { + return `sha512-${sha512Base64(input)}` +} + +export function createCanonicalLeaf(record: IngestRecord): string { + // The logged leaf is intentionally minimal and immutable: only the responsible signing key + // plus the artifact facts needed for independent digest verification are checkpointed. 
+ return [ + 'v1', + record.keyId, + record.name, + record.version, + record.type, + record.digest, + String(record.size), + record.url, + record.integrity, + record.signature, + ].join(' ') +} + +export function createLeafHash(canonicalLeaf: string): string { + return sha256Hex(Buffer.from(canonicalLeaf)) +} + +export function createLeafRecord(record: IngestRecord): SumDbLeafRecord { + const canonicalLeaf = createCanonicalLeaf(record) + return { + ...record, + canonicalLeaf, + leafHash: createLeafHash(canonicalLeaf), + } +} + +export function createPackageSignatureText(name: string, version: string, integrity: string): string { + // npm signs `${name}@${version}:${dist.integrity}` for each published version. + return `${name}@${version}:${integrity}` +} + +export function createCheckpointText(payload: CheckpointPayload): string { + // The checkpoint text is deliberately line-oriented so it is easy to inspect and sign. + return [ + 'npmx-sumdb-checkpoint-v1', + payload.treeSize, + payload.rootHash, + payload.issuedAt, + payload.keyId, + ].join('\n') +} + +export function signCheckpoint(payload: CheckpointPayload, privateKeyPem: string, publicKey: string) { + return { + ...payload, + signature: signText(privateKeyPem, createCheckpointText(payload)), + publicKey, + } satisfies SignedCheckpoint +} + +export function verifyCheckpoint(checkpoint: SignedCheckpoint): boolean { + return verifyText(checkpoint.publicKey, createCheckpointText(checkpoint), checkpoint.signature) +} + +export function createRegistryMetadata(publicKey: string) { + return { + keyId: deriveKeyId(publicKey), + publicKey, + } +} + +export function assertRegistryKeyMatchesPublicKey(registryKeyId: string, publicKey: string) { + const derivedKeyId = deriveKeyId(publicKey) + if (registryKeyId !== derivedKeyId) { + throw new Error(`Registry key mismatch: expected ${derivedKeyId} but received ${registryKeyId}`) + } +} diff --git a/apps/registry-core/src/registry-discovery.ts 
b/apps/registry-core/src/registry-discovery.ts new file mode 100644 index 0000000000..71674292e7 --- /dev/null +++ b/apps/registry-core/src/registry-discovery.ts @@ -0,0 +1,60 @@ +import type { NpmKey } from './crypto.ts' +import type { SourceRegistry } from './protocol.ts' + +export interface ConfiguredSourceRegistry { + label: string + registryBaseUrl: string + npmKeys?: NpmKey[] +} + +export async function fetchRegistryKeys(registryBaseUrl: string): Promise { + const response = await fetch(`${registryBaseUrl}/-/npm/v1/keys`) + if (!response.ok) { + throw new Error(`Unable to fetch keys for ${registryBaseUrl}: ${response.status}`) + } + + const payload = (await response.json()) as { keys?: NpmKey[] } + if (!payload.keys?.length) { + throw new Error(`Registry ${registryBaseUrl} did not return any keys`) + } + + return payload.keys +} + +export async function hydrateSourceRegistries( + configured: ConfiguredSourceRegistry[], + fallbackUpstreamBaseUrl?: string, +): Promise { + const effective = + configured.length > 0 + ? configured + : [ + { + label: 'npm', + registryBaseUrl: fallbackUpstreamBaseUrl ?? 'https://registry.npmjs.org', + }, + ] + + return await Promise.all( + effective.map(async registry => ({ + label: registry.label, + registryBaseUrl: registry.registryBaseUrl, + keysEndpoint: `${registry.registryBaseUrl}/-/npm/v1/keys`, + npmKeys: registry.npmKeys ?? 
(await fetchRegistryKeys(registry.registryBaseUrl)), + })), + ) +} + +export function collectPublishedKeys(registries: SourceRegistry[]): NpmKey[] { + const keys = new Map() + + for (const registry of registries) { + for (const key of registry.npmKeys) { + if (!keys.has(key.keyid)) { + keys.set(key.keyid, key) + } + } + } + + return [...keys.values()].sort((left, right) => left.keyid.localeCompare(right.keyid)) +} diff --git a/apps/registry-core/tsconfig.json b/apps/registry-core/tsconfig.json new file mode 100644 index 0000000000..26e8ddf779 --- /dev/null +++ b/apps/registry-core/tsconfig.json @@ -0,0 +1,14 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "nodenext", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "noEmit": true, + "allowImportingTsExtensions": true, + "types": ["node"] + }, + "include": ["src/**/*.ts"], + "exclude": ["node_modules", "dist"] +} diff --git a/apps/registry-proxy/package.json b/apps/registry-proxy/package.json new file mode 100644 index 0000000000..544e582a3d --- /dev/null +++ b/apps/registry-proxy/package.json @@ -0,0 +1,9 @@ +{ + "name": "@npmx/registry-proxy", + "version": "0.0.0", + "private": true, + "type": "module", + "scripts": { + "dev": "node --experimental-strip-types src/cli.ts" + } +} diff --git a/apps/registry-proxy/src/cli.ts b/apps/registry-proxy/src/cli.ts new file mode 100644 index 0000000000..b9e2bd8538 --- /dev/null +++ b/apps/registry-proxy/src/cli.ts @@ -0,0 +1,41 @@ +import path from 'node:path' +import process from 'node:process' +import type { NpmKey } from '../../registry-core/src/index.ts' +import { registryCatalog } from '../../../config/registries.ts' +import { createRegistryProxyServer } from './server.ts' + +const port = Number(process.env.PROXY_PORT ?? process.env.PORT ?? 4317) +const baseUrl = process.env.PROXY_BASE_URL ?? `http://127.0.0.1:${port}` +const upstreamBaseUrl = process.env.UPSTREAM_NPM_REGISTRY ?? 
'https://registry.npmjs.org' +const cacheDir = process.env.PROXY_CACHE_DIR ?? path.resolve(process.cwd(), '.data/proxy-cache') +const sumDbBaseUrl = process.env.SUMDB_BASE_URL +const sourceRegistries = process.env.SOURCE_REGISTRIES_JSON + ? (JSON.parse(process.env.SOURCE_REGISTRIES_JSON) as Array<{ + label: string + registryBaseUrl: string + npmKeys?: NpmKey[] + }>) + // The checked-in catalog is the normal source of truth; the env var is just an escape hatch + // for experiments without editing repo config. + : registryCatalog + +const { server, identity, sourceRegistries: resolvedSourceRegistries } = await createRegistryProxyServer({ + port, + baseUrl, + upstreamBaseUrl, + sourceRegistries, + cacheDir, + sumDbBaseUrl, + registryPrivateKey: process.env.REGISTRY_PRIVATE_KEY, + registryPublicKey: process.env.REGISTRY_PUBLIC_KEY, +}) + +server.listen(port, '127.0.0.1', () => { + console.log(`[proxy] listening on ${baseUrl}`) + console.log(`[proxy] registry key id: ${identity.keyId}`) + console.log( + `[proxy] source registries: ${resolvedSourceRegistries.map(registry => `${registry.label}=${registry.registryBaseUrl}`).join(', ')}`, + ) + console.log(`[proxy] cache dir: ${cacheDir}`) + console.log(`[proxy] sumdb: ${sumDbBaseUrl ?? 
'disabled'}`) +}) diff --git a/apps/registry-proxy/src/server.ts b/apps/registry-proxy/src/server.ts new file mode 100644 index 0000000000..81d456ee13 --- /dev/null +++ b/apps/registry-proxy/src/server.ts @@ -0,0 +1,492 @@ +import crypto from 'node:crypto' +import fs from 'node:fs/promises' +import http from 'node:http' +import path from 'node:path' +import { URL } from 'node:url' +import { + createArtifactDigest, + createRegistryIdentity, + hydrateSourceRegistries, + npmKeyToPublicKeyPem, + publicKeyPemToNpmKey, + resolveSourceRegistry, + stableStringify, + type ConfiguredSourceRegistry, + type IngestRecord, + type RegistryIdentity, + type SourceRegistry, +} from '../../registry-core/src/index.ts' +import { generateRegistryKeyPair } from '../../registry-core/src/crypto.ts' + +interface ProxyServerOptions { + port: number + baseUrl: string + upstreamBaseUrl?: string + sourceRegistries?: ConfiguredSourceRegistry[] + cacheDir: string + sumDbBaseUrl?: string + registryPrivateKey?: string + registryPublicKey?: string +} + +type VerifiedVersionSignature = { + integrity: string + upstreamSignature: string + responsibleKeyId: string +} + +function createRequestId(): string { + return crypto.randomUUID() +} + +function sanitizeCacheSegment(value: string): string { + // npm-style key IDs contain characters like "/" and ":" that are valid in identifiers + // but unsafe in filenames, so cache paths must normalize them. 
+ return value.replaceAll(/[^a-zA-Z0-9._-]+/g, '_') +} + +function cachePathForRegistry(cacheDir: string, registryLabel: string, packageName: string): string { + return path.join( + cacheDir, + `${sanitizeCacheSegment(registryLabel)}__${sanitizeCacheSegment(packageName)}.json`, + ) +} + +function isPackumentPath(pathname: string): boolean { + const parts = pathname.split('/').filter(Boolean) + return parts.length === 1 || (parts.length === 2 && parts[0]?.startsWith('@')) +} + +function parseTarballRequest(pathname: string): { packageName: string; version: string } | null { + const parts = pathname.split('/').filter(Boolean) + if (parts.length === 3 && parts[1] === '-') { + const packageName = decodeURIComponent(parts[0]!) + const filename = decodeURIComponent(parts[2]!) + const prefix = `${packageName}-` + if (!filename.startsWith(prefix) || !filename.endsWith('.tgz')) { + return null + } + + return { + packageName, + version: filename.slice(prefix.length, -4), + } + } + + if (parts.length === 4 && parts[0]?.startsWith('@') && parts[2] === '-') { + const scope = decodeURIComponent(parts[0]!) + const name = decodeURIComponent(parts[1]!) + const packageName = `${scope}/${name}` + const filename = decodeURIComponent(parts[3]!) + const prefix = `${name}-` + if (!filename.startsWith(prefix) || !filename.endsWith('.tgz')) { + return null + } + + return { + packageName, + version: filename.slice(prefix.length, -4), + } + } + + return null +} + +function parsePackageName(pathname: string): string | null { + const parts = pathname.split('/').filter(Boolean) + if (parts.length === 1) { + return decodeURIComponent(parts[0]!) + } + if (parts.length === 2 && parts[0]?.startsWith('@')) { + return `${decodeURIComponent(parts[0]!)}\/${decodeURIComponent(parts[1]!)}` + } + return null +} + +function rewritePackumentTarballs(packument: Record): Record { + const cloned = structuredClone(packument) + // We intentionally leave tarball URLs pointing at the source registry. 
The proxy still + // observes and checkpoints installs, but lockfiles should record the origin URL. + return cloned +} + +async function ingestRecord(sumDbBaseUrl: string | undefined, record: IngestRecord) { + if (!sumDbBaseUrl) { + return { checkpointed: false, reason: 'SUMDB_BASE_URL not configured' } + } + + try { + const response = await fetch(`${sumDbBaseUrl}/ingest`, { + method: 'POST', + headers: { + 'content-type': 'application/json', + }, + body: stableStringify(record), + }) + + if (!response.ok) { + const errorBody = await response.text() + return { + checkpointed: false, + reason: `sumdb returned ${response.status}${errorBody ? `: ${errorBody.trim()}` : ''}`, + } + } + + return { + checkpointed: true, + reason: 'ingested', + } + } catch (error) { + return { + checkpointed: false, + reason: error instanceof Error ? error.message : String(error), + } + } +} + +function verifyVersionSignature(input: { + packageName: string + version: string + versionMetadata: Record + sourceRegistry: SourceRegistry +}): VerifiedVersionSignature | null { + const dist = input.versionMetadata.dist + if (!dist || typeof dist !== 'object') { + return null + } + + const typedDist = dist as Record + const integrity = typedDist.integrity + if (typeof integrity !== 'string' || !integrity) { + return null + } + + const signatures = Array.isArray(typedDist.signatures) + ? (typedDist.signatures as Array>) + : [] + if (signatures.length === 0) { + return null + } + + for (const entry of signatures) { + const keyId = typeof entry?.keyid === 'string' ? entry.keyid : undefined + const upstreamSignature = typeof entry?.sig === 'string' ? entry.sig : undefined + if (!keyId || !upstreamSignature) { + continue + } + + const matchingKey = input.sourceRegistry.npmKeys.find(key => key.keyid === keyId) + if (!matchingKey) { + continue + } + + // npm signs `${name}@${version}:${dist.integrity}`. We verify exactly that string against + // the key set published by the chosen source registry. 
+ const verified = crypto.verify( + 'sha256', + Buffer.from(`${input.packageName}@${input.version}:${integrity}`), + npmKeyToPublicKeyPem(matchingKey), + Buffer.from(upstreamSignature, 'base64'), + ) + if (verified) { + // We treat each published key independently, so the logged record is bound to the + // exact key that signed the package version rather than to a registry-level placeholder. + return { + integrity, + upstreamSignature, + responsibleKeyId: keyId, + } + } + } + + return null +} + +async function fetchPackument(packageName: string, sourceRegistry: SourceRegistry) { + const encodedPackageName = packageName.startsWith('@') + ? packageName.replace('/', '%2f') + : packageName + + const upstreamResponse = await fetch(`${sourceRegistry.registryBaseUrl}/${encodedPackageName}`) + if (!upstreamResponse.ok) { + throw new Error(`upstream returned ${upstreamResponse.status}`) + } + + return (await upstreamResponse.json()) as Record +} + +function getVersionMetadata(packument: Record, version: string) { + const versions = packument.versions + if (!versions || typeof versions !== 'object') { + return null + } + + const versionMetadata = (versions as Record)[version] + if (!versionMetadata || typeof versionMetadata !== 'object') { + return null + } + + return versionMetadata as Record +} + +async function fetchAndIngestTarball(input: { + packageName: string + version: string + tarballUrl: string + verifiedSignature: VerifiedVersionSignature + sumDbBaseUrl?: string +}) { + const response = await fetch(input.tarballUrl) + if (!response.ok) { + throw new Error(`Unable to fetch tarball ${input.tarballUrl}: ${response.status}`) + } + + const body = Buffer.from(await response.arrayBuffer()) + const record: IngestRecord = { + keyId: input.verifiedSignature.responsibleKeyId, + name: input.packageName, + version: input.version, + type: 'tarball', + digest: createArtifactDigest(body), + size: body.byteLength, + url: input.tarballUrl, + integrity: 
input.verifiedSignature.integrity, + signature: input.verifiedSignature.upstreamSignature, + } + const ingestResult = await ingestRecord(input.sumDbBaseUrl, record) + + console.log( + `[proxy] tarball ${input.packageName}@${input.version} checkpointed=${ingestResult.checkpointed} reason=${ingestResult.reason}`, + ) +} + +async function ensureInstallTarballRecorded(input: { + packageName: string + packument: Record + sourceRegistry: SourceRegistry + sumDbBaseUrl?: string +}) { + // npm installs typically fetch a packument before the tarball itself. We proactively try to + // checkpoint the latest tagged tarball here so sumdb logging does not depend on npm reusing + // the proxy for the subsequent tarball download. + const distTags = input.packument['dist-tags'] + if (!distTags || typeof distTags !== 'object') { + return + } + + const latestVersion = (distTags as Record).latest + if (typeof latestVersion !== 'string') { + return + } + + const versionMetadata = getVersionMetadata(input.packument, latestVersion) + if (!versionMetadata) { + return + } + + const verifiedSignature = verifyVersionSignature({ + packageName: input.packageName, + version: latestVersion, + versionMetadata, + sourceRegistry: input.sourceRegistry, + }) + if (!verifiedSignature) { + return + } + + const dist = versionMetadata.dist as Record + const tarballUrl = dist.tarball + if (typeof tarballUrl !== 'string') { + return + } + + await fetchAndIngestTarball({ + packageName: input.packageName, + version: latestVersion, + tarballUrl, + verifiedSignature, + sumDbBaseUrl: input.sumDbBaseUrl, + }) +} + +async function writePackumentCache(cacheDir: string, registryLabel: string, packageName: string, body: string) { + await fs.mkdir(cacheDir, { recursive: true }) + await fs.writeFile(cachePathForRegistry(cacheDir, registryLabel, packageName), body) +} + +async function readPackumentCache(cacheDir: string, registryLabel: string, packageName: string): Promise { + try { + return await 
fs.readFile(cachePathForRegistry(cacheDir, registryLabel, packageName), 'utf8') + } catch (error) { + const nodeError = error as NodeJS.ErrnoException + if (nodeError.code === 'ENOENT') { + return null + } + throw error + } +} + +function sendJson(response: http.ServerResponse, statusCode: number, body: unknown) { + response.statusCode = statusCode + response.setHeader('content-type', 'application/json; charset=utf-8') + response.end(`${stableStringify(body)}\n`) +} + +function sendError(response: http.ServerResponse, statusCode: number, message: string) { + sendJson(response, statusCode, { error: message }) +} + +export async function createRegistryProxyServer(options: ProxyServerOptions) { + const keyPair = + options.registryPrivateKey && options.registryPublicKey + ? { + privateKeyPem: options.registryPrivateKey, + publicKeyPem: options.registryPublicKey, + npmKey: publicKeyPemToNpmKey(options.registryPublicKey), + } + : generateRegistryKeyPair() + + const identity = createRegistryIdentity({ + publicKey: keyPair.publicKeyPem, + baseUrl: options.baseUrl, + upstreamBaseUrl: options.upstreamBaseUrl ?? 'https://registry.npmjs.org', + }) + + const sourceRegistries = await hydrateSourceRegistries(options.sourceRegistries ?? [], options.upstreamBaseUrl) + + const server = http.createServer(async (request, response) => { + const method = request.method ?? 'GET' + const url = new URL(request.url ?? 
'/', options.baseUrl) + const pathname = url.pathname + + if (method === 'GET' && pathname === '/-/npm/v1/keys') { + sendJson(response, 200, { + keys: [keyPair.npmKey], + }) + return + } + + if (method !== 'GET') { + sendError(response, 405, `Unsupported method ${method}`) + return + } + + try { + const tarballRequest = parseTarballRequest(pathname) + if (tarballRequest) { + const sourceRegistry = resolveSourceRegistry(sourceRegistries) + const requestId = createRequestId() + const upstreamTarballUrl = `${sourceRegistry.registryBaseUrl}${pathname}` + const upstreamResponse = await fetch(upstreamTarballUrl) + if (!upstreamResponse.ok || !upstreamResponse.body) { + sendError(response, upstreamResponse.status || 502, `Unable to fetch ${upstreamTarballUrl}`) + return + } + + response.statusCode = upstreamResponse.status + response.setHeader( + 'content-type', + upstreamResponse.headers.get('content-type') ?? 'application/octet-stream', + ) + const contentLength = upstreamResponse.headers.get('content-length') + if (contentLength) { + response.setHeader('content-length', contentLength) + } + response.setHeader('x-npmx-request-id', requestId) + + const chunks: Buffer[] = [] + for await (const chunk of upstreamResponse.body) { + const buffer = Buffer.from(chunk) + chunks.push(buffer) + response.write(buffer) + } + response.end() + + const body = Buffer.concat(chunks) + const packument = await fetchPackument(tarballRequest.packageName, sourceRegistry) + const versionMetadata = getVersionMetadata(packument, tarballRequest.version) + const verifiedSignature = + versionMetadata && + verifyVersionSignature({ + packageName: tarballRequest.packageName, + version: tarballRequest.version, + versionMetadata, + sourceRegistry, + }) + + if (!verifiedSignature) { + console.log(`[proxy] tarball ${tarballRequest.packageName}@${tarballRequest.version} checkpointed=false reason=signature verification failed`) + return + } + + const record: IngestRecord = { + keyId: 
verifiedSignature.responsibleKeyId, + name: tarballRequest.packageName, + version: tarballRequest.version, + type: 'tarball', + digest: createArtifactDigest(body), + size: body.byteLength, + url: upstreamTarballUrl, + integrity: verifiedSignature.integrity, + signature: verifiedSignature.upstreamSignature, + } + const ingestResult = await ingestRecord(options.sumDbBaseUrl, record) + + console.log( + `[proxy] tarball ${tarballRequest.packageName}@${tarballRequest.version} checkpointed=${ingestResult.checkpointed} reason=${ingestResult.reason}`, + ) + return + } + + if (!isPackumentPath(pathname)) { + sendError(response, 404, `Unsupported path ${pathname}`) + return + } + + const packageName = parsePackageName(pathname) + if (!packageName) { + sendError(response, 400, `Invalid package path ${pathname}`) + return + } + const sourceRegistry = resolveSourceRegistry(sourceRegistries) + + const requestId = createRequestId() + let body: string | null = null + try { + const upstreamPackument = await fetchPackument(packageName, sourceRegistry) + await ensureInstallTarballRecorded({ + packageName, + packument: upstreamPackument, + sourceRegistry, + sumDbBaseUrl: options.sumDbBaseUrl, + }) + + const rewritten = rewritePackumentTarballs(upstreamPackument) + body = `${stableStringify(rewritten)}\n` + await writePackumentCache(options.cacheDir, sourceRegistry.label, packageName, body) + } catch (error) { + body = await readPackumentCache(options.cacheDir, sourceRegistry.label, packageName) + if (!body) { + throw error + } + } + + response.statusCode = 200 + response.setHeader('content-type', 'application/json; charset=utf-8') + response.setHeader('x-npmx-request-id', requestId) + response.end(body) + + console.log(`[proxy] packument ${packageName} request=${requestId}`) + } catch (error) { + sendError(response, 500, error instanceof Error ? 
error.message : String(error)) + } + }) + + return { + server, + port: options.port, + identity, + sourceRegistries, + keyPair, + } +} diff --git a/apps/registry-proxy/tsconfig.json b/apps/registry-proxy/tsconfig.json new file mode 100644 index 0000000000..26e8ddf779 --- /dev/null +++ b/apps/registry-proxy/tsconfig.json @@ -0,0 +1,14 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "nodenext", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "noEmit": true, + "allowImportingTsExtensions": true, + "types": ["node"] + }, + "include": ["src/**/*.ts"], + "exclude": ["node_modules", "dist"] +} diff --git a/apps/sumdb/package.json b/apps/sumdb/package.json new file mode 100644 index 0000000000..b9a4cd4da5 --- /dev/null +++ b/apps/sumdb/package.json @@ -0,0 +1,9 @@ +{ + "name": "@npmx/sumdb", + "version": "0.0.0", + "private": true, + "type": "module", + "scripts": { + "dev": "node --experimental-strip-types src/cli.ts" + } +} diff --git a/apps/sumdb/src/cli.ts b/apps/sumdb/src/cli.ts new file mode 100644 index 0000000000..12031bd9c7 --- /dev/null +++ b/apps/sumdb/src/cli.ts @@ -0,0 +1,30 @@ +import path from 'node:path' +import process from 'node:process' +import { registryCatalog } from '../../../config/registries.ts' +import { collectPublishedKeys, hydrateSourceRegistries } from '../../registry-core/src/index.ts' +import { createSumDbServer } from './server.ts' + +const port = Number(process.env.SUMDB_PORT ?? process.env.PORT ?? 4318) +const dataDir = process.env.SUMDB_DATA_DIR ?? path.resolve(process.cwd(), '.data/sumdb') +const allowedRegistryKeys = process.env.ALLOWED_INGEST_REGISTRY_KEYS + ? 
process.env.ALLOWED_INGEST_REGISTRY_KEYS.split(',').map(item => item.trim()).filter(Boolean) + : null + +const hydratedRegistries = await hydrateSourceRegistries(registryCatalog) +const trustedResponsibleKeys = collectPublishedKeys(hydratedRegistries) + +const { server, keyPair } = await createSumDbServer({ + port, + dataDir, + sumDbPrivateKey: process.env.SUMDB_PRIVATE_KEY, + sumDbPublicKey: process.env.SUMDB_PUBLIC_KEY, + allowedRegistryKeys, + trustedResponsibleKeys, +}) + +server.listen(port, '127.0.0.1', () => { + console.log(`[sumdb] listening on http://127.0.0.1:${port}`) + console.log(`[sumdb] key id: ${keyPair.keyId}`) + console.log(`[sumdb] data dir: ${dataDir}`) + console.log(`[sumdb] trusted responsible keys: ${trustedResponsibleKeys.length}`) +}) diff --git a/apps/sumdb/src/server.ts b/apps/sumdb/src/server.ts new file mode 100644 index 0000000000..d0e11afb16 --- /dev/null +++ b/apps/sumdb/src/server.ts @@ -0,0 +1,167 @@ +import http from 'node:http' +import { URL } from 'node:url' +import { generateRegistryKeyPair } from '../../registry-core/src/index.ts' +import { + deriveKeyId, + publicKeyPemToNpmKey, + stableStringify, + type IngestRecord, + type NpmKey, +} from '../../registry-core/src/index.ts' +import { SumDbStore } from './store.ts' + +interface SumDbServerOptions { + port: number + dataDir: string + sumDbPrivateKey?: string + sumDbPublicKey?: string + allowedRegistryKeys: string[] | null + trustedResponsibleKeys: NpmKey[] +} + +function sendJson(response: http.ServerResponse, statusCode: number, body: unknown) { + response.statusCode = statusCode + response.setHeader('content-type', 'application/json; charset=utf-8') + response.end(`${stableStringify(body)}\n`) +} + +function sendError(response: http.ServerResponse, statusCode: number, message: string) { + sendJson(response, statusCode, { + error: message, + }) +} + +async function readJsonBody(request: http.IncomingMessage): Promise { + const chunks: Buffer[] = [] + for await (const chunk of 
request) { + chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk)) + } + return JSON.parse(Buffer.concat(chunks).toString('utf8')) as T +} + +function parseLookupPath(pathname: string) { + const parts = pathname.split('/').filter(Boolean) + if (parts.length < 4 || parts[0] !== 'lookup') { + return null + } + + return { + keyId: decodeURIComponent(parts[1]!), + name: decodeURIComponent(parts.slice(2, -1).join('/')), + version: decodeURIComponent(parts.at(-1)!), + } +} + +export async function createSumDbServer(options: SumDbServerOptions) { + const keyPair = + options.sumDbPrivateKey && options.sumDbPublicKey + ? { + privateKeyPem: options.sumDbPrivateKey, + publicKeyPem: options.sumDbPublicKey, + keyId: deriveKeyId(options.sumDbPublicKey), + npmKey: publicKeyPemToNpmKey(options.sumDbPublicKey), + } + : generateRegistryKeyPair() + + // The sumdb signs checkpoints with npm-style key material, but it only consumes trusted + // responsible public keys that are fed into it at startup. + const store = new SumDbStore({ + dataDir: options.dataDir, + sumDbPrivateKey: keyPair.privateKeyPem, + sumDbPublicKey: keyPair.publicKeyPem, + sumDbKeyId: keyPair.keyId, + allowedRegistryKeys: options.allowedRegistryKeys, + trustedResponsibleKeys: options.trustedResponsibleKeys, + }) + + await store.load() + store.latestCheckpoint() + await store.save() + + const server = http.createServer(async (request, response) => { + try { + const method = request.method ?? 'GET' + const url = new URL(request.url ?? '/', `http://${request.headers.host ?? 
'127.0.0.1'}`) + const pathname = url.pathname + + if (method === 'GET' && pathname === '/') { + sendJson(response, 200, { + service: 'sum.npmx.dev', + publicKey: keyPair.publicKeyPem, + keyId: keyPair.keyId, + keys: [keyPair.npmKey], + latestCheckpoint: store.latestCheckpoint(), + }) + return + } + + if (method === 'GET' && pathname === '/latest-checkpoint') { + sendJson(response, 200, store.latestCheckpoint()) + return + } + + if (method === 'GET' && pathname.startsWith('/checkpoint/')) { + const treeSize = Number(pathname.replace('/checkpoint/', '')) + const checkpoint = store.getCheckpointByTreeSize(treeSize) + if (!checkpoint) { + sendError(response, 404, `Checkpoint for tree size ${treeSize} was not found`) + return + } + sendJson(response, 200, checkpoint) + return + } + + if (method === 'POST' && pathname === '/ingest') { + const record = await readJsonBody(request) + const result = await store.ingest(record) + sendJson(response, 201, result) + return + } + + const lookup = parseLookupPath(pathname) + if (method === 'GET' && lookup) { + sendJson(response, 200, store.lookup(lookup.keyId, lookup.name, lookup.version)) + return + } + + if (method === 'GET' && pathname.startsWith('/tile/')) { + const [, , levelRaw, indexRaw] = pathname.split('/') + const tile = store.getTile(Number(levelRaw), Number(indexRaw)) + if (!tile) { + sendError(response, 404, `Tile ${levelRaw}/${indexRaw} does not exist`) + return + } + sendJson(response, 200, { + level: Number(levelRaw), + index: Number(indexRaw), + hash: tile, + }) + return + } + + if (method === 'GET' && pathname.startsWith('/proof/inclusion/')) { + const leafIndex = Number(pathname.replace('/proof/inclusion/', '')) + const treeSize = Number(url.searchParams.get('treeSize') ?? 
store.latestCheckpoint().treeSize) + sendJson(response, 200, store.getInclusionProof(leafIndex, treeSize)) + return + } + + if (method === 'GET' && pathname.startsWith('/proof/consistency/')) { + const [, , , fromRaw, toRaw] = pathname.split('/') + sendJson(response, 200, store.getConsistencyProof(Number(fromRaw), Number(toRaw))) + return + } + + sendError(response, 404, `Unknown route ${method} ${pathname}`) + } catch (error) { + sendError(response, 500, error instanceof Error ? error.message : String(error)) + } + }) + + return { + server, + port: options.port, + keyPair, + store, + } +} diff --git a/apps/sumdb/src/store.ts b/apps/sumdb/src/store.ts new file mode 100644 index 0000000000..4c18c5dd7c --- /dev/null +++ b/apps/sumdb/src/store.ts @@ -0,0 +1,251 @@ +import fs from 'node:fs/promises' +import path from 'node:path' +import crypto from 'node:crypto' +import { + createLeafRecord, + createPackageSignatureText, + getConsistencyProof, + getInclusionProof, + hashNode, + merkleRoot, + npmKeyToPublicKeyPem, + signCheckpoint, + type CheckpointPayload, + type ConsistencyProofResponse, + type InclusionProofResponse, + type IngestRecord, + type NpmKey, + type SignedCheckpoint, + type SumDbLeafRecord, +} from '../../registry-core/src/index.ts' + +interface SumDbState { + leaves: SumDbLeafRecord[] + checkpoints: SignedCheckpoint[] +} + +function createInitialState(): SumDbState { + return { + leaves: [], + checkpoints: [], + } +} + +export class SumDbStore { + readonly dataDir: string + readonly statePath: string + readonly sumDbPrivateKey: string + readonly sumDbPublicKey: string + readonly sumDbKeyId: string + readonly allowedRegistryKeys: Set | null + readonly trustedResponsibleKeys: Map + + state: SumDbState = createInitialState() + + constructor(input: { + dataDir: string + sumDbPrivateKey: string + sumDbPublicKey: string + sumDbKeyId: string + allowedRegistryKeys: string[] | null + trustedResponsibleKeys: NpmKey[] + }) { + this.dataDir = input.dataDir + 
this.statePath = path.join(this.dataDir, 'state.json') + this.sumDbPrivateKey = input.sumDbPrivateKey + this.sumDbPublicKey = input.sumDbPublicKey + this.sumDbKeyId = input.sumDbKeyId + this.allowedRegistryKeys = + input.allowedRegistryKeys && input.allowedRegistryKeys.length > 0 + ? new Set(input.allowedRegistryKeys) + : null + this.trustedResponsibleKeys = new Map(input.trustedResponsibleKeys.map(key => [key.keyid, key])) + } + + async load() { + await fs.mkdir(this.dataDir, { recursive: true }) + + try { + const raw = await fs.readFile(this.statePath, 'utf8') + this.state = JSON.parse(raw) as SumDbState + } catch (error) { + const nodeError = error as NodeJS.ErrnoException + if (nodeError.code !== 'ENOENT') { + throw error + } + this.state = createInitialState() + await this.save() + } + } + + async save() { + await fs.writeFile(this.statePath, JSON.stringify(this.state, null, 2)) + } + + latestCheckpoint(): SignedCheckpoint { + const latest = this.state.checkpoints[this.state.checkpoints.length - 1] + if (latest) { + return latest + } + + const payload: CheckpointPayload = { + treeSize: 0, + rootHash: merkleRoot([]), + issuedAt: new Date().toISOString(), + keyId: this.sumDbKeyId, + } + + const checkpoint = signCheckpoint(payload, this.sumDbPrivateKey, this.sumDbPublicKey) + this.state.checkpoints.push(checkpoint) + return checkpoint + } + + private assertRegistryAllowed(keyId: string) { + if (this.allowedRegistryKeys && !this.allowedRegistryKeys.has(keyId)) { + throw new Error(`Registry key ${keyId} is not allowed to ingest records`) + } + } + + private verifyRecord(record: IngestRecord) { + if (record.type !== 'tarball') { + throw new Error(`Only tarball records can be ingested`) + } + if (!record.integrity) { + throw new Error(`Integrity is required for all ingested records`) + } + this.assertRegistryAllowed(record.keyId) + + const responsibleKey = this.trustedResponsibleKeys.get(record.keyId) + if (!responsibleKey) { + throw new Error(`Unknown responsible 
key ${record.keyId}`) + } + + // The sumdb re-verifies the upstream package signature using only the trusted public key + // for the responsible key ID and the signature stored alongside the logged artifact record. + const upstreamVerified = crypto.verify( + 'sha256', + Buffer.from(createPackageSignatureText(record.name, record.version, record.integrity)), + npmKeyToPublicKeyPem(responsibleKey), + Buffer.from(record.signature, 'base64'), + ) + if (!upstreamVerified) { + throw new Error(`Upstream signature verification failed for ${record.name}@${record.version}`) + } + } + + async ingest(record: IngestRecord) { + this.verifyRecord(record) + + const leaf = createLeafRecord(record) + const existingIndex = this.state.leaves.findIndex(existing => existing.canonicalLeaf === leaf.canonicalLeaf) + if (existingIndex !== -1) { + return { + leaf: this.state.leaves[existingIndex]!, + checkpoint: this.latestCheckpoint(), + leafIndex: existingIndex, + deduped: true, + } + } + + this.state.leaves.push(leaf) + + const payload: CheckpointPayload = { + treeSize: this.state.leaves.length, + rootHash: merkleRoot(this.state.leaves.map(item => item.canonicalLeaf)), + issuedAt: new Date().toISOString(), + keyId: this.sumDbKeyId, + } + + const checkpoint = signCheckpoint(payload, this.sumDbPrivateKey, this.sumDbPublicKey) + this.state.checkpoints.push(checkpoint) + await this.save() + + return { + leaf, + checkpoint, + leafIndex: this.state.leaves.length - 1, + deduped: false, + } + } + + lookup(keyId: string, packageName: string, version: string) { + const records = this.state.leaves + .map((record, leafIndex) => ({ ...record, leafIndex })) + .filter(record => record.keyId === keyId && record.name === packageName && record.version === version) + + return { + keyId, + name: packageName, + version, + records, + treeSize: this.latestCheckpoint().treeSize, + checkpointId: this.state.checkpoints.length - 1, + } + } + + getCheckpointByTreeSize(treeSize: number) { + return 
this.state.checkpoints.find(checkpoint => checkpoint.treeSize === treeSize) ?? null + } + + getTile(level: number, index: number) { + if (level < 0 || index < 0) { + return null + } + + let hashes = this.state.leaves.map(record => record.leafHash) + if (level === 0) { + return hashes[index] ?? null + } + + for (let currentLevel = 1; currentLevel <= level; currentLevel++) { + const nextLevel: string[] = [] + for (let offset = 0; offset < hashes.length; offset += 2) { + const left = hashes[offset] + const right = hashes[offset + 1] + if (!left) continue + nextLevel.push(right ? hashNode(left, right) : left) + } + hashes = nextLevel + } + + return hashes[index] ?? null + } + + getInclusionProof(leafIndex: number, treeSize: number): InclusionProofResponse { + if (treeSize < 1 || treeSize > this.state.leaves.length) { + throw new Error(`Tree size ${treeSize} is outside the current tree`) + } + + const leaf = this.state.leaves[leafIndex] + if (!leaf || leafIndex >= treeSize) { + throw new Error(`Leaf index ${leafIndex} is outside tree size ${treeSize}`) + } + + return { + leafHash: leaf.leafHash, + leafIndex, + treeSize, + hashes: getInclusionProof( + this.state.leaves.slice(0, treeSize).map(item => item.canonicalLeaf), + leafIndex, + treeSize, + ), + } + } + + getConsistencyProof(fromTreeSize: number, toTreeSize: number): ConsistencyProofResponse { + if (toTreeSize < 1 || toTreeSize > this.state.leaves.length) { + throw new Error(`Tree size ${toTreeSize} is outside the current tree`) + } + + return { + fromTreeSize, + toTreeSize, + hashes: getConsistencyProof( + this.state.leaves.slice(0, toTreeSize).map(item => item.canonicalLeaf), + fromTreeSize, + toTreeSize, + ), + } + } +} diff --git a/apps/sumdb/tsconfig.json b/apps/sumdb/tsconfig.json new file mode 100644 index 0000000000..26e8ddf779 --- /dev/null +++ b/apps/sumdb/tsconfig.json @@ -0,0 +1,14 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "nodenext", + "strict": true, + "esModuleInterop": true, + 
"skipLibCheck": true, + "noEmit": true, + "allowImportingTsExtensions": true, + "types": ["node"] + }, + "include": ["src/**/*.ts"], + "exclude": ["node_modules", "dist"] +} diff --git a/config/registries.ts b/config/registries.ts new file mode 100644 index 0000000000..bf81e50a72 --- /dev/null +++ b/config/registries.ts @@ -0,0 +1,18 @@ +export interface ConfiguredRegistrySource { + label: string + registryBaseUrl: string +} + +// This is the repo-level source of truth for source registries. +// Runtime code resolves trust identities by fetching each registry's /-/npm/v1/keys, +// so hostnames here are only discovery inputs and not the final trust identity. +export const registryCatalog: ConfiguredRegistrySource[] = [ + { + label: 'yarn', + registryBaseUrl: 'https://registry.yarnpkg.com', + }, + { + label: 'npm', + registryBaseUrl: 'https://registry.npmjs.org', + }, +] diff --git a/package.json b/package.json index e70bf14bda..d972a8bc7f 100644 --- a/package.json +++ b/package.json @@ -12,6 +12,10 @@ "scripts": { "build": "nuxt build", "build:test": "NODE_ENV=test vp run build", + "demo:keys": "node --experimental-strip-types apps/demo/src/generate-keys.ts", + "demo:run": "node --experimental-strip-types apps/demo/src/run-demo.ts", + "demo:test": "node --test --experimental-strip-types apps/demo/src/integration.test.ts", + "demo:verify": "node --experimental-strip-types apps/demo/src/verify.ts", "dev": "nuxt dev", "dev:docs": "vp run --filter npmx-docs dev --port=3001", "i18n:check:fix": "node scripts/compare-translations.ts --fix", @@ -23,9 +27,12 @@ "mock-connector": "vp run --filter npmx-connector dev:mock", "generate-pwa-icons": "pwa-assets-generator", "preview": "nuxt preview", + "proxy:dev": "node --experimental-strip-types apps/registry-proxy/src/cli.ts", "postinstall": "(pnpm rebuild @resvg/resvg-js || true) && vp run generate:lexicons && vp run generate:sprite && nuxt prepare && nuxt prepare --cwd docs && vp config", + "registry-core:test": "node --test 
--experimental-strip-types apps/registry-core/src/*.test.ts", "generate:fixtures": "node scripts/generate-fixtures.ts", "generate:jwk": "node scripts/gen-jwk.ts", + "sumdb:dev": "node --experimental-strip-types apps/sumdb/src/cli.ts", "test": "vp test", "test:a11y": "vp run build:test && LIGHTHOUSE_COLOR_MODE=dark vp run test:a11y:prebuilt && LIGHTHOUSE_COLOR_MODE=light vp run test:a11y:prebuilt", "test:a11y:prebuilt": "./scripts/lighthouse.sh", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 0413588f5d..a10b13cdb1 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -335,6 +335,14 @@ importers: specifier: 3.2.6 version: 3.2.6(typescript@6.0.2) + apps/demo: {} + + apps/registry-core: {} + + apps/registry-proxy: {} + + apps/sumdb: {} + cli: dependencies: '@clack/prompts': diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index ab32009332..21354b7b03 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -1,5 +1,6 @@ packages: - . + - apps/* - cli - docs From 2662093f1cd784ad271eb4a35db618b84205c99a Mon Sep 17 00:00:00 2001 From: Mohammad Bagher Abiyat <37929992+Aslemammad@users.noreply.github.com> Date: Tue, 7 Apr 2026 11:50:13 +0330 Subject: [PATCH 2/2] feat: add mirror-aware routing and key aggregation --- DESCRIPTION.md | 233 +++++++ apps/demo/src/integration.test.ts | 625 ++++++++++++------- apps/registry-core/src/protocol.ts | 16 +- apps/registry-core/src/registry-discovery.ts | 2 + apps/registry-proxy/src/server.ts | 137 +++- config/registries.ts | 3 + 6 files changed, 779 insertions(+), 237 deletions(-) create mode 100644 DESCRIPTION.md diff --git a/DESCRIPTION.md b/DESCRIPTION.md new file mode 100644 index 0000000000..a4baabf5d8 --- /dev/null +++ b/DESCRIPTION.md @@ -0,0 +1,233 @@ +This is a pull request for my two months of research on how we can decentralize npm by not breaking any mainstream behavior, so we make it as easy as possible to adopt new paradigms by users without having to deal with new conventions or at least a inconvenient 
number of them. + +> [“I have only made this letter longer because I have not had the time to make it shorter.”](https://www.goodreads.com/quotes/21224-i-have-only-made-this-letter-longer-because-i-have) + +I'd avoid making this pull request description shorter, not because of time, but because I cannot hold my excitement any more than that. So forgive me for the obvious writing mistakes. + +I went through a journey of ideas. Initially I visualized this as a localhost server called denpm that would store its local url in `~/.npmrc` as `registry=http://denpm.local` and then from there it'd distribute the package requests, mapping each package to a random registry, something like `npm add vite` would go through `denpm.local -> registry.npmjs.org`, `denpm.local -> registry.yarnpkg.com`, `denpm.local -> registry.npmmirror.co`, `denpm.local -> r.cnpmjs.org` or perhaps any other registry the user might want to provide. + +This is totally possible due to the nature of redirects in npm. So `npm add vite --registry=http://denpm.local` would result in this `package-lock.json` if the localhost server decides to just redirect the request to `registry.npmjs.org`. + +```json + "node_modules/vite": { + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/vite/-/vite-8.0.1.tgz", + "integrity": "sha512-wt+Z2qIhfFt85uiyRt5LPU4oVEJBXj8hZNWKeqFG4gRG/0RaRGJ7njQCwzFVjO+v4+Ipmf5CY7VdmZRAYYBPHw==", + } +``` + +Before the redirect, the source, or the proxy server, which is in this case `denpm.local`, can do a whole lot of stuff. It can check the signatures behind the package to make sure it hasn't been tampered with, or that the destination server, the registry, the mirror, which is in this case `registry.npmjs.org`, does not serve the user something different than what it previously claimed through the signature. + +Or to stimulate decentralization, the proxy can just randomly assign each package to a distinct registry.
This would potentially remove the single point of failure nature of npm and our overreliance on it. + +I'm bringing all of this just to mention that the possibility of opting out of the npm registry is there and unbelievably it's as simple as `npm config set registry http://denpm.local/`. + +We all love npm and it's the giant on whose shoulders everyone is standing, _BUT_ if there's an opportunity to ease the work for the npm servers, distribute the load being lifted, increase security and a whole lot of other stuff, then why not explore those wins? + +The recent growth of npmx showed that all of this is possible as long as we make something smoother than what's available. + +The thing that struck me after researching denpm was that the golang ecosystem had nearly solved the package management issue through a mix of centralization and a lot of decentralization. So that led me to dig even more into how they did it and how they leveraged transparency logs to allow proxies to act in an authentic manner. At that point I realized a new CLI is not only not enough, but it might be unnecessary. + +So the biggest inspiration for this effort is the golang ecosystem. Centralization at that point would be part of npmx itself, specifically the Checksum database it'd maintain. Decentralization would be basically everything else, like the npm registry and other _registries_ and _mirrors_. + +I keep separating registries and mirrors; though there might be only a slight technical difference, both should be advertised and users should know the difference between them and the fact that spinning up a new registry is way cheaper than spinning up a full mirror. + +The community might decide to maintain servers that are one-to-one replications of the npm registry itself, or at least a portion of it. That's what I'd call a mirror, like `registry.npmmirror.co` by [cnpm](https://github.com/cnpm/cnpmcore). + +Registries, though, are more important; they might want to host exclusive packages.
For instance, `registry.viteplus.dev` could decide to host only packages like vite or vitest, or even better, their supply chain. + +So registries for ownership and mirrors for distribution and obviously, mirroring. Imagine a world where each maintainer can host their own packages under their own domain if they prefer, which is totally possible, but hasn't been mainstream yet due to friction I'd argue. + +That's where [VSR](https://www.vlt.sh/serverless-registry) or [Verdaccio](https://verdaccio.org) can join the effort as well to ease up the hosting side. + +Back to the solution, in the next few sections I'll go into detail about how the puzzle pieces are going to fit together. + +## Checksum Database + +Something like `sum.npmx.dev`. + +This is the point of centralization in the puzzle. It'd solve the problem of package unpublishes, mutability and version replacements in the new decentralized package management world. Two mirrors won't be able to ship different bytes for the same version of the same package, if one acts unfaithfully, it'd be easily caught by what's already recorded in the checksum database. + +The initial and main consumer of this checksum database would be the npmx proxy, but after gaining momentum, it might be something that the package managers might want to rely on, independently. + +### Merkle Trees and Transparency Logs + +More details in [Russ Cox's blog post](https://research.swtch.com/tlog). Briefly, this data structure would allow us to create a tamper-evident database so a released package would be cryptographically frozen and therefore cannot be tampered with. + +And similar to the golang checksum database, we'd expose APIs that'd allow any user or service to verify the merkle tree we're hosting. + +The checksum database itself allows for the auditability of registries and proxies. This data structure would allow the auditability of the checksum database itself.
+So it's not an unverifiable point of centralization but rather a totally verifiable and consistent one. + +## Proxy + +`registry.npmx.dev` or `proxy.npmx.dev`. This is the same URL that the user would have to pass to `npm config set registry`. + +It'd handle the job of redirects to the right registries, making sure that they serve the right content, returning [consistent manifests](https://blog.vlt.sh/blog/the-massive-hole-in-the-npm-ecosystem) and all the security improvements we can make over npm. + +In the current mvp, the proxy only allows packages with the `integrity` field to be stored in the checksum database and returned to the user to increase the security. +So packages with no `integrity` (not signed by the registry) are not allowed to be stored. This can be changed but it also means less security, even though we sign each field in the checksum database too. + +### `/-/npm/v1/keys` + +This is what `npm audit signatures` uses to audit the signatures of the packages and verify that we're consuming what the registry has actually signed. + +```json +{ + "keys": [ + { + "expires": "2025-01-29T00:00:00.000Z", + "keyid": "SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA", + "keytype": "ecdsa-sha2-nistp256", + "scheme": "ecdsa-sha2-nistp256", + "key": "MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAE1Olb3zMAFFxXKHiIkQO5cJ3Yhl5i6UPp+IhuteBJbuHcA5UogKo0EWtlWwW6KSaKoTNEYL7JlCQiVnkhBktUgg==" + }, + { + "expires": null, + "keyid": "SHA256:DhQ8wR5APBvFHLF/+Tc+AYvPOdTpcIDqOhxsBHRwC7U", + "keytype": "ecdsa-sha2-nistp256", + "scheme": "ecdsa-sha2-nistp256", + "key": "MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEY6Ya7W++7aUPzvMTrezH6Ycx3c+HOKYCcNGybJZSCJq/fd7Qa8uuAKtdIkUQtQiEKERhAmE5lMMJhP8OkDOa2g==" + } + ] +} +``` + +`registry.npmx.dev/-/npm/v1/keys` can host not only the npm registry's keys, but all the keys from all other registries and mirrors.
+ +I assume this file won't be hundreds of megabytes or even more, but if my assumption is wrong, we can cherry pick the keys we return to the user based on what registries they prefer in a potential dashboard using the `authorization` http header. + +``` +GET /-/npm/v1/keys HTTP/1.1 +Host: registry.npmjs.org +user-agent: npm/10.2.4 node/v20.11.0 linux x64 workspaces/false +npm-session: b9c1d2e3f4a5b6c7 ← new random ID, new invocation +npm-command: audit +authorization: Bearer npm_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +accept-encoding: gzip, deflate, br +accept: */* +connection: keep-alive +if-none-match: "abc123etag" +``` + +### The New World + +By avoiding overdependence on the npm registry, new kinds of registries and mirrors would emerge. One I keep dreaming of that'd mitigate most of the attacks happening on npm, remind you that most attacks happen on npm LINK ARTICLE, is a `mirror.socket.dev` which would only host what's available on npm once it goes through their in-house audits, which are pretty good. They have been able to catch most of the recent attacks on npm before anyone else, but still, since there's no way to affect user workflows directly, like through explicit errors and failures in `npm add` by refusing to serve a particular package, a lot of potential is being missed. + +Other registries I imagine are organization-backed registries that host only what they ship or what they rely on, like the `registry.viteplus.dev` example mentioned above. That'd be the same story with maintainer-backed registries, like `registry.roe.dev` hosting packages that Daniel maintains. + +## FAQ + +I am adding these as prompts for myself. If the PR is going to persuade anyone, it should answer these directly instead of assuming the reader will fill the gaps for me. + +### What exact npm failure modes am I trying to solve first? + +I think the root problem is the extremely centralized characteristic of npm and, consequently, our overreliance on it.
Other issues, like security, can be easily solved once we demonstrate a viable path to decentralization so any registry or mirror can decide to host their desired packages however they want with whatever guardrails they prefer (e.g. running AI-powered security checks, 2FA). + +The other problem that decentralization solves is the unsustainability of npm, which is perhaps why it's not receiving major upgrades or major changes and that's not npm's fault, this is the problem of serving data for free at the scale of npm and other registries. Decentralization would bring instances that would host portions of npm, which would make it sustainable for them since they won't have to lift the load of the whole ecosystem, or they might decide to serve only the few packages they're willing to serve. + +### Which important npm security problems am I explicitly not solving in this PR? + +The low-hanging fruits like verifying metadata, digests, integrity and also enforcing cryptographic authentication would be solved, since, they're low-hanging fruits and there's no harm in tackling them. The mvp already implements those guardrails partially for the sake of demonstration. But more importantly, decentralization itself does not solve any security issue and perhaps, most of the complex security issues npm and maintainers face like social engineering and account takeovers. But it rather stimulates the spin up of new mirrors and registries that would take those security concerns into account like the `mirror.socket.dev` example above. + +### What is the concrete win for users if they adopt this? + +Opting out of the npm registry, whether partially or totally, as a lock-in solution. + +### Why a proxy instead of a new CLI? + +The main thing a CLI cannot replicate is the checksum database.
Aside from that, all the secondary wins can be implemented by a CLI eventually like verifying the merkle tree and assigning installs to different registries, but that'd require a longer, actually a way longer, transition period. The CJS to ESM transition has been going on for years and is still not finished, whether because of friction of migration or compatibility issues. So the more convenient the experience is, the better and the shorter this transition would be, especially if the transition takes one command only. + +`sum.npmx.dev` would be the source of truth, other than that, everything can be replicated by CLIs in the long run with less convenience for the users potentially. + +### Why a sumdb instead of relying only on lockfile `integrity`? + +The user still relies on the lockfile `integrity`, but the value would come from the checksum database rather than each registry issuing its own integrity. + +### Why keep lockfiles pointing at the upstream tarball URL instead of the proxy URL? + +Decentralization. So downloads would go directly to the responsible registry, otherwise, if all downloads would still go through the proxy, then that'd be the new point of centralization. + +### What exactly does `keyId` mean here? + +Hash of the registry's public key. Like [npm's keyId](https://registry.npmjs.org/-/npm/v1/keys). + +### What does a successful verification prove? + +That the registry serving the tarball is serving something others also agree to serve. It proves that the content is being served authentically (it's indeed coming from the desired place cryptographically) and consistently (what's being served now would be served later and what's being served for me, would be served to others too). + +### What does “decentralization” mean in this proposal? + +Multiple registries and mirrors serving the _right_ content rather than only npm doing that. + +### Are mirrors and registries different in principle, or just in operational practice? + +I don't understand the question honestly.
+ +### Are package names still the global npm names? + +For the proxy, yes, initially, but the checksum database does not care about names as discussed below. + +### If two sources claim the same package and version but offer different tarballs, what happens? + +Hard failure. + +### Does this stop malicious maintainers? + +No, registries that we redirect to would take that responsibility. + +### Does this stop `preinstall` and `postinstall` malware? + +No, registries that we redirect to would take that responsibility. + +### Does this stop unpublishing or version replacement? + +Yes. + +### What does this system say about content authenticity versus content safety? + +Content authenticity is directly addressed but content safety is not and hopefully it'd be a secondary effect. + +### Who runs the sumdb? + +npmx. + +### Which npm client flows already work with this prototype? + +Tried it on npm and it was working. + +### Why is this better than a plain mirror? + +Plain mirrors already exist, like `registry.npmmirror.co` by [cnpm](https://github.com/cnpm/cnpmcore), but they're costly and they assume npm is the source of truth. + +### Is the long-term goal to decentralize hosting, trust, naming, or all three? + +Hosting already is with this. Trust, no, since every registry has to agree on a single notion of authenticity. Naming can be decentralized too because the checksum database does not care about names (even though it stores them) and rather it cares about the combined hash of values like the `integrity`, `digest`, `name`, `version`, `keyId` and a bunch of other values. So it's possible that two different registries (two different `keyId`s) can host packages that share the same exact name but as long as the overall hash is not the same, they'd point to two different things. This would need a discussion around content-addressing (addressing tarballs based on content rather than name), but I'm sure that'd be easily possible like `npm add react#${hash of the content}`.
+ +### What would have to change next to support real multi-registry fetch selection? + +The npmx proxy would be the fastest way to achieve this ideal; I think it's a matter of a PR or a few. + +### What is the minimal next milestone that would prove this idea is viable? + +I think there should be a wider discussion with the npmx team and teams like vlt, pnpm and other teams in the javascript (mainly npm) registries space. + +### “This is still centralized.” What is my answer? + +No and Yes. Yes because we have a checksum database that everyone agrees on (though anyone can host their own checksum databases and proxies), and no because tarballs are not located in a centralized place anymore. + +### “This doesn’t solve install-time code execution.” What is my answer? + +This is not the job of a checksum database or a proxy, rather, it's the job of mirrors or registries like the socket one I mentioned above. + +### “Why not just use npm mirrors?” What is my answer? + +npm mirrors assume npm itself is the single source of truth. Here `sum.npmx.dev` is the source of truth for the content authenticity and any registry can contribute to it. + +### “Why should anyone trust `sum.npmx.dev`?” What is my answer? + +It's a dumb ledger that accepts changes as long as the cryptography makes sense. + +### “If the proxy isn’t in the lockfile, what value is it adding?” What is my answer? + +The biggest and most important part of this process is the initial `npm add` one, which is where the proxy plays an important role. After that and once the install request gets redirected to a registry, then the lockfile already has the correct `integrity` which comes from the checksum database and also the tarball url, which points to the registry.
diff --git a/apps/demo/src/integration.test.ts b/apps/demo/src/integration.test.ts index 0e48479e32..8e646d50a6 100644 --- a/apps/demo/src/integration.test.ts +++ b/apps/demo/src/integration.test.ts @@ -1,29 +1,45 @@ import assert from 'node:assert/strict' import { execFile } from 'node:child_process' import { once } from 'node:events' -import { mkdir, mkdtemp, readFile, rm } from 'node:fs/promises' +import { mkdtemp, mkdir, readFile, rm, writeFile } from 'node:fs/promises' +import http from 'node:http' import net from 'node:net' import { tmpdir } from 'node:os' import path from 'node:path' import test from 'node:test' import { promisify } from 'node:util' -import { registryCatalog } from '../../../config/registries.ts' import { collectPublishedKeys, + createArtifactDigest, createPackageSignatureText, fetchRegistryKeys, generateRegistryKeyPair, hydrateSourceRegistries, - npmKeyToPublicKeyPem, resolveSourceRegistry, + signText, + type ConfiguredSourceRegistry, + type NpmKey, } from '../../registry-core/src/index.ts' import { createRegistryProxyServer } from '../../registry-proxy/src/server.ts' import { createSumDbServer } from '../../sumdb/src/server.ts' import { verifyPackageFromSumDb } from './verify-lib.ts' -import crypto from 'node:crypto' const execFileAsync = promisify(execFile) +type PackedPackage = { + name: string + version: string + tarballFilename: string + tarballBytes: Buffer + integrity: string + signature: string +} + +type MockRegistryPackage = { + packageInfo: PackedPackage + tarballUrl: string +} + function logStep(message: string) { console.log(`[e2e] ${message}`) } @@ -59,6 +75,157 @@ async function stopServer(server: net.Server) { await once(server, 'close') } +function tarballPathname(packageName: string, tarballFilename: string) { + if (packageName.startsWith('@')) { + const [scope, name] = packageName.split('/') + return `/${scope}/${name}/-/${tarballFilename}` + } + + return `/${packageName}/-/${tarballFilename}` +} + +function 
packumentPathname(packageName: string) { + if (packageName.startsWith('@')) { + return `/${packageName.replace('/', '%2f')}` + } + + return `/${packageName}` +} + +function createPackument(mockPackage: MockRegistryPackage) { + return { + name: mockPackage.packageInfo.name, + 'dist-tags': { + latest: mockPackage.packageInfo.version, + }, + versions: { + [mockPackage.packageInfo.version]: { + name: mockPackage.packageInfo.name, + version: mockPackage.packageInfo.version, + dist: { + integrity: mockPackage.packageInfo.integrity, + signatures: [ + { + keyid: mockPackage.packageInfo.signature ? undefined : undefined, + }, + ], + tarball: mockPackage.tarballUrl, + }, + }, + }, + } +} + +function createSignedPackument(mockPackage: MockRegistryPackage, signingKeyId: string) { + const packument = createPackument(mockPackage) + const versionMetadata = (packument.versions as Record>)[mockPackage.packageInfo.version]! + const dist = versionMetadata.dist as Record + dist.signatures = [ + { + keyid: signingKeyId, + sig: mockPackage.packageInfo.signature, + }, + ] + return packument +} + +function createMockRegistryServer(input: { + baseUrl: string + keys: NpmKey[] + packages: MockRegistryPackage[] +}) { + const packuments = new Map>() + const tarballs = new Map() + + for (const mockPackage of input.packages) { + packuments.set( + packumentPathname(mockPackage.packageInfo.name), + createSignedPackument(mockPackage, input.keys[0]!.keyid), + ) + tarballs.set( + tarballPathname(mockPackage.packageInfo.name, mockPackage.packageInfo.tarballFilename), + mockPackage.packageInfo.tarballBytes, + ) + } + + return http.createServer((request, response) => { + const pathname = new URL(request.url ?? 
'/', input.baseUrl).pathname + if (pathname === '/-/npm/v1/keys') { + response.statusCode = 200 + response.setHeader('content-type', 'application/json; charset=utf-8') + response.end(`${JSON.stringify({ keys: input.keys })}\n`) + return + } + + const tarball = tarballs.get(pathname) + if (tarball) { + response.statusCode = 200 + response.setHeader('content-type', 'application/octet-stream') + response.setHeader('content-length', String(tarball.byteLength)) + response.end(tarball) + return + } + + const packument = packuments.get(pathname) + if (packument) { + response.statusCode = 200 + response.setHeader('content-type', 'application/json; charset=utf-8') + response.end(`${JSON.stringify(packument)}\n`) + return + } + + response.statusCode = 404 + response.setHeader('content-type', 'application/json; charset=utf-8') + response.end(`${JSON.stringify({ error: 'not found' })}\n`) + }) +} + +async function createPackedPackage(input: { + tempRoot: string + name: string + version: string + privateKeyPem: string +}) { + const packageDir = path.join( + input.tempRoot, + `pkg-${input.name.replaceAll('@', '').replaceAll('/', '-')}@${input.version}`, + ) + await mkdir(packageDir, { recursive: true }) + await writeFile( + path.join(packageDir, 'package.json'), + JSON.stringify( + { + name: input.name, + version: input.version, + main: 'index.js', + }, + null, + 2, + ), + ) + await writeFile(path.join(packageDir, 'index.js'), `module.exports = ${JSON.stringify(input.name)}\n`) + + const { stdout } = await execFileAsync('npm', ['pack', '--json'], { + cwd: packageDir, + }) + const [{ filename }] = JSON.parse(stdout) as Array<{ filename: string }> + const tarballBytes = await readFile(path.join(packageDir, filename)) + const integrity = createArtifactDigest(tarballBytes) + const signature = signText( + input.privateKeyPem, + createPackageSignatureText(input.name, input.version, integrity), + ) + + return { + name: input.name, + version: input.version, + tarballFilename: filename, 
+ tarballBytes, + integrity, + signature, + } satisfies PackedPackage +} + async function npmInstall(input: { directory: string registryBaseUrl: string @@ -126,97 +293,120 @@ async function fetchLookup(sumDbBaseUrl: string, keyId: string, packageName: str } async function fetchPackumentFromRegistry(registryBaseUrl: string, packageName: string) { - const encodedPackageName = packageName.startsWith('@') - ? packageName.replace('/', '%2f') - : packageName - const response = await fetch(`${registryBaseUrl}/${encodedPackageName}`) + const response = await fetch(`${registryBaseUrl}${packumentPathname(packageName)}`) if (!response.ok) { throw new Error(`Unable to fetch packument for ${packageName} from ${registryBaseUrl}`) } return (await response.json()) as Record } -function getVerifiedSigningKeyId(input: { - packageName: string - version: string - packument: Record - candidateKeys: NpmKey[] -}) { - const versions = input.packument.versions as Record | undefined - const versionMetadata = versions?.[input.version] as Record | undefined - const dist = versionMetadata?.dist as Record | undefined - const integrity = dist?.integrity - const signatures = Array.isArray(dist?.signatures) ? (dist?.signatures as Array>) : [] - - if (typeof integrity !== 'string' || !integrity || signatures.length === 0) { - throw new Error(`Missing integrity or signatures for ${input.packageName}@${input.version}`) - } - - for (const signature of signatures) { - const keyId = typeof signature.keyid === 'string' ? signature.keyid : undefined - const sig = typeof signature.sig === 'string' ? 
signature.sig : undefined - if (!keyId || !sig) { - continue - } - - const matchingKey = input.candidateKeys.find(key => key.keyid === keyId) - if (!matchingKey) { - continue - } - - const verified = crypto.verify( - 'sha256', - Buffer.from(createPackageSignatureText(input.packageName, input.version, integrity)), - npmKeyToPublicKeyPem(matchingKey), - Buffer.from(sig, 'base64'), - ) - if (verified) { - return { - keyId, - integrity, - } - } - } - - throw new Error(`No verified signing key found for ${input.packageName}@${input.version}`) -} - -async function findRecordedKeyId(input: { - sumDbBaseUrl: string - keyIds: string[] - packageName: string - version: string -}) { - for (const keyId of input.keyIds) { - const lookup = await fetchLookup(input.sumDbBaseUrl, keyId, input.packageName, input.version) - if (lookup.records.length > 0) { - return { - keyId, - lookup, - } - } - } - - throw new Error(`No sumdb record found for ${input.packageName}@${input.version}`) -} - test( - 'proxy + sumdb E2E install flow verifies minimal tarball records with logged upstream signatures', + 'proxy + sumdb E2E flow distinguishes mirrors from registries and serves aggregated keys', { timeout: 120_000 }, async () => { - logStep('allocating temp workspace and ports') + logStep('allocating temp workspace and local ports') const tempRoot = await mkdtemp(path.join(tmpdir(), 'npmx-registry-e2e-')) + const npmOriginPort = await getFreePort() + const mirrorPort = await getFreePort() + const registryPort = await getFreePort() const sumDbPort = await getFreePort() - const proxyPort = await getFreePort() + const mirrorProxyPort = await getFreePort() + const originProxyPort = await getFreePort() + + const npmOriginBaseUrl = `http://127.0.0.1:${npmOriginPort}` + const mirrorBaseUrl = `http://127.0.0.1:${mirrorPort}` + const registryBaseUrl = `http://127.0.0.1:${registryPort}` const sumDbBaseUrl = `http://127.0.0.1:${sumDbPort}` - const proxyBaseUrl = `http://127.0.0.1:${proxyPort}` + const 
mirrorProxyBaseUrl = `http://127.0.0.1:${mirrorProxyPort}` + const originProxyBaseUrl = `http://127.0.0.1:${originProxyPort}` + + const npmSigningKeys = generateRegistryKeyPair() + const registrySigningKeys = generateRegistryKeyPair() const sumDbKeys = generateRegistryKeyPair() - const proxyKeys = generateRegistryKeyPair() + const mirrorProxyKeys = generateRegistryKeyPair() + const originProxyKeys = generateRegistryKeyPair() + + logStep('creating package fixtures for npm-origin, mirror, and registry-only packages') + const sharedPackage = await createPackedPackage({ + tempRoot, + name: 'shared-demo-package', + version: '1.0.0', + privateKeyPem: npmSigningKeys.privateKeyPem, + }) + const npmOnlyPackage = await createPackedPackage({ + tempRoot, + name: 'npm-only-demo-package', + version: '1.0.0', + privateKeyPem: npmSigningKeys.privateKeyPem, + }) + const registryOnlyPackage = await createPackedPackage({ + tempRoot, + name: 'registry-only-demo-package', + version: '1.0.0', + privateKeyPem: registrySigningKeys.privateKeyPem, + }) - logStep('hydrating source registries and collecting trusted package-signing keys') - const hydratedRegistries = await hydrateSourceRegistries(registryCatalog) - const trustedResponsibleKeys = collectPublishedKeys(hydratedRegistries) + const npmOriginServer = createMockRegistryServer({ + baseUrl: npmOriginBaseUrl, + keys: [npmSigningKeys.npmKey], + packages: [ + { + packageInfo: sharedPackage, + tarballUrl: `${npmOriginBaseUrl}${tarballPathname(sharedPackage.name, sharedPackage.tarballFilename)}`, + }, + { + packageInfo: npmOnlyPackage, + tarballUrl: `${npmOriginBaseUrl}${tarballPathname(npmOnlyPackage.name, npmOnlyPackage.tarballFilename)}`, + }, + ], + }) + const mirrorServer = createMockRegistryServer({ + baseUrl: mirrorBaseUrl, + keys: [npmSigningKeys.npmKey], + packages: [ + { + packageInfo: sharedPackage, + tarballUrl: `${mirrorBaseUrl}${tarballPathname(sharedPackage.name, sharedPackage.tarballFilename)}`, + }, + { + packageInfo: 
npmOnlyPackage, + tarballUrl: `${npmOriginBaseUrl}${tarballPathname(npmOnlyPackage.name, npmOnlyPackage.tarballFilename)}`, + }, + ], + }) + + const registryServer = createMockRegistryServer({ + baseUrl: registryBaseUrl, + keys: [registrySigningKeys.npmKey], + packages: [ + { + packageInfo: registryOnlyPackage, + tarballUrl: `${registryBaseUrl}${tarballPathname( + registryOnlyPackage.name, + registryOnlyPackage.tarballFilename, + )}`, + }, + ], + }) + + const sourceRegistries: ConfiguredSourceRegistry[] = [ + { label: 'mirror', registryBaseUrl: mirrorBaseUrl, kind: 'mirror' }, + { label: 'npm-origin', registryBaseUrl: npmOriginBaseUrl, kind: 'mirror' }, + { label: 'registry-only', registryBaseUrl: registryBaseUrl, kind: 'registry' }, + ] + + await startServer(npmOriginServer, npmOriginPort) + await startServer(mirrorServer, mirrorPort) + await startServer(registryServer, registryPort) + + logStep('hydrating source registries and collecting trusted signing keys') + const hydratedRegistries = await hydrateSourceRegistries(sourceRegistries) + assert.equal(hydratedRegistries[0]!.npmKeys[0]!.keyid, npmSigningKeys.npmKey.keyid) + assert.equal(hydratedRegistries[1]!.npmKeys[0]!.keyid, npmSigningKeys.npmKey.keyid) + assert.equal(hydratedRegistries[2]!.npmKeys[0]!.keyid, registrySigningKeys.npmKey.keyid) + + const trustedResponsibleKeys = collectPublishedKeys(hydratedRegistries) const sumDb = await createSumDbServer({ port: sumDbPort, dataDir: path.join(tempRoot, 'sumdb'), @@ -225,174 +415,177 @@ test( allowedRegistryKeys: null, trustedResponsibleKeys, }) - - logStep(`starting sumdb on ${sumDbBaseUrl}`) await startServer(sumDb.server, sumDbPort) - logStep('creating proxy with registry catalog and runtime-fetched npm keys') - const proxy = await createRegistryProxyServer({ - port: proxyPort, - baseUrl: proxyBaseUrl, - sourceRegistries: registryCatalog, - cacheDir: path.join(tempRoot, 'proxy-cache'), + logStep('starting a proxy that prefers the mirror candidate when multiple 
sources can serve a package') + const mirrorProxy = await createRegistryProxyServer({ + port: mirrorProxyPort, + baseUrl: mirrorProxyBaseUrl, + sourceRegistries, + cacheDir: path.join(tempRoot, 'proxy-cache-mirror'), sumDbBaseUrl, - registryPrivateKey: proxyKeys.privateKeyPem, - registryPublicKey: proxyKeys.publicKeyPem, + registryPrivateKey: mirrorProxyKeys.privateKeyPem, + registryPublicKey: mirrorProxyKeys.publicKeyPem, + random: () => 0, }) - - logStep(`starting proxy on ${proxyBaseUrl}`) - await startServer(proxy.server, proxyPort) + await startServer(mirrorProxy.server, mirrorProxyPort) + + logStep('starting a second proxy that prefers the npm-origin candidate') + const originProxy = await createRegistryProxyServer({ + port: originProxyPort, + baseUrl: originProxyBaseUrl, + sourceRegistries, + cacheDir: path.join(tempRoot, 'proxy-cache-origin'), + sumDbBaseUrl, + registryPrivateKey: originProxyKeys.privateKeyPem, + registryPublicKey: originProxyKeys.publicKeyPem, + random: () => 0.99, + }) + await startServer(originProxy.server, originProxyPort) try { - const firstProject = path.join(tempRoot, 'is-number-project') - const secondProject = path.join(tempRoot, 'scoped-project') - - await mkdir(firstProject, { recursive: true }) - await mkdir(secondProject, { recursive: true }) + logStep('checking proxy /-/npm/v1/keys aggregates upstream keys with the proxy key') + const keysResponse = await fetch(`${mirrorProxyBaseUrl}/-/npm/v1/keys`) + const keysPayload = (await keysResponse.json()) as { keys: NpmKey[] } + const servedKeyIds = new Set(keysPayload.keys.map(key => key.keyid)) + assert.equal(servedKeyIds.has(mirrorProxyKeys.keyId), true) + assert.equal(servedKeyIds.has(npmSigningKeys.keyId), true) + assert.equal(servedKeyIds.has(registrySigningKeys.keyId), true) + assert.equal(keysPayload.keys.length, 3) + + logStep('verifying random routing can choose mirror or npm-origin for the same package') + const mirrorSharedPackument = await 
fetchPackumentFromRegistry(mirrorProxyBaseUrl, sharedPackage.name) + const originSharedPackument = await fetchPackumentFromRegistry(originProxyBaseUrl, sharedPackage.name) + const mirrorSharedDist = ((mirrorSharedPackument.versions as Record)[sharedPackage.version] as Record< + string, + unknown + >).dist as Record + const originSharedDist = ((originSharedPackument.versions as Record)[sharedPackage.version] as Record< + string, + unknown + >).dist as Record + assert.equal( + mirrorSharedDist.tarball, + `${mirrorBaseUrl}${tarballPathname(sharedPackage.name, sharedPackage.tarballFilename)}`, + ) + assert.equal( + originSharedDist.tarball, + `${npmOriginBaseUrl}${tarballPathname(sharedPackage.name, sharedPackage.tarballFilename)}`, + ) - logStep('installing is-number through the proxy') + logStep('installing a package that exists in npm-origin and mirror but not in the custom registry') + const npmOnlyProject = path.join(tempRoot, 'npm-only-project') + await mkdir(npmOnlyProject, { recursive: true }) await npmInstall({ - directory: firstProject, - registryBaseUrl: proxyBaseUrl, - packageName: 'is-number', + directory: npmOnlyProject, + registryBaseUrl: mirrorProxyBaseUrl, + packageName: npmOnlyPackage.name, }) - logStep('installing @jridgewell/resolve-uri through the proxy') + const npmOnlyTarball = await fetchResolvedTarballPath(npmOnlyProject, `node_modules/${npmOnlyPackage.name}`) + assert.equal( + npmOnlyTarball.resolved, + `${npmOriginBaseUrl}${tarballPathname(npmOnlyPackage.name, npmOnlyPackage.tarballFilename)}`, + ) + + logStep('installing a package that exists only in the custom registry with its own key') + const registryOnlyProject = path.join(tempRoot, 'registry-only-project') + await mkdir(registryOnlyProject, { recursive: true }) await npmInstall({ - directory: secondProject, - registryBaseUrl: proxyBaseUrl, - packageName: '@jridgewell/resolve-uri', + directory: registryOnlyProject, + registryBaseUrl: mirrorProxyBaseUrl, + packageName: 
registryOnlyPackage.name, }) - - logStep('reading resolved tarball URLs from package-lock.json') - const firstTarball = await fetchResolvedTarballPath(firstProject, 'node_modules/is-number') - const secondTarball = await fetchResolvedTarballPath( - secondProject, - 'node_modules/@jridgewell/resolve-uri', + const registryOnlyTarball = await fetchResolvedTarballPath( + registryOnlyProject, + `node_modules/${registryOnlyPackage.name}`, + ) + assert.equal( + registryOnlyTarball.resolved, + `${registryBaseUrl}${tarballPathname(registryOnlyPackage.name, registryOnlyPackage.tarballFilename)}`, ) - logStep(`resolved is-number tarball: ${firstTarball.resolved}`) - logStep(`resolved @jridgewell/resolve-uri tarball: ${secondTarball.resolved}`) - - assert.ok(firstTarball.resolved.startsWith('https://registry.npmjs.org/')) - assert.ok(secondTarball.resolved.startsWith('https://registry.npmjs.org/')) - assert.equal(firstTarball.resolved.startsWith(proxyBaseUrl), false) - assert.equal(secondTarball.resolved.startsWith(proxyBaseUrl), false) - - logStep('fetching source-registry keys and verifying the actual package-signing key IDs') - const yarnKeys = await fetchRegistryKeys('https://registry.yarnpkg.com') - const npmKeys = await fetchRegistryKeys('https://registry.npmjs.org') - const firstPackument = await fetchPackumentFromRegistry('https://registry.yarnpkg.com', 'is-number') - const secondPackument = await fetchPackumentFromRegistry('https://registry.npmjs.org', '@jridgewell/resolve-uri') - const firstSigning = getVerifiedSigningKeyId({ - packageName: 'is-number', - version: firstTarball.version, - packument: firstPackument, - candidateKeys: yarnKeys, - }) - const secondSigning = getVerifiedSigningKeyId({ - packageName: '@jridgewell/resolve-uri', - version: secondTarball.version, - packument: secondPackument, - candidateKeys: npmKeys, - }) - logStep(`is-number signed by ${firstSigning.keyId}`) - logStep(`@jridgewell/resolve-uri signed by ${secondSigning.keyId}`) - 
logStep('verifying tarballs against sumdb checkpoints and proofs') - const firstVerification = await verifyPackageFromSumDb({ + logStep('verifying the npm-origin-backed package was checkpointed under the shared npm signing key') + const npmOnlyVerification = await verifyPackageFromSumDb({ sumDbBaseUrl, - registryKeyId: firstSigning.keyId, - packageName: 'is-number', - version: firstTarball.version, - tarballPath: firstTarball.tarballPath, + registryKeyId: npmSigningKeys.keyId, + packageName: npmOnlyPackage.name, + version: npmOnlyPackage.version, + tarballPath: npmOnlyTarball.tarballPath, }) - const secondVerification = await verifyPackageFromSumDb({ + assert.equal(npmOnlyVerification.ok, true) + + logStep('verifying the registry-only package was checkpointed under the registry-specific key') + const registryOnlyVerification = await verifyPackageFromSumDb({ sumDbBaseUrl, - registryKeyId: secondSigning.keyId, - packageName: '@jridgewell/resolve-uri', - version: secondTarball.version, - tarballPath: secondTarball.tarballPath, + registryKeyId: registrySigningKeys.keyId, + packageName: registryOnlyPackage.name, + version: registryOnlyPackage.version, + tarballPath: registryOnlyTarball.tarballPath, }) - assert.equal(firstVerification.ok, true) - assert.equal(secondVerification.ok, true) - logStep(`verified is-number under ${firstSigning.keyId}`) - logStep(`verified @jridgewell/resolve-uri under ${secondSigning.keyId}`) - - logStep('checking the sumdb stores only minimal tarball records') - const firstLookup = await fetchLookup(sumDbBaseUrl, firstSigning.keyId, 'is-number', firstTarball.version) - const firstRecord = firstLookup.records.find(record => record.type === 'tarball') - assert.ok(firstRecord) - assert.equal(firstRecord!.keyId, firstSigning.keyId) - assert.equal(firstRecord!.integrity, firstSigning.integrity) - assert.ok(typeof firstRecord!.digest === 'string' && firstRecord!.digest.startsWith('sha512-')) - assert.ok(typeof firstRecord!.signature === 'string' && 
firstRecord!.signature.length > 0) - assert.ok(typeof firstRecord!.leafIndex === 'number') - - const secondLookup = await fetchLookup( + assert.equal(registryOnlyVerification.ok, true) + + logStep('checking the logged leaves keep only the minimal tarball record shape') + const npmOnlyLookup = await fetchLookup( sumDbBaseUrl, - secondSigning.keyId, - '@jridgewell/resolve-uri', - secondTarball.version, + npmSigningKeys.keyId, + npmOnlyPackage.name, + npmOnlyPackage.version, ) - const secondRecord = secondLookup.records.find(record => record.type === 'tarball') - assert.ok(secondRecord) - assert.equal(secondRecord!.keyId, secondSigning.keyId) - assert.equal(secondRecord!.integrity, secondSigning.integrity) - assert.ok(typeof secondRecord!.signature === 'string' && secondRecord!.signature.length > 0) - - logStep('verifying the proxy stays npm-compatible and does not expose custom registry routes') - const proxyKeysResponse = await fetch(`${proxyBaseUrl}/-/npm/v1/keys`) - const proxyKeysPayload = (await proxyKeysResponse.json()) as { - keys: Array<{ keyid: string; keytype: string; scheme: string; key: string }> - } - assert.equal(proxyKeysPayload.keys.length, 1) - assert.match(proxyKeysPayload.keys[0]!.keyid, /^SHA256:/) - assert.equal(proxyKeysPayload.keys[0]!.keytype, 'ecdsa-sha2-nistp256') - assert.equal(proxyKeysPayload.keys[0]!.scheme, 'ecdsa-sha2-nistp256') - - const proxyRootResponse = await fetch(`${proxyBaseUrl}/`) - assert.equal(proxyRootResponse.status, 404) - logStep('custom proxy introspection routes are gone; only npm-compatible paths remain') - - logStep('confirming there are no alternate key IDs recorded for the same package versions') - const recordedFirst = await findRecordedKeyId({ - sumDbBaseUrl, - keyIds: trustedResponsibleKeys.map(key => key.keyid), - packageName: 'is-number', - version: firstTarball.version, - }) - const recordedSecond = await findRecordedKeyId({ + const npmOnlyRecord = npmOnlyLookup.records[0] + assert.ok(npmOnlyRecord) + 
assert.equal(npmOnlyRecord.keyId, npmSigningKeys.keyId) + assert.equal(npmOnlyRecord.integrity, npmOnlyPackage.integrity) + assert.ok(typeof npmOnlyRecord.signature === 'string' && npmOnlyRecord.signature.length > 0) + + const registryOnlyLookup = await fetchLookup( sumDbBaseUrl, - keyIds: trustedResponsibleKeys.map(key => key.keyid), - packageName: '@jridgewell/resolve-uri', - version: secondTarball.version, - }) - assert.equal(recordedFirst.keyId, firstSigning.keyId) - assert.equal(recordedSecond.keyId, secondSigning.keyId) + registrySigningKeys.keyId, + registryOnlyPackage.name, + registryOnlyPackage.version, + ) + const registryOnlyRecord = registryOnlyLookup.records[0] + assert.ok(registryOnlyRecord) + assert.equal(registryOnlyRecord.keyId, registrySigningKeys.keyId) + assert.equal(registryOnlyRecord.integrity, registryOnlyPackage.integrity) + + logStep('verifying local registry key discovery matches the mirror-vs-registry distinction') + const mirrorKeys = await fetchRegistryKeys(mirrorBaseUrl) + const npmOriginKeys = await fetchRegistryKeys(npmOriginBaseUrl) + const registryKeys = await fetchRegistryKeys(registryBaseUrl) + assert.equal(mirrorKeys[0]!.keyid, npmOriginKeys[0]!.keyid) + assert.notEqual(registryKeys[0]!.keyid, npmOriginKeys[0]!.keyid) } finally { - logStep('shutting down proxy and sumdb') - await stopServer(proxy.server) + logStep('shutting down proxies, sumdb, and local registries') + await stopServer(mirrorProxy.server) + await stopServer(originProxy.server) await stopServer(sumDb.server) + await stopServer(npmOriginServer) + await stopServer(mirrorServer) + await stopServer(registryServer) await rm(tempRoot, { recursive: true, force: true }) } }, ) -test('source registry resolution uses the first configured registry', () => { - const npmKeys = generateRegistryKeyPair().npmKey +test('source registry selection randomizes among available candidates', () => { + const npmKey = generateRegistryKeyPair().npmKey const registries = [ { - label: 
'first', - registryBaseUrl: 'https://registry.first.example', - keysEndpoint: 'https://registry.first.example/-/npm/v1/keys', - npmKeys: [npmKeys], + label: 'mirror', + registryBaseUrl: 'https://mirror.example', + kind: 'mirror' as const, + keysEndpoint: 'https://mirror.example/-/npm/v1/keys', + npmKeys: [npmKey], }, { - label: 'second', - registryBaseUrl: 'https://registry.second.example', - keysEndpoint: 'https://registry.second.example/-/npm/v1/keys', - npmKeys: [npmKeys], + label: 'npm-origin', + registryBaseUrl: 'https://registry.npmjs.example', + kind: 'mirror' as const, + keysEndpoint: 'https://registry.npmjs.example/-/npm/v1/keys', + npmKeys: [npmKey], }, ] - assert.equal(resolveSourceRegistry(registries).label, 'first') + assert.equal(resolveSourceRegistry(registries, () => 0).label, 'mirror') + assert.equal(resolveSourceRegistry(registries, () => 0.99).label, 'npm-origin') }) diff --git a/apps/registry-core/src/protocol.ts b/apps/registry-core/src/protocol.ts index 0dad8ad38f..a51d7e5e0d 100644 --- a/apps/registry-core/src/protocol.ts +++ b/apps/registry-core/src/protocol.ts @@ -13,6 +13,7 @@ export interface RegistryIdentity { export interface SourceRegistry { label: string registryBaseUrl: string + kind?: 'mirror' | 'registry' keysEndpoint: string npmKeys: NpmKey[] } @@ -97,14 +98,19 @@ export function createRegistryIdentity(input: { } } -export function resolveSourceRegistry(registries: SourceRegistry[]): SourceRegistry { - const matched = registries[0] - if (!matched) { +export function resolveSourceRegistry( + registries: SourceRegistry[], + random: () => number = Math.random, +): SourceRegistry { + if (registries.length === 0) { throw new Error('No source registry configured') } - // With package-level routing removed, the first configured registry is the fetch source. - return matched + // We randomize only after the candidate list has already been narrowed to registries that can + // actually serve the package/version. 
This keeps mirror-vs-registry routing simple without + // reviving static package-pattern rules. + const index = Math.min(registries.length - 1, Math.floor(random() * registries.length)) + return registries[index]! } export function createArtifactDigest(input: Buffer | string): string { diff --git a/apps/registry-core/src/registry-discovery.ts b/apps/registry-core/src/registry-discovery.ts index 71674292e7..9ccc44ebd1 100644 --- a/apps/registry-core/src/registry-discovery.ts +++ b/apps/registry-core/src/registry-discovery.ts @@ -4,6 +4,7 @@ import type { SourceRegistry } from './protocol.ts' export interface ConfiguredSourceRegistry { label: string registryBaseUrl: string + kind?: 'mirror' | 'registry' npmKeys?: NpmKey[] } @@ -39,6 +40,7 @@ export async function hydrateSourceRegistries( effective.map(async registry => ({ label: registry.label, registryBaseUrl: registry.registryBaseUrl, + kind: registry.kind, keysEndpoint: `${registry.registryBaseUrl}/-/npm/v1/keys`, npmKeys: registry.npmKeys ?? 
(await fetchRegistryKeys(registry.registryBaseUrl)), })), diff --git a/apps/registry-proxy/src/server.ts b/apps/registry-proxy/src/server.ts index 81d456ee13..af15d036a2 100644 --- a/apps/registry-proxy/src/server.ts +++ b/apps/registry-proxy/src/server.ts @@ -4,6 +4,7 @@ import http from 'node:http' import path from 'node:path' import { URL } from 'node:url' import { + collectPublishedKeys, createArtifactDigest, createRegistryIdentity, hydrateSourceRegistries, @@ -27,6 +28,7 @@ interface ProxyServerOptions { sumDbBaseUrl?: string registryPrivateKey?: string registryPublicKey?: string + random?: () => number } type VerifiedVersionSignature = { @@ -215,6 +217,21 @@ async function fetchPackument(packageName: string, sourceRegistry: SourceRegistr return (await upstreamResponse.json()) as Record } +async function fetchPackumentIfPresent(packageName: string, sourceRegistry: SourceRegistry) { + try { + const packument = await fetchPackument(packageName, sourceRegistry) + return { + sourceRegistry, + packument, + } + } catch (error) { + if (error instanceof Error && error.message === 'upstream returned 404') { + return null + } + throw error + } +} + function getVersionMetadata(packument: Record, version: string) { const versions = packument.versions if (!versions || typeof versions !== 'object') { @@ -229,6 +246,80 @@ function getVersionMetadata(packument: Record, version: string) return versionMetadata as Record } +async function findPackumentCandidates(packageName: string, sourceRegistries: SourceRegistry[]) { + const candidates = await Promise.all( + sourceRegistries.map(sourceRegistry => fetchPackumentIfPresent(packageName, sourceRegistry)), + ) + + return candidates.filter( + ( + candidate, + ): candidate is { + sourceRegistry: SourceRegistry + packument: Record + } => candidate !== null, + ) +} + +async function resolvePackumentCandidate( + packageName: string, + sourceRegistries: SourceRegistry[], + random: () => number, +) { + const candidates = await 
findPackumentCandidates(packageName, sourceRegistries) + if (candidates.length === 0) { + throw new Error(`Package ${packageName} was not found in any configured source registry`) + } + + const chosenRegistry = resolveSourceRegistry( + candidates.map(candidate => candidate.sourceRegistry), + random, + ) + + return candidates.find(candidate => candidate.sourceRegistry.registryBaseUrl === chosenRegistry.registryBaseUrl)! +} + +async function resolveVersionCandidate(input: { + packageName: string + version: string + sourceRegistries: SourceRegistry[] + random: () => number +}) { + const candidates = await findPackumentCandidates(input.packageName, input.sourceRegistries) + const versionCandidates = candidates + .map(candidate => { + const versionMetadata = getVersionMetadata(candidate.packument, input.version) + if (!versionMetadata) { + return null + } + + return { + sourceRegistry: candidate.sourceRegistry, + packument: candidate.packument, + versionMetadata, + } + }) + .filter( + ( + candidate, + ): candidate is { + sourceRegistry: SourceRegistry + packument: Record + versionMetadata: Record + } => candidate !== null, + ) + + if (versionCandidates.length === 0) { + return null + } + + const chosenRegistry = resolveSourceRegistry( + versionCandidates.map(candidate => candidate.sourceRegistry), + input.random, + ) + return versionCandidates.find(candidate => candidate.sourceRegistry.registryBaseUrl === chosenRegistry.registryBaseUrl) +} + async function fetchAndIngestTarball(input: { packageName: string version: string @@ -353,6 +444,11 @@ export async function createRegistryProxyServer(options: ProxyServerOptions) { }) const sourceRegistries = await hydrateSourceRegistries(options.sourceRegistries ?? 
[], options.upstreamBaseUrl) + const publishedUpstreamKeys = collectPublishedKeys(sourceRegistries) + const servedKeys = [keyPair.npmKey, ...publishedUpstreamKeys].filter( + (key, index, keys) => keys.findIndex(candidate => candidate.keyid === key.keyid) === index, + ) + const random = options.random ?? Math.random const server = http.createServer(async (request, response) => { const method = request.method ?? 'GET' @@ -361,7 +457,7 @@ export async function createRegistryProxyServer(options: ProxyServerOptions) { if (method === 'GET' && pathname === '/-/npm/v1/keys') { sendJson(response, 200, { - keys: [keyPair.npmKey], + keys: servedKeys, }) return } @@ -374,9 +470,22 @@ export async function createRegistryProxyServer(options: ProxyServerOptions) { try { const tarballRequest = parseTarballRequest(pathname) if (tarballRequest) { - const sourceRegistry = resolveSourceRegistry(sourceRegistries) + const candidate = await resolveVersionCandidate({ + packageName: tarballRequest.packageName, + version: tarballRequest.version, + sourceRegistries, + random, + }) + if (!candidate) { + sendError(response, 404, `Unable to find ${tarballRequest.packageName}@${tarballRequest.version}`) + return + } + + const sourceRegistry = candidate.sourceRegistry const requestId = createRequestId() - const upstreamTarballUrl = `${sourceRegistry.registryBaseUrl}${pathname}` + const dist = candidate.versionMetadata.dist as Record + const upstreamTarballUrl = + typeof dist.tarball === 'string' ? 
dist.tarball : `${sourceRegistry.registryBaseUrl}${pathname}` const upstreamResponse = await fetch(upstreamTarballUrl) if (!upstreamResponse.ok || !upstreamResponse.body) { sendError(response, upstreamResponse.status || 502, `Unable to fetch ${upstreamTarballUrl}`) @@ -403,16 +512,12 @@ export async function createRegistryProxyServer(options: ProxyServerOptions) { response.end() const body = Buffer.concat(chunks) - const packument = await fetchPackument(tarballRequest.packageName, sourceRegistry) - const versionMetadata = getVersionMetadata(packument, tarballRequest.version) - const verifiedSignature = - versionMetadata && - verifyVersionSignature({ - packageName: tarballRequest.packageName, - version: tarballRequest.version, - versionMetadata, - sourceRegistry, - }) + const verifiedSignature = verifyVersionSignature({ + packageName: tarballRequest.packageName, + version: tarballRequest.version, + versionMetadata: candidate.versionMetadata, + sourceRegistry, + }) if (!verifiedSignature) { console.log(`[proxy] tarball ${tarballRequest.packageName}@${tarballRequest.version} checkpointed=false reason=signature verification failed`) @@ -448,12 +553,12 @@ export async function createRegistryProxyServer(options: ProxyServerOptions) { sendError(response, 400, `Invalid package path ${pathname}`) return } - const sourceRegistry = resolveSourceRegistry(sourceRegistries) - const requestId = createRequestId() let body: string | null = null try { - const upstreamPackument = await fetchPackument(packageName, sourceRegistry) + const chosenCandidate = await resolvePackumentCandidate(packageName, sourceRegistries, random) + const sourceRegistry = chosenCandidate.sourceRegistry + const upstreamPackument = chosenCandidate.packument await ensureInstallTarballRecorded({ packageName, packument: upstreamPackument, diff --git a/config/registries.ts b/config/registries.ts index bf81e50a72..19c5a599bc 100644 --- a/config/registries.ts +++ b/config/registries.ts @@ -1,6 +1,7 @@ export 
interface ConfiguredRegistrySource { label: string registryBaseUrl: string + kind?: 'mirror' | 'registry' } // This is the repo-level source of truth for source registries. @@ -10,9 +11,11 @@ export const registryCatalog: ConfiguredRegistrySource[] = [ { label: 'yarn', registryBaseUrl: 'https://registry.yarnpkg.com', + kind: 'mirror', }, { label: 'npm', registryBaseUrl: 'https://registry.npmjs.org', + kind: 'registry', }, ]