diff --git a/CHANGES.md b/CHANGES.md index 7307dd7c9..52bd641e0 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -117,10 +117,19 @@ To be released. ### @fedify/cli + - Made `fedify lookup --recurse` honor `-p`/`--allow-private-address` + for recursively discovered object URLs, matching the policy already used + by `-t`/`--traverse`. Recursive lookups still reject private or + localhost targets by default unless users explicitly opt in. + [[#700], [#718]] + - Added [FEP-044f] `quote` support to `fedify lookup --recurse`, so the CLI can follow both the new quote-post relation and the older `quoteUrl` compatibility surface. [[#452], [#679]] +[#700]: https://github.com/fedify-dev/fedify/issues/700 +[#718]: https://github.com/fedify-dev/fedify/pull/718 + ### @fedify/solidstart - Added `@fedify/solidstart` package for integrating Fedify with diff --git a/docs/cli.md b/docs/cli.md index 67ee8d0c0..5074d0152 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -545,11 +545,12 @@ For short names, only Fedify property naming is accepted. For example, > `--recurse` and [`-t`/`--traverse`](#t-traverse-traverse-the-collection) > are mutually exclusive. > -> Recursive fetches always disallow private/localhost addresses for safety. -> URLs explicitly provided on the command line always allow private -> addresses, while +> Recursive fetches disallow private/localhost addresses by default for +> safety. URLs explicitly provided on the command line always allow private +> addresses, while recursive object fetches honor > [`-p`/`--allow-private-address`](#p-allow-private-address-allow-private-ip-addresses) -> has no effect on recursive steps. +> when you explicitly opt in. Recursive JSON-LD `@context` URLs still remain +> blocked. ### `--recurse-depth`: Set recursion depth limit @@ -1015,21 +1016,19 @@ fedify lookup http://localhost:8000/users/alice ~~~~ The `-p`/`--allow-private-address` option additionally allows private -addresses for URLs discovered during traversal. 
It only has an effect -when used together with -[`-t`/`--traverse`](#t-traverse-traverse-the-collection), since URLs +addresses for URLs discovered during traversal or recursive object fetches. +It only affects discovered URLs used by +[`-t`/`--traverse`](#t-traverse-traverse-the-collection) and +[`--recurse`](#recurse-recurse-through-object-relationships), since URLs embedded in remote responses are otherwise rejected to mitigate SSRF -attacks against private addresses. +attacks against private addresses. Recursive JSON-LD `@context` URLs are +still blocked even when this option is enabled. ~~~~ sh fedify lookup --traverse --allow-private-address http://localhost:8000/users/alice/outbox +fedify lookup --recurse=replyTarget --allow-private-address http://localhost:8000/notes/1 ~~~~ -> [!NOTE] -> Recursive fetches enabled by -> [`--recurse`](#recurse-recurse-through-object-relationships) always -> disallow private addresses regardless of this option. - ### `-s`/`--separator`: Output separator *This option is available since Fedify 1.3.0.* diff --git a/packages/cli/src/lookup.test.ts b/packages/cli/src/lookup.test.ts index 7f6bdf7f8..ae9541916 100644 --- a/packages/cli/src/lookup.test.ts +++ b/packages/cli/src/lookup.test.ts @@ -11,6 +11,7 @@ import { join } from "node:path"; import process from "node:process"; import { Writable } from "node:stream"; import test from "node:test"; +import { serve } from "srvx"; import { configContext } from "./config.ts"; import { getContextLoader } from "./docloader.ts"; import { runCli } from "./runner.ts"; @@ -21,6 +22,7 @@ import { collectRecursiveObjects, createTimeoutSignal, getLookupFailureHint, + getPrivateUrlCandidate, getRecursiveTargetId, lookupCommand, RecursiveLookupError, @@ -768,6 +770,25 @@ test("getLookupFailureHint - suggests authorized-fetch for non-URL errors", () = ); }); +test("getPrivateUrlCandidate - detects obvious private hosts without DNS", () => { + assert.equal( + 
getPrivateUrlCandidate("http://localhost:8080/object")?.href, + "http://localhost:8080/object", + ); + assert.equal( + getPrivateUrlCandidate("http://127.0.0.1:8080/object")?.href, + "http://127.0.0.1:8080/object", + ); + assert.equal( + getPrivateUrlCandidate("http://[::1]:8080/object")?.href, + "http://[::1]:8080/object", + ); + assert.equal( + getPrivateUrlCandidate("https://example.com/object"), + null, + ); +}); + test("getLookupFailureHint - does not treat all UrlError values as private", () => { assert.equal( getLookupFailureHint(new UrlError("Unsupported protocol: ftp:")), @@ -1056,12 +1077,221 @@ async function runLookupAndCaptureExitCode( } } +async function captureStderr<T>( + callback: () => Promise<T>, +): Promise<{ result: T; stderr: string }> { + const originalWrite = process.stderr.write; + let stderr = ""; + process.stderr.write = (( + chunk: string | Uint8Array, + encodingOrCallback?: unknown, + callback?: () => void, + ) => { + stderr += typeof chunk === "string" ? chunk : Buffer.from(chunk).toString(); + if (typeof encodingOrCallback === "function") { + encodingOrCallback(); + } else { + callback?.(); + } + return true; + }) as typeof process.stderr.write; + try { + const result = await callback(); + return { result, stderr }; + } finally { + process.stderr.write = originalWrite; + } +} + function extractIdsFromRawOutput(content: string): string[] { return [...content.matchAll(/"id"\s*:\s*"([^"]+)"/g)].map((match) => match[1] ); } +async function withRecursiveLookupServer<T>( + options: { + replyContextPath?: string; + }, + callback: (server: { + rootUrl: URL; + replyUrl: URL; + requestedPaths: string[]; + }) => Promise<T>, +): Promise<T> { + const requestedPaths: string[] = []; + const server = serve({ + port: 0, + hostname: "127.0.0.1", + silent: true, + fetch(request) { + const requestUrl = new URL(request.url); + const rootUrl = new URL("/notes/1", requestUrl.origin); + const replyUrl = new URL("/notes/0", requestUrl.origin); + const replyContextUrl = 
options.replyContextPath == null + ? undefined + : new URL(options.replyContextPath, requestUrl.origin); + requestedPaths.push(requestUrl.pathname); + + let body: unknown; + if (requestUrl.pathname === rootUrl.pathname) { + body = { + "@context": "https://www.w3.org/ns/activitystreams", + id: rootUrl.href, + type: "Note", + content: "root", + inReplyTo: replyUrl.href, + }; + } else if (requestUrl.pathname === replyUrl.pathname) { + body = { + "@context": replyContextUrl == null + ? "https://www.w3.org/ns/activitystreams" + : [ + "https://www.w3.org/ns/activitystreams", + replyContextUrl.href, + ], + id: replyUrl.href, + type: "Note", + content: "reply", + ...(replyContextUrl == null ? {} : { fedifyTest: "value" }), + }; + } else if ( + replyContextUrl != null && + requestUrl.pathname === replyContextUrl.pathname + ) { + body = { + "@context": { + fedifyTest: "https://fedify.dev/ns/test#fedifyTest", + }, + }; + } else { + return new Response(null, { status: 404 }); + } + + return Response.json(body, { + headers: { + "Content-Type": "application/activity+json", + }, + }); + }, + }); + + await server.ready(); + assert.ok(server.url != null); + const origin = new URL(server.url).origin; + try { + return await callback({ + rootUrl: new URL("/notes/1", origin), + replyUrl: new URL("/notes/0", origin), + requestedPaths, + }); + } finally { + await server.close(true); + } +} + +test("runLookup - rejects recursive private targets by default", async () => { + const testDir = "./test_output_runlookup_recurse_private_default"; + const testFile = `${testDir}/out.jsonl`; + await mkdir(testDir, { recursive: true }); + try { + await withRecursiveLookupServer( + {}, + async ({ rootUrl, requestedPaths }) => { + const { result: exitCode, stderr } = await captureStderr(() => + runLookupAndCaptureExitCode( + createLookupRunCommand({ + urls: [rootUrl.href], + recurse: "replyTarget", + recurseDepth: 20, + allowPrivateAddress: false, + output: testFile, + }), + ) + ); + 
assert.equal(exitCode, 1); + assert.deepEqual(requestedPaths, ["/notes/1"]); + assert.match( + stderr, + /--allow-private-address/, + ); + + const content = await readFile(testFile, "utf8"); + assert.deepEqual(extractIdsFromRawOutput(content), [rootUrl.href]); + }, + ); + } finally { + await rm(testDir, { recursive: true }); + } +}); + +test("runLookup - allows recursive private targets with allowPrivateAddress", async () => { + const testDir = "./test_output_runlookup_recurse_private_allowed"; + const testFile = `${testDir}/out.jsonl`; + await mkdir(testDir, { recursive: true }); + try { + await withRecursiveLookupServer( + {}, + async ({ rootUrl, replyUrl, requestedPaths }) => { + const exitCode = await runLookupAndCaptureExitCode( + createLookupRunCommand({ + urls: [rootUrl.href], + recurse: "replyTarget", + recurseDepth: 20, + allowPrivateAddress: true, + output: testFile, + }), + ); + assert.equal(exitCode, 0); + assert.deepEqual(requestedPaths, ["/notes/1", "/notes/0"]); + + const content = await readFile(testFile, "utf8"); + assert.deepEqual(extractIdsFromRawOutput(content), [ + rootUrl.href, + replyUrl.href, + ]); + }, + ); + } finally { + await rm(testDir, { recursive: true }); + } +}); + +test("runLookup - keeps recursive private contexts blocked", async () => { + const testDir = "./test_output_runlookup_recurse_private_context"; + const testFile = `${testDir}/out.jsonl`; + await mkdir(testDir, { recursive: true }); + try { + await withRecursiveLookupServer( + { replyContextPath: "/contexts/reply" }, + async ({ rootUrl, requestedPaths }) => { + const { result: exitCode, stderr } = await captureStderr(() => + runLookupAndCaptureExitCode( + createLookupRunCommand({ + urls: [rootUrl.href], + recurse: "replyTarget", + recurseDepth: 20, + allowPrivateAddress: true, + output: testFile, + }), + ) + ); + assert.equal(exitCode, 1); + assert.deepEqual(requestedPaths, ["/notes/1", "/notes/0"]); + assert.match( + stderr, + /Recursive JSON-LD context URL .* is always 
blocked/, + ); + + const content = await readFile(testFile, "utf8"); + assert.deepEqual(extractIdsFromRawOutput(content), [rootUrl.href]); + }, + ); + } finally { + await rm(testDir, { recursive: true }); + } +}); + test("runLookup - reverses output order in default multi-input mode", async () => { const testDir = "./test_output_runlookup_default_reverse"; const testFile = `${testDir}/out.jsonl`; diff --git a/packages/cli/src/lookup.ts b/packages/cli/src/lookup.ts index 069700162..0eed6bb7c 100644 --- a/packages/cli/src/lookup.ts +++ b/packages/cli/src/lookup.ts @@ -12,7 +12,13 @@ import { Object as APObject, traverseCollection, } from "@fedify/vocab"; -import { type DocumentLoader, UrlError } from "@fedify/vocab-runtime"; +import { + type DocumentLoader, + expandIPv6Address, + isValidPublicIPv4Address, + isValidPublicIPv6Address, + UrlError, +} from "@fedify/vocab-runtime"; import type { ResourceDescriptor } from "@fedify/webfinger"; import { getLogger } from "@logtape/logtape"; import { bindConfig } from "@optique/config"; @@ -37,8 +43,10 @@ import { string, withDefault, } from "@optique/core"; +import { url as messageUrl } from "@optique/core/message"; import { path, printError } from "@optique/run"; import { createWriteStream, type WriteStream } from "node:fs"; +import { isIP } from "node:net"; import process from "node:process"; import ora from "ora"; import { configContext } from "./config.ts"; @@ -87,11 +95,9 @@ const suppressErrorsOption = bindConfig( const allowPrivateAddressOption = bindConfig( flag("-p", "--allow-private-address", { description: message`Allow private IP addresses for URLs discovered \ -during traversal. This option only has an effect when used together \ -with ${optionNames(["-t", "--traverse"])}, since URLs explicitly \ -provided on the command line always allow private addresses and \ -recursive fetches via ${optionNames(["--recurse"])} always disallow \ -them.`, +during traversal or recursive object fetches. 
Recursive JSON-LD \ +context URLs always remain blocked. URLs explicitly provided on the \ +command line always allow private addresses.`, }), { context: configContext, @@ -527,7 +533,9 @@ function handleTimeoutError( const urlText = url ? ` for: ${colors.red(url)}` : ""; spinner.fail(`Request timed out after ${timeoutSeconds} seconds${urlText}.`); printError( - message`Try increasing the timeout with -T/--timeout option or check network connectivity.`, + message`Try increasing the timeout with ${ + optionNames(["-T", "--timeout"]) + } option or check network connectivity.`, ); } @@ -548,6 +556,94 @@ function isPrivateAddressError(error: unknown): boolean { ); } +export function getPrivateUrlCandidate( + candidate: unknown, +): URL | null { + // This helper is only for post-failure hinting. It intentionally does a + // cheap hostname/IP check so we can recognize obvious private targets + // without re-running the full document-loader validation path. + if (typeof candidate !== "string" && !(candidate instanceof URL)) return null; + + try { + const url = new URL(candidate); + const hostname = url.hostname; + if (hostname === "localhost") return url; + + const normalized = hostname.startsWith("[") && hostname.endsWith("]") + ? hostname.slice(1, -1) + : hostname; + const ipVersion = isIP(normalized); + if (ipVersion === 4) { + return isValidPublicIPv4Address(normalized) ? null : url; + } + if (ipVersion === 6) { + const expanded = expandIPv6Address(normalized); + return isValidPublicIPv6Address(expanded) ? null : url; + } + return null; + } catch { + return null; + } +} + +function isPrivateAddressTarget(target: string): boolean { + return getPrivateUrlCandidate(target) != null; +} + +function getPrivateContextUrl(error: unknown): URL | null { + // Recursive object fetches and recursive JSON-LD context fetches use + // different loader policies. 
When the strict context loader rejects a + // private @context URL, the underlying UrlError is often surfaced as a + // jsonld parsing error instead of the original loader error. This helper + // reconstructs the blocked private context URL so the CLI can show a + // recurse-specific hint instead of the generic authorized-fetch hint. + // This detection intentionally depends on jsonld's current error shape: + // name === "jsonld.InvalidUrl", the "valid JSON-LD object" substring, and + // a trailing `URL: "..."` segment all at once. If jsonld changes those + // details, this helper and the related lookup tests need to be updated + // together. + const errorMessage = error instanceof Error ? error.message : String(error); + if ( + !(error instanceof Error) || + error.name !== "jsonld.InvalidUrl" || + !errorMessage.includes("valid JSON-LD object") + ) { + return null; + } + + const structuredError = error as { + details?: { url?: unknown }; + url?: unknown; + }; + const structuredUrl = getPrivateUrlCandidate(structuredError.details?.url) ?? + getPrivateUrlCandidate(structuredError.url); + if (structuredUrl != null) return structuredUrl; + + const match = errorMessage.match(/URL:\s*"([^"]+)"/); + if (match == null) return null; + return getPrivateUrlCandidate(match[1]); +} + +function printRecursivePrivateAddressHint(): void { + printError( + message`The recursive target appears to be private or localhost. Try with ${ + optionNames(["-p", "--allow-private-address"]) + }, or use ${ + optionNames(["-S", "--suppress-errors"]) + } to skip blocked steps.`, + ); +} + +function printRecursivePrivateContextHint(privateContextUrl: URL): void { + printError( + message`Recursive JSON-LD context URL ${ + messageUrl(privateContextUrl) + } is always blocked, even with ${ + optionNames(["-p", "--allow-private-address"]) + }. 
Use ${optionNames(["-S", "--suppress-errors"])} to skip blocked steps.`, + ); +} + export function getLookupFailureHint( error: unknown, options: { recursive?: boolean } = {}, @@ -582,17 +678,19 @@ function printLookupFailureHint( switch (hint) { case "private-address": printError( - message`The URL appears to be private or localhost. Try with -p/--allow-private-address.`, + message`The URL appears to be private or localhost. Try with ${ + optionNames(["-p", "--allow-private-address"]) + }.`, ); return; case "recursive-private-address": - printError( - message`Recursive fetches do not allow private/localhost URLs. Use -S/--suppress-errors to skip blocked steps, or fetch those targets explicitly without --recurse.`, - ); + printRecursivePrivateAddressHint(); return; case "authorized-fetch": printError( - message`It may be a private object. Try with -a/--authorized-fetch.`, + message`It may be a private object. Try with ${ + optionNames(["-a", "--authorized-fetch"]) + }.`, ); return; } @@ -730,10 +828,8 @@ export async function runLookup( let server: TemporaryServer | undefined = undefined; // URLs explicitly provided by the user always allow private addresses, // so that local servers can be looked up without -p/--allow-private-address. - // URLs discovered during traversal follow the option to mitigate SSRF - // against private addresses, while recursive fetches always disallow - // private addresses regardless of the option (see the --recurse branch - // below, which hardcodes `allowPrivateAddress: false`). + // URLs discovered during traversal or recursion follow the option to + // mitigate SSRF against private addresses. 
const initialBaseDocumentLoader = await getDocumentLoader({ userAgent: command.userAgent, allowPrivateAddress: true, @@ -894,14 +990,14 @@ export async function runLookup( }...`; if (command.recurse != null) { - const recursiveBaseDocumentLoader = await getDocumentLoader({ - userAgent: command.userAgent, - allowPrivateAddress: false, - }); - const recursiveDocumentLoader = wrapDocumentLoaderWithTimeout( - recursiveBaseDocumentLoader, - command.timeout, - ); + const initialLookupDocumentLoader: DocumentLoader = initialAuthLoader ?? + initialDocumentLoader; + const recursiveLookupDocumentLoader: DocumentLoader = authLoader ?? + documentLoader; + // `-p/--allow-private-address` only changes the follow-up object fetches + // that recurse explicitly performs. JSON-LD context loads stay on the + // strict loader so a remote object cannot implicitly expand the trust + // boundary via private @context URLs. const recursiveBaseContextLoader = await getContextLoader({ userAgent: command.userAgent, allowPrivateAddress: false, @@ -910,30 +1006,6 @@ export async function runLookup( recursiveBaseContextLoader, command.timeout, ); - const recursiveAuthLoader = command.authorizedFetch && - authIdentity != null - ? wrapDocumentLoaderWithTimeout( - getAuthenticatedDocumentLoader( - authIdentity, - { - allowPrivateAddress: false, - userAgent: command.userAgent, - specDeterminer: { - determineSpec() { - return command.firstKnock; - }, - rememberSpec() { - }, - }, - }, - ), - command.timeout, - ) - : undefined; - const initialLookupDocumentLoader: DocumentLoader = initialAuthLoader ?? - initialDocumentLoader; - const recursiveLookupDocumentLoader: DocumentLoader = recursiveAuthLoader ?? - recursiveDocumentLoader; let totalObjects = 0; const recurseDepth = command.recurseDepth!; @@ -966,7 +1038,9 @@ export async function runLookup( spinner.fail(`Failed to fetch object: ${colors.red(url)}.`); if (authLoader == null) { printError( - message`It may be a private object. 
Try with -a/--authorized-fetch.`, + message`It may be a private object. Try with ${ + optionNames(["-a", "--authorized-fetch"]) + }.`, ); } await finalizeAndExit(1); @@ -1048,17 +1122,32 @@ export async function runLookup( spinner.fail( `Failed to recursively fetch object: ${colors.red(error.target)}.`, ); - if (authLoader == null) { + if ( + !command.allowPrivateAddress && + isPrivateAddressTarget(error.target) + ) { + printRecursivePrivateAddressHint(); + } else if (authLoader == null) { printError( - message`It may be a private object. Try with -a/--authorized-fetch.`, + message`It may be a private object. Try with ${ + optionNames(["-a", "--authorized-fetch"]) + }.`, ); } } else { spinner.fail("Failed to recursively fetch object."); + const privateContextUrl = getPrivateContextUrl(error); + if (privateContextUrl != null) { + printRecursivePrivateContextHint(privateContextUrl); + await finalizeAndExit(1); + return; + } const hint = getLookupFailureHint(error, { recursive: true }); if (shouldSuggestSuppressErrorsForLookupFailure(authLoader, hint)) { printError( - message`Use the -S/--suppress-errors option to suppress partial errors.`, + message`Use the ${ + optionNames(["-S", "--suppress-errors"]) + } option to suppress partial errors.`, ); } else { printLookupFailureHint(authLoader, error, { recursive: true }); @@ -1172,7 +1261,9 @@ export async function runLookup( spinner.fail(`Failed to fetch object: ${colors.red(url)}.`); if (authLoader == null) { printError( - message`It may be a private object. Try with -a/--authorized-fetch.`, + message`It may be a private object. 
Try with ${ + optionNames(["-a", "--authorized-fetch"]) + }.`, ); } await finalizeAndExit(1); @@ -1272,7 +1363,9 @@ export async function runLookup( const hint = getLookupFailureHint(error); if (shouldSuggestSuppressErrorsForLookupFailure(authLoader, hint)) { printError( - message`Use the -S/--suppress-errors option to suppress partial errors.`, + message`Use the ${ + optionNames(["-S", "--suppress-errors"]) + } option to suppress partial errors.`, ); } else { printLookupFailureHint(authLoader, error); @@ -1323,7 +1416,9 @@ export async function runLookup( spinner.fail(`Failed to fetch ${colors.red(url)}`); if (authLoader == null) { printError( - message`It may be a private object. Try with -a/--authorized-fetch.`, + message`It may be a private object. Try with ${ + optionNames(["-a", "--authorized-fetch"]) + }.`, ); } success = false;