diff --git a/library/helpers/extractStringsFromUserInput.test.ts b/library/helpers/extractStringsFromUserInput.test.ts
index 8804a704d..8f58878a1 100644
--- a/library/helpers/extractStringsFromUserInput.test.ts
+++ b/library/helpers/extractStringsFromUserInput.test.ts
@@ -193,6 +193,81 @@ t.test("it decodes uri encoded strings", async () => {
   );
 });
 
+t.test("it decodes double-encoded strings iteratively", async () => {
+  t.same(
+    extractStringsFromUserInput({ str: "a%2520b" }),
+    fromArr(["str", "a%2520b", "a%20b", "a b"])
+  );
+});
+
+t.test("it decodes up to MAX_URL_DECODE_DEPTH (5) times", async () => {
+  t.same(
+    extractStringsFromUserInput({ str: "a%2525252520b" }),
+    fromArr([
+      "str",
+      "a%2525252520b",
+      "a%25252520b",
+      "a%252520b",
+      "a%2520b",
+      "a%20b",
+      "a b",
+    ])
+  );
+});
+
+t.test(
+  "it stops at MAX_URL_DECODE_DEPTH and does not fully decode",
+  async () => {
+    t.same(
+      extractStringsFromUserInput({ str: "a%252525252520b" }),
+      fromArr([
+        "str",
+        "a%252525252520b",
+        "a%2525252520b",
+        "a%25252520b",
+        "a%252520b",
+        "a%2520b",
+        "a%20b",
+      ])
+    );
+    t.notOk(extractStringsFromUserInput({ str: "a%252525252520b" }).has("a b"));
+  }
+);
+
+t.test("it stops decoding on invalid percent encoding", async () => {
+  t.same(
+    extractStringsFromUserInput({ str: "test%ZZfoo" }),
+    fromArr(["str", "test%ZZfoo"])
+  );
+});
+
+t.test(
+  "it handles encoded percent sign (%25) becoming an invalid sequence",
+  async () => {
+    t.same(
+      extractStringsFromUserInput({ str: "a%25b" }),
+      fromArr(["str", "a%25b", "a%b"])
+    );
+  }
+);
+
+t.test(
+  "it does not partially decode strings with mixed valid and invalid sequences",
+  async () => {
+    t.same(
+      extractStringsFromUserInput({ str: "foo%20bar%ZZbaz" }),
+      fromArr(["str", "foo%20bar%ZZbaz"])
+    );
+  }
+);
+
+t.test("it decodes multi-byte UTF-8 percent sequences", async () => {
+  t.same(
+    extractStringsFromUserInput({ str: "%C3%A9" }),
+    fromArr(["str", "%C3%A9", "é"])
+  );
+});
+
 function buildNestedDictIterative(depth: number): Record {
   let result: Record = { a: "b" };
   for (let i = 1; i <= depth; i++) {
diff --git a/library/helpers/extractStringsFromUserInput.ts b/library/helpers/extractStringsFromUserInput.ts
index d8f1dd761..dfc9fa9d4 100644
--- a/library/helpers/extractStringsFromUserInput.ts
+++ b/library/helpers/extractStringsFromUserInput.ts
@@ -11,6 +11,8 @@ type UserString = string;
 // the error will be caught, but it stops our inspection
 const MAX_DEPTH = 1024;
 
+const MAX_URL_DECODE_DEPTH = 5;
+
 export function extractStringsFromUserInput(
   obj: unknown,
   depth: number = 0
@@ -50,15 +52,7 @@ export function extractStringsFromUserInput(
 
   if (typeof obj === "string" && obj.length > 0) {
     results.add(obj);
-
-    if (obj.includes("%") && obj.length >= 3) {
-      const r = safeDecodeURIComponent(obj);
-      if (r && r !== obj) {
-        // Only add if the decoded value is different from the original, to avoid duplicates in results
-        // This improves the performance of all injection tests
-        results.add(r);
-      }
-    }
+    addURLDecodedVariants(obj, results);
 
     const jwt = tryDecodeAsJWT(obj);
     if (jwt.jwt) {
@@ -79,3 +73,30 @@ export function extractStringsFromUserInput(
 
   return results;
 }
+
+/**
+ * Repeatedly URL-decodes `str` and adds every distinct intermediate result to
+ * `results`, so multi-encoded payloads (e.g. "a%2520b") are inspected in each
+ * of their decoded forms. The original string itself is not added here.
+ *
+ * Stops after MAX_URL_DECODE_DEPTH rounds, or earlier once the current value
+ * is shorter than 3 characters, contains no "%", or decoding returns a falsy
+ * value or leaves the value unchanged.
+ */
+function addURLDecodedVariants(str: string, results: Set<UserString>): void {
+  let current = str;
+  for (let i = 0; i < MAX_URL_DECODE_DEPTH; i++) {
+    if (current.length < 3 || !current.includes("%")) {
+      break;
+    }
+
+    const decoded = safeDecodeURIComponent(current);
+    if (!decoded || decoded === current) {
+      // If decoding fails or doesn't change the string, stop further attempts to decode
+      break;
+    }
+
+    results.add(decoded);
+    current = decoded;
+  }
+}