Skip to content

Commit 29dfb65

Browse files
feat: add surrogate-key in the header for Fastly (#3393)
* feat: add surrogate-key in the header for fastly * fix: move middlewares to proxy and some improvements * fix: courses/p/ are actually programs
1 parent a21934b commit 29dfb65

2 files changed

Lines changed: 197 additions & 7 deletions

File tree

frontends/main/src/proxy.test.ts

Lines changed: 120 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { NextRequest } from "next/server"
2-
import { isPageRoute, proxy } from "./proxy"
2+
import { isPageRoute, mitxonlineSurrogateKey, proxy } from "./proxy"
33

44
describe("isPageRoute", () => {
55
test.each([
@@ -33,19 +33,136 @@ describe("isPageRoute", () => {
3333
})
3434
})
3535

36+
describe("mitxonlineSurrogateKey", () => {
37+
describe("course pages — /courses/:readable_id", () => {
38+
it("returns mitxonline:course key", () => {
39+
expect(
40+
mitxonlineSurrogateKey("/courses/course-v1:MITx+6.00.1x+3T2019"),
41+
).toBe("mitxonline:course:course-v1:MITx+6.00.1x+3T2019")
42+
})
43+
44+
it("handles URL-encoded readable_id", () => {
45+
const encoded = encodeURIComponent("course-v1:MITx+6.00.1x+3T2019")
46+
expect(mitxonlineSurrogateKey(`/courses/${encoded}`)).toBe(
47+
"mitxonline:course:course-v1:MITx+6.00.1x+3T2019",
48+
)
49+
})
50+
})
51+
52+
describe("program pages — /programs/:readable_id and /courses/p/:readable_id", () => {
53+
it("returns mitxonline:program key for /programs/:readable_id", () => {
54+
expect(mitxonlineSurrogateKey("/programs/program-v1:MITx+SDS")).toBe(
55+
"mitxonline:program:program-v1:MITx+SDS",
56+
)
57+
})
58+
59+
it("returns mitxonline:program key for /courses/p/:readable_id (ProgramAsCoursePage)", () => {
60+
// /courses/p/ renders ProgramAsCoursePage — the readable_id belongs to a
61+
// program, so the surrogate key must use the program namespace so that
62+
// MITxOnline's program-save signal purges this page correctly.
63+
expect(mitxonlineSurrogateKey("/courses/p/program-v1:MITx+SDS")).toBe(
64+
"mitxonline:program:program-v1:MITx+SDS",
65+
)
66+
})
67+
})
68+
69+
describe("non-product pages", () => {
70+
it.each(["/", "/search", "/about", "/courses", "/programs"])(
71+
"returns null for %s",
72+
(pathname) => {
73+
expect(mitxonlineSurrogateKey(pathname)).toBeNull()
74+
},
75+
)
76+
})
77+
78+
describe("hostile URL segments — regression: Headers.set() crash on control characters", () => {
79+
it("returns null without throwing on malformed percent-encoding in course path", () => {
80+
// %GG is not valid percent-encoding — decodeURIComponent would throw URIError
81+
expect(() => mitxonlineSurrogateKey("/courses/%GG")).not.toThrow()
82+
expect(mitxonlineSurrogateKey("/courses/%GG")).toBeNull()
83+
})
84+
85+
it("returns null without throwing on malformed percent-encoding in program path", () => {
86+
expect(() => mitxonlineSurrogateKey("/programs/%GG")).not.toThrow()
87+
expect(mitxonlineSurrogateKey("/programs/%GG")).toBeNull()
88+
})
89+
90+
it("returns null on CRLF in course path (%0D%0A decodes to \\r\\n)", () => {
91+
// Without safeDecodeSegment, passing this to Headers.set() would throw TypeError
92+
expect(() =>
93+
mitxonlineSurrogateKey("/courses/foo%0D%0AX-Injected%3A+yes"),
94+
).not.toThrow()
95+
expect(
96+
mitxonlineSurrogateKey("/courses/foo%0D%0AX-Injected%3A+yes"),
97+
).toBeNull()
98+
})
99+
100+
it("returns null on CRLF in program path", () => {
101+
expect(() =>
102+
mitxonlineSurrogateKey("/programs/foo%0D%0AX-Injected%3A+yes"),
103+
).not.toThrow()
104+
expect(
105+
mitxonlineSurrogateKey("/programs/foo%0D%0AX-Injected%3A+yes"),
106+
).toBeNull()
107+
})
108+
109+
it("returns null on null byte in course path", () => {
110+
expect(() => mitxonlineSurrogateKey("/courses/foo%00bar")).not.toThrow()
111+
expect(mitxonlineSurrogateKey("/courses/foo%00bar")).toBeNull()
112+
})
113+
114+
it("returns null on null byte in program path", () => {
115+
expect(() => mitxonlineSurrogateKey("/programs/foo%00bar")).not.toThrow()
116+
expect(mitxonlineSurrogateKey("/programs/foo%00bar")).toBeNull()
117+
})
118+
})
119+
})
120+
36121
describe("proxy", () => {
37122
const makeRequest = (pathname: string) =>
38123
new NextRequest(new URL(pathname, "https://learn.mit.edu"))
39124

40-
test("tags page routes with both Cache-Control and Surrogate-Key", () => {
41-
const response = proxy(makeRequest("/courses/course-v1:MITxT+5.601x"))
125+
test("tags generic page routes with Cache-Control and html-pages Surrogate-Key", () => {
126+
const response = proxy(makeRequest("/about"))
42127
expect(response.headers.get("Surrogate-Key")).toBe("html-pages")
43128
expect(response.headers.get("Cache-Control")).toContain("s-maxage=")
44129
})
45130

131+
test("appends per-item surrogate key for MITxOnline course pages", () => {
132+
const response = proxy(makeRequest("/courses/course-v1:MITxT+5.601x"))
133+
expect(response.headers.get("Surrogate-Key")).toBe(
134+
"html-pages mitxonline:course:course-v1:MITxT+5.601x",
135+
)
136+
expect(response.headers.get("Cache-Control")).toContain("s-maxage=")
137+
})
138+
139+
test("appends per-item surrogate key for MITxOnline program pages (/programs/)", () => {
140+
const response = proxy(makeRequest("/programs/program-v1:MITxT+18.01x"))
141+
expect(response.headers.get("Surrogate-Key")).toBe(
142+
"html-pages mitxonline:program:program-v1:MITxT+18.01x",
143+
)
144+
})
145+
146+
test("appends mitxonline:program surrogate key for /courses/p/ (ProgramAsCoursePage)", () => {
147+
const response = proxy(makeRequest("/courses/p/program-v1:MITxT+18.01x"))
148+
expect(response.headers.get("Surrogate-Key")).toBe(
149+
"html-pages mitxonline:program:program-v1:MITxT+18.01x",
150+
)
151+
})
152+
46153
test("leaves non-page routes untagged", () => {
47154
const response = proxy(makeRequest("/healthcheck"))
48155
expect(response.headers.get("Surrogate-Key")).toBeNull()
49156
expect(response.headers.get("Cache-Control")).toBeNull()
50157
})
158+
159+
test("does not throw on CRLF in course path and falls back to html-pages only", () => {
160+
// Regression: without safeDecodeSegment, Headers.set() would throw TypeError
161+
// when the decoded segment contains \r\n (%0D%0A).
162+
expect(() =>
163+
proxy(makeRequest("/courses/foo%0D%0AX-Injected%3A+yes")),
164+
).not.toThrow()
165+
const response = proxy(makeRequest("/courses/foo%0D%0AX-Injected%3A+yes"))
166+
expect(response.headers.get("Surrogate-Key")).toBe("html-pages")
167+
})
51168
})

frontends/main/src/proxy.ts

Lines changed: 77 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,75 @@ export function isPageRoute(pathname: string): boolean {
3434
return true
3535
}
3636

37+
// Path patterns used to recognise MITxOnline product pages. Each one captures
// the raw path segment in group 1; `[^/?]+` requires at least one character and
// stops at a slash, so a deeper path such as /courses/a/b matches none of them.
// None of the patterns carries the `g` flag, so exec() keeps no state between calls.
//
// Matches /courses/<readable_id> — readable_ids contain colons and + but no slashes.
// Note: the bare path /courses/p also matches here, with "p" as the captured segment.
38+
const COURSE_PATTERN = /^\/courses\/([^/?]+)$/
39+
// Matches /courses/p/<readable_id> — despite the /courses/ prefix this is a
40+
// *program* page (ProgramAsCoursePage); the readable_id is a program readable_id.
41+
const PROGRAM_AS_COURSE_PATTERN = /^\/courses\/p\/([^/?]+)$/
42+
// Matches /programs/<readable_id>
43+
const PROGRAM_PATTERN = /^\/programs\/([^/?]+)$/
44+
45+
/**
 * Safely decode a URL path segment for use as a single token inside the
 * Surrogate-Key response header.
 *
 * Returns null instead of throwing when:
 *   - percent-encoding is malformed (decodeURIComponent would throw URIError)
 *   - the decoded value is empty
 *   - the decoded value contains anything other than visible ASCII
 *     (\x21-\x7E): control characters (\r, \n, \0, ...), space or tab, or any
 *     non-ASCII character
 *
 * @param segment - One raw (still percent-encoded) path segment.
 * @returns The decoded segment, or null when it is not safe to embed in the
 *   header value.
 */
function safeDecodeSegment(segment: string): string | null {
  let decoded: string
  try {
    decoded = decodeURIComponent(segment)
  } catch {
    // Malformed percent-encoding such as "%GG".
    return null
  }
  // Allow-list rather than a control-character deny-list, because a deny-list
  // of \x00-\x1F / \x7F still lets two unsafe classes of input through:
  //   - code points above U+00FF (e.g. "%E2%9C%93"), on which Headers.set()
  //     throws a TypeError just as it does for \r or \n;
  //   - space and tab, which are legal header bytes but act as the separator
  //     in a Surrogate-Key list, so "%20" would let the request path append
  //     arbitrary extra keys to the response.
  // Real readable_ids (letters, digits, ":", "+", ".", "-", "_") are all
  // visible ASCII, so they pass unchanged.
  if (!/^[\x21-\x7E]+$/.test(decoded)) {
    return null
  }
  return decoded
}
69+
70+
/**
71+
* Derives a per-item MITxOnline Surrogate-Key tag from the request path, or
72+
* returns null if the path is not a MITxOnline course or program page.
73+
*
74+
* Key format (matches what MITxOnline purges):
75+
* mitxonline:course:<readable_id> — /courses/<readable_id>
76+
* mitxonline:program:<readable_id> — /programs/<readable_id>
77+
* — /courses/p/<readable_id>
78+
* (/courses/p/ renders ProgramAsCoursePage;
79+
* the readable_id is a program readable_id)
80+
*
81+
* NOTE: Surrogate-Key headers must be set here rather than in page.tsx because
82+
* Next.js commits response headers before any page/layout code runs (to begin
83+
* streaming). By the time page.tsx executes, headers have already been sent.
84+
*/
85+
export function mitxonlineSurrogateKey(pathname: string): string | null {
86+
const courseMatch = COURSE_PATTERN.exec(pathname)
87+
if (courseMatch) {
88+
const readableId = safeDecodeSegment(courseMatch[1])
89+
if (readableId !== null) {
90+
return `mitxonline:course:${readableId}`
91+
}
92+
}
93+
94+
const programMatch =
95+
PROGRAM_AS_COURSE_PATTERN.exec(pathname) ?? PROGRAM_PATTERN.exec(pathname)
96+
if (programMatch) {
97+
const readableId = safeDecodeSegment(programMatch[1])
98+
if (readableId !== null) {
99+
return `mitxonline:program:${readableId}`
100+
}
101+
}
102+
103+
return null
104+
}
105+
37106
/**
38107
* Next.js proxy (formerly "middleware"): sets the Cache-Control header at
39108
* request time so that NEXT_CACHE_S_MAXAGE_SECONDS is read from the Kubernetes
@@ -53,10 +122,14 @@ export function proxy(request: NextRequest) {
53122

54123
const response = NextResponse.next()
55124
response.headers.set("Cache-Control", cacheControl)
56-
// Tag all HTML/page routes so Fastly can purge them on deploy without also
57-
// purging immutable /_next/static/ chunks. Driven by the same isPageRoute()
58-
// test as Cache-Control above, so the tag and the cache policy never diverge.
59-
response.headers.set("Surrogate-Key", "html-pages")
125+
126+
// All page routes share the html-pages tag so Fastly can purge them on
127+
// deploy. MITxOnline course/program pages additionally carry a per-item tag
128+
// so MITxOnline can invalidate individual product pages on data change.
129+
const itemKey = mitxonlineSurrogateKey(request.nextUrl.pathname)
130+
const surrogateKey = itemKey ? `html-pages ${itemKey}` : "html-pages"
131+
response.headers.set("Surrogate-Key", surrogateKey)
132+
60133
return response
61134
}
62135

0 commit comments

Comments
 (0)