-
Notifications
You must be signed in to change notification settings - Fork 66.7k
Expand file tree
/
Copy pathhandle-invalid-query-strings.ts
More file actions
135 lines (117 loc) · 4.5 KB
/
handle-invalid-query-strings.ts
File metadata and controls
135 lines (117 loc) · 4.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import type { Response, NextFunction } from 'express'
import statsd from '@/observability/lib/statsd.js'
import { noCacheControl, defaultCacheControl } from '@/frame/middleware/cache-control.js'
import { ExtendedRequest } from '@/types'
// Metric name under which every 400 and 302 response issued by this
// middleware is counted.
const STATSD_KEY = 'middleware.handle_invalid_querystrings'
// Exported for the sake of end-to-end tests
// A GET/HEAD request with at least this many unrecognized query string keys
// is answered with a 400.
export const MAX_UNFAMILIAR_KEYS_BAD_REQUEST = 15
// A GET/HEAD request with at least this many unrecognized query string keys
// (but fewer than the bad-request threshold above) is redirected to the same
// path with those keys removed.
export const MAX_UNFAMILIAR_KEYS_REDIRECT = 3
// Query string keys that are only considered familiar when the request path
// starts with the given prefix. Matching is done with `startsWith`, so each
// entry also covers every path underneath that prefix.
const RECOGNIZED_KEYS_BY_PREFIX = {
'/_next/data/': ['versionId', 'productId', 'restPage', 'apiVersion', 'category', 'subcategory'],
'/api/search': ['query', 'language', 'version', 'page', 'product', 'autocomplete', 'limit'],
'/api/combined-search': ['query', 'version', 'size', 'debug'],
'/api/anchor-redirect': ['hash', 'path'],
'/api/webhooks': ['category', 'version'],
'/api/pageinfo': ['pathname'],
}
// Query string keys that are considered familiar on any path. These are
// filtered out first, before the per-prefix lists are consulted, and never
// count toward the unrecognized-key thresholds.
const RECOGNIZED_KEYS_BY_ANY = new Set([
// Learning track pages
'learn',
'learnProduct',
// Platform picker
'platform',
// Tool picker
'tool',
// When apiVersion isn't the only one. E.g. ?apiVersion=XXX&tool=vscode
'apiVersion',
// Search results page
'query',
// Any page, Search Overlay
'search-overlay-input',
'search-overlay-open',
'search-overlay-ask-ai',
// The drop-downs on "Webhook events and payloads"
'actionType',
// Used by the tracking middleware
'ghdomain',
// UTM campaign tracking
'utm_source',
'utm_medium',
'utm_campaign',
// Used by experiments
'feature',
])
/**
 * Rebuild a query string from Express's parsed `query`, omitting `dropKeys`.
 *
 * Express parses repeated parameters (`?tool=a&tool=b`) into an array. Passing
 * that object straight to `new URLSearchParams(...)` stringifies the array
 * into a single `a,b` value, so each array element is appended individually
 * to keep the retained parameters intact. Any other non-string value (for
 * example a nested object from `?a[b]=1`) is stringified with `String()`.
 */
function buildRetainedSearchParams(
  query: Record<string, unknown>,
  dropKeys: readonly string[],
): URLSearchParams {
  const dropped = new Set(dropKeys)
  const retained = new URLSearchParams()
  for (const [key, value] of Object.entries(query)) {
    if (dropped.has(key)) continue
    const values: unknown[] = Array.isArray(value) ? value : [value]
    for (const item of values) retained.append(key, String(item))
  }
  return retained
}

/** Count one shielded response (400 or 302) in statsd, tagged with request details. */
function countShieldedResponse(
  req: ExtendedRequest,
  statusCode: 400 | 302,
  keyCount: number,
): void {
  const tags = [
    `response:${statusCode}`,
    `url:${req.url}`,
    `path:${req.path}`,
    `keys:${keyCount}`,
  ]
  statsd.increment(STATSD_KEY, 1, tags)
}

/**
 * Express middleware that shields against junk query strings on GET and HEAD
 * requests.
 *
 * Keys listed in `RECOGNIZED_KEYS_BY_ANY`, or in `RECOGNIZED_KEYS_BY_PREFIX`
 * for a matching path prefix, are considered familiar. Based on what is left:
 *
 * - 400 (not cached) when there are at least
 *   `MAX_UNFAMILIAR_KEYS_BAD_REQUEST` unfamiliar keys, or when both
 *   `survey-token` and `survey-vote` are present (honeypot).
 * - 302 to the same path without the unfamiliar keys when there are at least
 *   `MAX_UNFAMILIAR_KEYS_REDIRECT` of them, or when one of the two known-bad
 *   patterns described inline is detected.
 * - Otherwise the request is passed on to `next()`.
 *
 * Every 400 and 302 is counted in statsd under `STATSD_KEY`.
 */
export default function handleInvalidQuerystrings(
  req: ExtendedRequest,
  res: Response,
  next: NextFunction,
) {
  const { method, query, path } = req
  if (method !== 'GET' && method !== 'HEAD') {
    return next()
  }

  const originalKeys = Object.keys(query)
  let keys = originalKeys.filter((key) => !RECOGNIZED_KEYS_BY_ANY.has(key))
  if (keys.length > 0) {
    // Before we judge the number of query strings, strip out all the ones
    // we're familiar with.
    for (const [prefix, recognizedKeys] of Object.entries(RECOGNIZED_KEYS_BY_PREFIX)) {
      if (path.startsWith(prefix)) {
        keys = keys.filter((key) => !recognizedKeys.includes(key))
      }
    }
  }

  // If you fill out the Survey form with all the fields and somehow
  // don't attempt to make a POST request, you'll end up with a query
  // string like this.
  const honeypotted = 'survey-token' in query && 'survey-vote' in query
  if (keys.length >= MAX_UNFAMILIAR_KEYS_BAD_REQUEST || honeypotted) {
    noCacheControl(res)
    const message = honeypotted ? 'Honeypotted' : 'Too many unrecognized query string parameters'
    res.status(400).send(message)
    countShieldedResponse(req, 400, originalKeys.length)
    return
  }

  // This is a pattern we've observed in production and we're shielding
  // against it happening again. The root home page is hit with a
  // 8 character long query string that has no value.
  const rootHomePage = path.split('/').length === 2
  const soleKey = keys.length === 1 ? keys[0] : undefined
  const badKeylessQuery =
    rootHomePage && soleKey !== undefined && soleKey.length === 8 && !query[soleKey]

  // It's still a mystery why these requests happen but we've seen large
  // number of requests that have a very long URL-encoded query string
  // that starts with 'tool' but doesn't have any value.
  // For example
  // ?tool%25252525253Dvisualstudio%252525253D%2525252526tool%25252525...
  // ...3Dvscode%2525253D%25252526tool%2525253Dvscode%25253D%252526tool...
  // ...%25253Dvimneovim%253D%2526tool%253Djetbrains%3D%26tool%3Djetbrains=&
  // Let's shield against those by removing them.
  const badToolsQuery = keys.some((key) => key.startsWith('tool%') && !query[key])

  if (keys.length >= MAX_UNFAMILIAR_KEYS_REDIRECT || badKeylessQuery || badToolsQuery) {
    if (process.env.NODE_ENV === 'development') {
      console.warn(
        'Redirecting because of a questionable query string, see https://github.com/github/docs/blob/main/src/shielding/README.md',
      )
    }
    defaultCacheControl(res)
    const retained = buildRetainedSearchParams(query, keys)
    // The path is attacker-controlled. A target starting with `//` would be a
    // protocol-relative URL and send the browser to another host, so collapse
    // any run of leading slashes into one.
    let newURL = req.path.replace(/^\/{2,}/, '/')
    const retainedQuery = retained.toString()
    if (retainedQuery) newURL += `?${retainedQuery}`
    res.redirect(302, newURL)
    countShieldedResponse(req, 302, originalKeys.length)
    return
  }

  return next()
}