Skip to content

Commit 9102978

Browse files
saracenGitLab
authored andcommitted
Merge branch 'ajwalker/add-cachekey-package' into 'main'
Refactor extract cache key sanitization into dedicated package See merge request https://gitlab.com/gitlab-org/gitlab-runner/-/merge_requests/6509 Merged-by: Arran Walker <ajwalker@gitlab.com> Approved-by: Ashvin Sharma <ashsharma@gitlab.com> Approved-by: Axel von Bertoldi <avonbertoldi@gitlab.com> Reviewed-by: Ashvin Sharma <ashsharma@gitlab.com>
2 parents c428204 + 5e9ae3d commit 9102978

4 files changed

Lines changed: 235 additions & 166 deletions

File tree

cache/cachekey/cachekey.go

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
package cachekey
2+
3+
import (
4+
"fmt"
5+
"path"
6+
"strings"
7+
"unicode"
8+
)
9+
10+
// normaliser decodes URL-encoded slashes and dots, and converts backslashes to
11+
// forward slashes in a single pass.
12+
var normaliser = strings.NewReplacer(
13+
"%2f", "/",
14+
"%2F", "/",
15+
"%2e", ".",
16+
"%2E", ".",
17+
`\`, "/",
18+
)
19+
20+
// Sanitize validates and normalises a cache key.
21+
// Cache keys may contain path separators. The function:
22+
// - decodes URL-encoded '/' (%2f) and '.' (%2e) characters
23+
// - replaces all '\' with '/'
24+
// - resolves path traversals (., ..) within a virtual root
25+
// - strips trailing whitespace from the rightmost path segments,
26+
// removing any that become empty after trimming
27+
func Sanitize(cacheKey string) (string, error) {
28+
if cacheKey == "" {
29+
return "", nil
30+
}
31+
32+
// Decode percent-encoded chars and normalise separators, then
33+
// resolve traversals against a virtual root so ".." can never
34+
// escape beyond the root.
35+
cleaned := path.Clean("/" + normaliser.Replace(cacheKey))
36+
37+
// Strip the leading "/" we added, split into segments, then walk
38+
// backwards trimming trailing whitespace from the rightmost
39+
// segments—dropping any that become empty.
40+
parts := strings.Split(cleaned[1:], "/")
41+
n := len(parts)
42+
for n > 0 {
43+
parts[n-1] = strings.TrimRightFunc(parts[n-1], unicode.IsSpace)
44+
if parts[n-1] != "" {
45+
break
46+
}
47+
n--
48+
}
49+
50+
key := strings.Join(parts[:n], "/")
51+
52+
if key == "" {
53+
return "", fmt.Errorf("cache key %q could not be sanitized", cacheKey)
54+
}
55+
56+
return key, nil
57+
}

cache/cachekey/cachekey_test.go

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
//go:build !integration
2+
3+
package cachekey
4+
5+
import (
6+
"fmt"
7+
"strings"
8+
"testing"
9+
10+
"github.com/stretchr/testify/assert"
11+
)
12+
13+
func TestSanitize(t *testing.T) {
14+
tests := []struct {
15+
rawKey string
16+
expectedKey string
17+
wantErr bool
18+
}{
19+
// ── Empty / identity ────────────────────────────────────────
20+
{rawKey: ""},
21+
{rawKey: "fallback_key", expectedKey: "fallback_key"},
22+
{rawKey: "some-job/some-ref", expectedKey: "some-job/some-ref"},
23+
{rawKey: ".../....", expectedKey: ".../...."},
24+
{rawKey: "...", expectedKey: "..."},
25+
26+
// ── Trailing whitespace / slashes / backslashes ─────────────
27+
{rawKey: "fallback_key/", expectedKey: "fallback_key"},
28+
{rawKey: "fallback_key ", expectedKey: "fallback_key"},
29+
{rawKey: "fallback_key\\", expectedKey: "fallback_key"},
30+
{rawKey: "fallback_key/ \\", expectedKey: "fallback_key"},
31+
{rawKey: "fallback_key/ / \\ \\", expectedKey: "fallback_key"},
32+
{rawKey: "fallback_key/o", expectedKey: "fallback_key/o"},
33+
{rawKey: "fallback_key / \\o", expectedKey: "fallback_key / /o"},
34+
{rawKey: "\t foo bar \t\r", expectedKey: "\t foo bar"},
35+
{rawKey: " foo / bar ", expectedKey: " foo / bar"},
36+
{rawKey: "foo\r", expectedKey: "foo"},
37+
{rawKey: "foo\t", expectedKey: "foo"},
38+
{rawKey: "foo \t \r ", expectedKey: "foo"},
39+
40+
// ── Completely unsanitisable ────────────────────────────────
41+
{rawKey: "\\", wantErr: true},
42+
{rawKey: "\\.", wantErr: true},
43+
{rawKey: "/", wantErr: true},
44+
{rawKey: " ", wantErr: true},
45+
{rawKey: ".", wantErr: true},
46+
{rawKey: "..", wantErr: true},
47+
{rawKey: " / ", wantErr: true},
48+
{rawKey: "//", wantErr: true},
49+
{rawKey: `//\`, wantErr: true},
50+
{rawKey: "../.", wantErr: true},
51+
{rawKey: "foo\\bar\\..\\..", wantErr: true},
52+
{rawKey: "foo/bar/../..", wantErr: true},
53+
{rawKey: " \t\r\n", wantErr: true},
54+
55+
// ── URL-encoded slashes (%2f / %2F) ────────────────────────
56+
{rawKey: "something %2F something", expectedKey: "something / something"},
57+
{rawKey: "something %2f something", expectedKey: "something / something"},
58+
{rawKey: "some%2f../job/some/ref/.", expectedKey: "job/some/ref"},
59+
60+
// ── URL-encoded dots (%2e / %2E) ───────────────────────────
61+
{rawKey: "%2E", wantErr: true},
62+
{rawKey: "%2E%2E", wantErr: true},
63+
{rawKey: "%2E%2E%2E", expectedKey: "..."},
64+
{rawKey: "%2e", wantErr: true},
65+
{rawKey: "%2e%2E", wantErr: true},
66+
{rawKey: ".%2E", wantErr: true},
67+
{rawKey: "%2e.", wantErr: true},
68+
{rawKey: "%2E%2e%2E", expectedKey: "..."},
69+
70+
// %5C is left as-is (literal percent-encoded backslash is fine).
71+
{rawKey: "%5C", expectedKey: "%5C"},
72+
{rawKey: "%5c", expectedKey: "%5c"},
73+
74+
// ── Forward-slash path traversal ────────────────────────────
75+
{rawKey: "foo/./bar", expectedKey: "foo/bar"},
76+
{rawKey: "foo/blipp/../bar", expectedKey: "foo/bar"},
77+
{rawKey: "/foo/bar", expectedKey: "foo/bar"},
78+
{rawKey: "//foo/bar", expectedKey: "foo/bar"},
79+
{rawKey: "./foo/bar", expectedKey: "foo/bar"},
80+
{rawKey: "../foo/bar", expectedKey: "foo/bar"},
81+
{rawKey: ".../foo/bar", expectedKey: ".../foo/bar"},
82+
{rawKey: "foo/bar/..", expectedKey: "foo"},
83+
{rawKey: "foo/bar/../../../.././blerp", expectedKey: "blerp"},
84+
{rawKey: "a/b/c/../../d", expectedKey: "a/d"},
85+
86+
// ── Backslash path traversal ────────────────────────────────
87+
{rawKey: `job\name/git\ref`, expectedKey: "job/name/git/ref"},
88+
{rawKey: "foo\\.\\bar", expectedKey: "foo/bar"},
89+
{rawKey: "foo\\blipp\\..\\bar", expectedKey: "foo/bar"},
90+
{rawKey: "\\foo\\bar", expectedKey: "foo/bar"},
91+
{rawKey: "\\\\foo\\bar", expectedKey: "foo/bar"},
92+
{rawKey: ".\\foo\\bar", expectedKey: "foo/bar"},
93+
{rawKey: "..\\foo\\bar", expectedKey: "foo/bar"},
94+
{rawKey: "...\\foo\\bar", expectedKey: ".../foo/bar"},
95+
{rawKey: "foo\\bar\\..", expectedKey: "foo"},
96+
{rawKey: "foo\\bar\\..\\..\\..\\..\\.\\blerp", expectedKey: "blerp"},
97+
98+
// ── Space-only segments & misc ──────────────────────────────
99+
{rawKey: "foo/ /bar", expectedKey: "foo/ /bar"},
100+
{rawKey: "foo/ /", expectedKey: "foo"},
101+
{rawKey: "foo/ / /", expectedKey: "foo"},
102+
}
103+
104+
for i, tt := range tests {
105+
name := fmt.Sprintf("%d:%q", i, tt.rawKey)
106+
t.Run(name, func(t *testing.T) {
107+
actual, err := Sanitize(tt.rawKey)
108+
if tt.wantErr {
109+
assert.Error(t, err)
110+
} else {
111+
assert.NoError(t, err)
112+
}
113+
assert.Equal(t, tt.expectedKey, actual)
114+
})
115+
}
116+
}
117+
118+
// TestSanitizeInvariants checks properties that must hold for every sanitised
119+
// key, regardless of input.
120+
func TestSanitizeInvariants(t *testing.T) {
121+
cases := []string{
122+
"a", "a/b", "../a", "a/../b", "a/./b",
123+
"a\\b", `a\..\\b`, "/a/b/", " a ", "...",
124+
"%2e%2e/%2f", "a/b/c/../../d/e",
125+
}
126+
for _, raw := range cases {
127+
t.Run(raw, func(t *testing.T) {
128+
key, _ := Sanitize(raw)
129+
if key == "" {
130+
return // unsanitisable, nothing to check
131+
}
132+
assert.False(t, strings.HasPrefix(key, "/"), "must not start with /")
133+
assert.False(t, key == ".." || strings.HasPrefix(key, "../"), "must not start with .. segment")
134+
assert.False(t, strings.Contains(key, `\`), "must not contain backslash")
135+
assert.False(t, strings.HasSuffix(key, " "), "must not end with space")
136+
assert.False(t, strings.HasSuffix(key, "/"), "must not end with /")
137+
138+
// No segment should be "." or ".."
139+
for _, seg := range strings.Split(key, "/") {
140+
assert.NotEqual(t, ".", seg, "must not contain '.' segment")
141+
assert.NotEqual(t, "..", seg, "must not contain '..' segment")
142+
}
143+
})
144+
}
145+
}
146+
147+
// TestSanitizeIdempotent verifies that sanitising an already-clean key
148+
// returns it unchanged with no error.
149+
func TestSanitizeIdempotent(t *testing.T) {
150+
inputs := []string{
151+
"fallback_key",
152+
"some-job/some-ref",
153+
"a/b/c",
154+
"...",
155+
".../foo/bar",
156+
}
157+
for _, raw := range inputs {
158+
t.Run(raw, func(t *testing.T) {
159+
first, err1 := Sanitize(raw)
160+
assert.NoError(t, err1)
161+
162+
second, err2 := Sanitize(first)
163+
assert.NoError(t, err2)
164+
assert.Equal(t, first, second, "sanitise should be idempotent")
165+
})
166+
}
167+
}

shells/abstract.go

Lines changed: 11 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@ import (
1414
"strconv"
1515
"strings"
1616
"time"
17-
"unicode"
1817

1918
"gitlab.com/gitlab-org/gitlab-runner/cache"
19+
"gitlab.com/gitlab-org/gitlab-runner/cache/cachekey"
2020
"gitlab.com/gitlab-org/gitlab-runner/common"
2121
"gitlab.com/gitlab-org/gitlab-runner/common/spec"
2222
"gitlab.com/gitlab-org/gitlab-runner/helpers"
@@ -118,24 +118,21 @@ func newCacheConfig(build *common.Build, userKey string, keyChecks ...func(strin
118118
rawKey = build.GetAllVariables().ExpandValue(userKey)
119119
}
120120

121-
// hashers per mode: nop in unhashed mode, sha256sum in hashed mode
122-
hashers := map[bool]func(string) string{
123-
false: func(s string) string { return s },
124-
true: func(s string) string { return fmt.Sprintf("%x", sha256.Sum256([]byte(s))) },
121+
hasher := func(s string) string { return s }
122+
sanitizer := cachekey.Sanitize
123+
// if hash key support is enabled, we don't need to sanitize keys anymore
124+
if build.IsFeatureFlagOn(featureflags.HashCacheKeys) {
125+
hasher = func(s string) string { return fmt.Sprintf("%x", sha256.Sum256([]byte(s))) }
126+
sanitizer = func(s string) (string, error) { return s, nil }
125127
}
126-
// sanitizers per mode: real sanitizer in unhashed mode, nop sanitizer in hashed mode
127-
sanitizers := map[bool]func(string) (string, error){
128-
false: sanitizeCacheKey,
129-
true: func(s string) (string, error) { return s, nil },
130-
}
131-
132-
hashCacheKeys := build.IsFeatureFlagOn(featureflags.HashCacheKeys)
133-
hasher, sanitizer := hashers[hashCacheKeys], sanitizers[hashCacheKeys]
134128

135129
var warning string
136130
humanKey, err := sanitizer(rawKey)
137-
if err != nil {
131+
switch {
132+
case err != nil:
138133
warning = err.Error()
134+
case humanKey != rawKey:
135+
warning = fmt.Sprintf("cache key %q sanitized to %q", rawKey, humanKey)
139136
}
140137

141138
for _, check := range keyChecks {
@@ -304,68 +301,6 @@ func (b *AbstractShell) extractCacheOrFallbackCachesWrapper(
304301
})
305302
}
306303

307-
// sanitizeCacheKey replicates some cache key rules from GitLab and adds additional validations for known-bad cache
308-
// keys.
309-
// It accepts that the cache keys can be paths and:
310-
// - replaces the URL encoded version of `/` & `.` with their ASCII ones
311-
// - replaces all `\` with `/`
312-
// - ensures there are no path traversals possible "outside of the base path"
313-
// - ensures the last path element is not empty or ends in a space
314-
func sanitizeCacheKey(cacheKey string) (sanitizedKey string, err error) {
315-
if cacheKey == "" {
316-
return "", nil
317-
}
318-
319-
replaceEncodedSlashes := func(s string) string {
320-
for _, slash := range []string{"%2f", "%2F"} {
321-
s = strings.ReplaceAll(s, slash, "/")
322-
}
323-
return s
324-
}
325-
replaceEncodedDots := func(s string) string {
326-
for _, dot := range []string{"%2e", "%2E"} {
327-
s = strings.ReplaceAll(s, dot, ".")
328-
}
329-
return s
330-
}
331-
toSlash := func(s string) string {
332-
return strings.ReplaceAll(s, `\`, `/`)
333-
}
334-
trimSpaceRight := func(s string) string {
335-
return strings.TrimRightFunc(s, unicode.IsSpace)
336-
}
337-
338-
// We root the path, so that path traversals outside of the base path, e.g. resulting in a key like `../../foo/bar`,
339-
// aren't possible
340-
// Note: path.Join calls path.Clean internally
341-
cleaned := path.Join(
342-
"/", toSlash(replaceEncodedSlashes(replaceEncodedDots(cacheKey))),
343-
)
344-
345-
var key string
346-
for cleaned != "" {
347-
dir, file := path.Split(cleaned)
348-
file = trimSpaceRight(file)
349-
350-
if file == "" {
351-
cleaned = dir[:len(dir)-1] // cut off the trailing `/` from dir and continue with that
352-
continue
353-
}
354-
355-
key = path.Join(dir[1:], file) // cut off the leading `/` from dir, because we rooted the path initially
356-
break
357-
}
358-
359-
if key == "" {
360-
return "", fmt.Errorf("cache key %q could not be sanitized", cacheKey)
361-
}
362-
if key != cacheKey {
363-
return key, fmt.Errorf("cache key %q sanitized to %q", cacheKey, key)
364-
}
365-
366-
return key, nil
367-
}
368-
369304
func (b *AbstractShell) addExtractCacheCommand(
370305
ctx context.Context,
371306
w ShellWriter,

0 commit comments

Comments
 (0)