Skip to content

Commit 5e9ae3d

Browse files
committed
Refactor extract cache key sanitization into dedicated package
Move sanitizeCacheKey from shells/abstract.go into a new cache/cachekey package as cachekey.Sanitize. The sanitization logic is preserved as-is, with two minor implementation tidies: - Use strings.NewReplacer for a single-pass normalisation of percent-encoded slashes/dots and backslashes, replacing the sequential strings.ReplaceAll calls. - Trim trailing whitespace segments with a simple backward loop over split parts, replacing the path.Split-based walk. The "sanitized to" warning is no longer returned as an error from the sanitizer itself. Instead, the caller in newCacheConfig detects when the key was modified and generates the warning. This separates validation errors from informational warnings.
1 parent 8f7bc64 commit 5e9ae3d

4 files changed

Lines changed: 235 additions & 166 deletions

File tree

cache/cachekey/cachekey.go

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
package cachekey
2+
3+
import (
4+
"fmt"
5+
"path"
6+
"strings"
7+
"unicode"
8+
)
9+
10+
// normaliser decodes URL-encoded slashes and dots, and converts backslashes to
11+
// forward slashes in a single pass.
12+
var normaliser = strings.NewReplacer(
13+
"%2f", "/",
14+
"%2F", "/",
15+
"%2e", ".",
16+
"%2E", ".",
17+
`\`, "/",
18+
)
19+
20+
// Sanitize validates and normalises a cache key.
21+
// Cache keys may contain path separators. The function:
22+
// - decodes URL-encoded '/' (%2f) and '.' (%2e) characters
23+
// - replaces all '\' with '/'
24+
// - resolves path traversals (., ..) within a virtual root
25+
// - strips trailing whitespace from the rightmost path segments,
26+
// removing any that become empty after trimming
27+
func Sanitize(cacheKey string) (string, error) {
28+
if cacheKey == "" {
29+
return "", nil
30+
}
31+
32+
// Decode percent-encoded chars and normalise separators, then
33+
// resolve traversals against a virtual root so ".." can never
34+
// escape beyond the root.
35+
cleaned := path.Clean("/" + normaliser.Replace(cacheKey))
36+
37+
// Strip the leading "/" we added, split into segments, then walk
38+
// backwards trimming trailing whitespace from the rightmost
39+
// segments—dropping any that become empty.
40+
parts := strings.Split(cleaned[1:], "/")
41+
n := len(parts)
42+
for n > 0 {
43+
parts[n-1] = strings.TrimRightFunc(parts[n-1], unicode.IsSpace)
44+
if parts[n-1] != "" {
45+
break
46+
}
47+
n--
48+
}
49+
50+
key := strings.Join(parts[:n], "/")
51+
52+
if key == "" {
53+
return "", fmt.Errorf("cache key %q could not be sanitized", cacheKey)
54+
}
55+
56+
return key, nil
57+
}

cache/cachekey/cachekey_test.go

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
//go:build !integration
2+
3+
package cachekey
4+
5+
import (
6+
"fmt"
7+
"strings"
8+
"testing"
9+
10+
"github.com/stretchr/testify/assert"
11+
)
12+
13+
func TestSanitize(t *testing.T) {
14+
tests := []struct {
15+
rawKey string
16+
expectedKey string
17+
wantErr bool
18+
}{
19+
// ── Empty / identity ────────────────────────────────────────
20+
{rawKey: ""},
21+
{rawKey: "fallback_key", expectedKey: "fallback_key"},
22+
{rawKey: "some-job/some-ref", expectedKey: "some-job/some-ref"},
23+
{rawKey: ".../....", expectedKey: ".../...."},
24+
{rawKey: "...", expectedKey: "..."},
25+
26+
// ── Trailing whitespace / slashes / backslashes ─────────────
27+
{rawKey: "fallback_key/", expectedKey: "fallback_key"},
28+
{rawKey: "fallback_key ", expectedKey: "fallback_key"},
29+
{rawKey: "fallback_key\\", expectedKey: "fallback_key"},
30+
{rawKey: "fallback_key/ \\", expectedKey: "fallback_key"},
31+
{rawKey: "fallback_key/ / \\ \\", expectedKey: "fallback_key"},
32+
{rawKey: "fallback_key/o", expectedKey: "fallback_key/o"},
33+
{rawKey: "fallback_key / \\o", expectedKey: "fallback_key / /o"},
34+
{rawKey: "\t foo bar \t\r", expectedKey: "\t foo bar"},
35+
{rawKey: " foo / bar ", expectedKey: " foo / bar"},
36+
{rawKey: "foo\r", expectedKey: "foo"},
37+
{rawKey: "foo\t", expectedKey: "foo"},
38+
{rawKey: "foo \t \r ", expectedKey: "foo"},
39+
40+
// ── Completely unsanitisable ────────────────────────────────
41+
{rawKey: "\\", wantErr: true},
42+
{rawKey: "\\.", wantErr: true},
43+
{rawKey: "/", wantErr: true},
44+
{rawKey: " ", wantErr: true},
45+
{rawKey: ".", wantErr: true},
46+
{rawKey: "..", wantErr: true},
47+
{rawKey: " / ", wantErr: true},
48+
{rawKey: "//", wantErr: true},
49+
{rawKey: `//\`, wantErr: true},
50+
{rawKey: "../.", wantErr: true},
51+
{rawKey: "foo\\bar\\..\\..", wantErr: true},
52+
{rawKey: "foo/bar/../..", wantErr: true},
53+
{rawKey: " \t\r\n", wantErr: true},
54+
55+
// ── URL-encoded slashes (%2f / %2F) ────────────────────────
56+
{rawKey: "something %2F something", expectedKey: "something / something"},
57+
{rawKey: "something %2f something", expectedKey: "something / something"},
58+
{rawKey: "some%2f../job/some/ref/.", expectedKey: "job/some/ref"},
59+
60+
// ── URL-encoded dots (%2e / %2E) ───────────────────────────
61+
{rawKey: "%2E", wantErr: true},
62+
{rawKey: "%2E%2E", wantErr: true},
63+
{rawKey: "%2E%2E%2E", expectedKey: "..."},
64+
{rawKey: "%2e", wantErr: true},
65+
{rawKey: "%2e%2E", wantErr: true},
66+
{rawKey: ".%2E", wantErr: true},
67+
{rawKey: "%2e.", wantErr: true},
68+
{rawKey: "%2E%2e%2E", expectedKey: "..."},
69+
70+
// %5C is left as-is (literal percent-encoded backslash is fine).
71+
{rawKey: "%5C", expectedKey: "%5C"},
72+
{rawKey: "%5c", expectedKey: "%5c"},
73+
74+
// ── Forward-slash path traversal ────────────────────────────
75+
{rawKey: "foo/./bar", expectedKey: "foo/bar"},
76+
{rawKey: "foo/blipp/../bar", expectedKey: "foo/bar"},
77+
{rawKey: "/foo/bar", expectedKey: "foo/bar"},
78+
{rawKey: "//foo/bar", expectedKey: "foo/bar"},
79+
{rawKey: "./foo/bar", expectedKey: "foo/bar"},
80+
{rawKey: "../foo/bar", expectedKey: "foo/bar"},
81+
{rawKey: ".../foo/bar", expectedKey: ".../foo/bar"},
82+
{rawKey: "foo/bar/..", expectedKey: "foo"},
83+
{rawKey: "foo/bar/../../../.././blerp", expectedKey: "blerp"},
84+
{rawKey: "a/b/c/../../d", expectedKey: "a/d"},
85+
86+
// ── Backslash path traversal ────────────────────────────────
87+
{rawKey: `job\name/git\ref`, expectedKey: "job/name/git/ref"},
88+
{rawKey: "foo\\.\\bar", expectedKey: "foo/bar"},
89+
{rawKey: "foo\\blipp\\..\\bar", expectedKey: "foo/bar"},
90+
{rawKey: "\\foo\\bar", expectedKey: "foo/bar"},
91+
{rawKey: "\\\\foo\\bar", expectedKey: "foo/bar"},
92+
{rawKey: ".\\foo\\bar", expectedKey: "foo/bar"},
93+
{rawKey: "..\\foo\\bar", expectedKey: "foo/bar"},
94+
{rawKey: "...\\foo\\bar", expectedKey: ".../foo/bar"},
95+
{rawKey: "foo\\bar\\..", expectedKey: "foo"},
96+
{rawKey: "foo\\bar\\..\\..\\..\\..\\.\\blerp", expectedKey: "blerp"},
97+
98+
// ── Space-only segments & misc ──────────────────────────────
99+
{rawKey: "foo/ /bar", expectedKey: "foo/ /bar"},
100+
{rawKey: "foo/ /", expectedKey: "foo"},
101+
{rawKey: "foo/ / /", expectedKey: "foo"},
102+
}
103+
104+
for i, tt := range tests {
105+
name := fmt.Sprintf("%d:%q", i, tt.rawKey)
106+
t.Run(name, func(t *testing.T) {
107+
actual, err := Sanitize(tt.rawKey)
108+
if tt.wantErr {
109+
assert.Error(t, err)
110+
} else {
111+
assert.NoError(t, err)
112+
}
113+
assert.Equal(t, tt.expectedKey, actual)
114+
})
115+
}
116+
}
117+
118+
// TestSanitizeInvariants checks properties that must hold for every sanitised
119+
// key, regardless of input.
120+
func TestSanitizeInvariants(t *testing.T) {
121+
cases := []string{
122+
"a", "a/b", "../a", "a/../b", "a/./b",
123+
"a\\b", `a\..\\b`, "/a/b/", " a ", "...",
124+
"%2e%2e/%2f", "a/b/c/../../d/e",
125+
}
126+
for _, raw := range cases {
127+
t.Run(raw, func(t *testing.T) {
128+
key, _ := Sanitize(raw)
129+
if key == "" {
130+
return // unsanitisable, nothing to check
131+
}
132+
assert.False(t, strings.HasPrefix(key, "/"), "must not start with /")
133+
assert.False(t, key == ".." || strings.HasPrefix(key, "../"), "must not start with .. segment")
134+
assert.False(t, strings.Contains(key, `\`), "must not contain backslash")
135+
assert.False(t, strings.HasSuffix(key, " "), "must not end with space")
136+
assert.False(t, strings.HasSuffix(key, "/"), "must not end with /")
137+
138+
// No segment should be "." or ".."
139+
for _, seg := range strings.Split(key, "/") {
140+
assert.NotEqual(t, ".", seg, "must not contain '.' segment")
141+
assert.NotEqual(t, "..", seg, "must not contain '..' segment")
142+
}
143+
})
144+
}
145+
}
146+
147+
// TestSanitizeIdempotent verifies that sanitising an already-clean key
148+
// returns it unchanged with no error.
149+
func TestSanitizeIdempotent(t *testing.T) {
150+
inputs := []string{
151+
"fallback_key",
152+
"some-job/some-ref",
153+
"a/b/c",
154+
"...",
155+
".../foo/bar",
156+
}
157+
for _, raw := range inputs {
158+
t.Run(raw, func(t *testing.T) {
159+
first, err1 := Sanitize(raw)
160+
assert.NoError(t, err1)
161+
162+
second, err2 := Sanitize(first)
163+
assert.NoError(t, err2)
164+
assert.Equal(t, first, second, "sanitise should be idempotent")
165+
})
166+
}
167+
}

shells/abstract.go

Lines changed: 11 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@ import (
1414
"strconv"
1515
"strings"
1616
"time"
17-
"unicode"
1817

1918
"gitlab.com/gitlab-org/gitlab-runner/cache"
19+
"gitlab.com/gitlab-org/gitlab-runner/cache/cachekey"
2020
"gitlab.com/gitlab-org/gitlab-runner/common"
2121
"gitlab.com/gitlab-org/gitlab-runner/common/spec"
2222
"gitlab.com/gitlab-org/gitlab-runner/helpers"
@@ -118,24 +118,21 @@ func newCacheConfig(build *common.Build, userKey string, keyChecks ...func(strin
118118
rawKey = build.GetAllVariables().ExpandValue(userKey)
119119
}
120120

121-
// hashers per mode: nop in unhashed mode, sha256sum in hashed mode
122-
hashers := map[bool]func(string) string{
123-
false: func(s string) string { return s },
124-
true: func(s string) string { return fmt.Sprintf("%x", sha256.Sum256([]byte(s))) },
121+
hasher := func(s string) string { return s }
122+
sanitizer := cachekey.Sanitize
123+
// if hash key support is enabled, we don't need to sanitize keys anymore
124+
if build.IsFeatureFlagOn(featureflags.HashCacheKeys) {
125+
hasher = func(s string) string { return fmt.Sprintf("%x", sha256.Sum256([]byte(s))) }
126+
sanitizer = func(s string) (string, error) { return s, nil }
125127
}
126-
// sanitizers per mode: real sanitizer in unhashed mode, nop sanitizer in hashed mode
127-
sanitizers := map[bool]func(string) (string, error){
128-
false: sanitizeCacheKey,
129-
true: func(s string) (string, error) { return s, nil },
130-
}
131-
132-
hashCacheKeys := build.IsFeatureFlagOn(featureflags.HashCacheKeys)
133-
hasher, sanitizer := hashers[hashCacheKeys], sanitizers[hashCacheKeys]
134128

135129
var warning string
136130
humanKey, err := sanitizer(rawKey)
137-
if err != nil {
131+
switch {
132+
case err != nil:
138133
warning = err.Error()
134+
case humanKey != rawKey:
135+
warning = fmt.Sprintf("cache key %q sanitized to %q", rawKey, humanKey)
139136
}
140137

141138
for _, check := range keyChecks {
@@ -304,68 +301,6 @@ func (b *AbstractShell) extractCacheOrFallbackCachesWrapper(
304301
})
305302
}
306303

307-
// sanitizeCacheKey replicates some cache key rules from GitLab and adds additional validations for known-bad cache
308-
// keys.
309-
// It accepts that the cache keys can be paths and:
310-
// - replaces the URL encoded version of `/` & `.` with their ASCII ones
311-
// - replaces all `\` with `/`
312-
// - ensures there are no path traversals possible "outside of the base path"
313-
// - ensures the last path element is not empty or ends in a space
314-
func sanitizeCacheKey(cacheKey string) (sanitizedKey string, err error) {
315-
if cacheKey == "" {
316-
return "", nil
317-
}
318-
319-
replaceEncodedSlashes := func(s string) string {
320-
for _, slash := range []string{"%2f", "%2F"} {
321-
s = strings.ReplaceAll(s, slash, "/")
322-
}
323-
return s
324-
}
325-
replaceEncodedDots := func(s string) string {
326-
for _, dot := range []string{"%2e", "%2E"} {
327-
s = strings.ReplaceAll(s, dot, ".")
328-
}
329-
return s
330-
}
331-
toSlash := func(s string) string {
332-
return strings.ReplaceAll(s, `\`, `/`)
333-
}
334-
trimSpaceRight := func(s string) string {
335-
return strings.TrimRightFunc(s, unicode.IsSpace)
336-
}
337-
338-
// We root the path, so that path traversals outside of the base path, e.g. resulting in a key like `../../foo/bar`,
339-
// aren't possible
340-
// Note: path.Join calls path.Clean internally
341-
cleaned := path.Join(
342-
"/", toSlash(replaceEncodedSlashes(replaceEncodedDots(cacheKey))),
343-
)
344-
345-
var key string
346-
for cleaned != "" {
347-
dir, file := path.Split(cleaned)
348-
file = trimSpaceRight(file)
349-
350-
if file == "" {
351-
cleaned = dir[:len(dir)-1] // cut off the trailing `/` from dir and continue with that
352-
continue
353-
}
354-
355-
key = path.Join(dir[1:], file) // cut off the leading `/` from dir, because we rooted the path initially
356-
break
357-
}
358-
359-
if key == "" {
360-
return "", fmt.Errorf("cache key %q could not be sanitized", cacheKey)
361-
}
362-
if key != cacheKey {
363-
return key, fmt.Errorf("cache key %q sanitized to %q", cacheKey, key)
364-
}
365-
366-
return key, nil
367-
}
368-
369304
func (b *AbstractShell) addExtractCacheCommand(
370305
ctx context.Context,
371306
w ShellWriter,

0 commit comments

Comments
 (0)