Skip to content

Commit 879d7a9

Browse files
Add further compatibility fixes for normalizeURI (#4346)
Suggested by @emcfarlane. Follow up to #4345. Ref: https://github.com/microsoft/vscode-uri
1 parent 743f729 commit 879d7a9

2 files changed

Lines changed: 79 additions & 22 deletions

File tree

private/buf/buflsp/uri.go

Lines changed: 41 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -15,35 +15,57 @@
1515
package buflsp
1616

1717
import (
18+
"net/url"
1819
"strings"
1920

2021
"go.lsp.dev/protocol"
2122
"go.lsp.dev/uri"
2223
)
2324

24-
// normalizeURI ensures that URIs are properly percent-encoded for LSP compatibility.
25-
//
26-
// The go.lsp.dev/uri package (which uses Go's net/url) follows RFC 3986 strictly and
27-
// allows '@' unencoded in path components. However, VS Code's LSP client uses the
28-
// microsoft/vscode-uri package which encodes '@' as '%40' everywhere to avoid ambiguity
29-
// with the authority component separator (user@host).
30-
//
31-
// Additionally, on Windows, the package also encodes ':' as '%3A' in drive letter paths
32-
// (e.g., 'file:///d:/path' becomes 'file:///d%3A/path').
25+
// FilePathToURI converts a file path to a properly encoded URI.
26+
func FilePathToURI(path string) protocol.URI {
27+
return normalizeURI(uri.File(path))
28+
}
29+
30+
// normalizeURI encodes a URI to match VS Code's microsoft/vscode-uri behavior.
3331
//
34-
// When URIs don't match exactly, LSP operations like go-to-definition fail because
35-
// the client's URI (with %40) doesn't match the server's URI (with @).
32+
// Go's net/url follows RFC 3986 and permits '@' and ':' unencoded in path
33+
// components (valid pchar); vscode-uri always encodes them. vscode-uri also
34+
// lowercases Windows drive letters. When URIs differ, LSP operations like
35+
// go-to-definition silently fail because the client and server URIs don't match.
3636
func normalizeURI(u protocol.URI) protocol.URI {
37-
normalized := strings.ReplaceAll(string(u), "@", "%40")
37+
str := string(u)
3838

39-
if after, found := strings.CutPrefix(normalized, "file:///"); found {
40-
normalized = "file:///" + strings.ReplaceAll(after, ":", "%3A")
39+
after, found := strings.CutPrefix(str, "file:///")
40+
if !found {
41+
// Non-file URIs: only encode @.
42+
return protocol.URI(strings.ReplaceAll(str, "@", "%40"))
4143
}
4244

43-
return protocol.URI(normalized)
44-
}
45+
segments := strings.Split(after, "/")
46+
for i, segment := range segments {
47+
// Decode first to avoid double-encoding already-normalized URIs.
48+
// PathUnescape only fails on malformed sequences (e.g. %2G); falling
49+
// back to the raw segment is the best we can do.
50+
decoded, err := url.PathUnescape(segment)
51+
if err != nil {
52+
decoded = segment
53+
}
54+
// PathEscape encodes spaces as %20 (not +) and most special chars,
55+
// but permits '@' and ':' as RFC 3986 pchar. Encode those manually.
56+
encoded := url.PathEscape(decoded)
57+
encoded = strings.ReplaceAll(encoded, "@", "%40")
58+
encoded = strings.ReplaceAll(encoded, ":", "%3A")
59+
segments[i] = encoded
60+
}
4561

46-
// FilePathToURI converts a file path to a properly encoded URI.
47-
func FilePathToURI(path string) protocol.URI {
48-
return normalizeURI(uri.File(path))
62+
// vscode-uri lowercases Windows drive letters: C%3A → c%3A.
63+
// 'A'+32 == 'a' by ASCII identity; segments[0] is e.g. "C%3A" (4 bytes).
64+
if len(segments[0]) == 4 &&
65+
segments[0][0] >= 'A' && segments[0][0] <= 'Z' &&
66+
segments[0][1:] == "%3A" {
67+
segments[0] = string(segments[0][0]+32) + "%3A"
68+
}
69+
70+
return protocol.URI("file:///" + strings.Join(segments, "/"))
4971
}

private/buf/buflsp/uri_test.go

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ func TestNormalizeURI(t *testing.T) {
4040
expected: "file:///home/user%40host/project/foo.proto",
4141
},
4242
{
43-
name: "windows-drive-letter-colon-encoded",
43+
name: "windows-drive-letter-uppercased-and-colon-encoded",
4444
input: "file:///C:/Users/project/foo.proto",
45-
expected: "file:///C%3A/Users/project/foo.proto",
45+
expected: "file:///c%3A/Users/project/foo.proto",
4646
},
4747
{
4848
name: "windows-lowercase-drive-letter-colon-encoded",
@@ -54,10 +54,45 @@ func TestNormalizeURI(t *testing.T) {
5454
input: "untitled:Untitled-1",
5555
expected: "untitled:Untitled-1",
5656
},
57+
{
58+
name: "non-file-uri-at-sign-encoded",
59+
input: "untitled:user@host",
60+
expected: "untitled:user%40host",
61+
},
5762
{
5863
name: "at-sign-and-windows-drive-letter-both-encoded",
5964
input: "file:///C:/Users/user@host/foo.proto",
60-
expected: "file:///C%3A/Users/user%40host/foo.proto",
65+
expected: "file:///c%3A/Users/user%40host/foo.proto",
66+
},
67+
{
68+
name: "space-in-path-encoded-as-percent-20",
69+
input: "file:///Users/bufbot/My Documents/foo.proto",
70+
expected: "file:///Users/bufbot/My%20Documents/foo.proto",
71+
},
72+
{
73+
name: "already-encoded-space-is-idempotent",
74+
input: "file:///Users/bufbot/My%20Documents/foo.proto",
75+
expected: "file:///Users/bufbot/My%20Documents/foo.proto",
76+
},
77+
{
78+
name: "already-encoded-at-sign-is-idempotent",
79+
input: "file:///home/user%40host/project/foo.proto",
80+
expected: "file:///home/user%40host/project/foo.proto",
81+
},
82+
{
83+
name: "already-encoded-windows-drive-letter-lowercased",
84+
input: "file:///C%3A/Users/project/foo.proto",
85+
expected: "file:///c%3A/Users/project/foo.proto",
86+
},
87+
{
88+
name: "percent-literal-in-filename-not-double-encoded",
89+
input: "file:///path/100%25done/foo.proto",
90+
expected: "file:///path/100%25done/foo.proto",
91+
},
92+
{
93+
name: "space-at-sign-and-windows-drive-letter",
94+
input: "file:///C:/Users/user@host/My Documents/foo.proto",
95+
expected: "file:///c%3A/Users/user%40host/My%20Documents/foo.proto",
6196
},
6297
}
6398
for _, test := range tests {

0 commit comments

Comments
 (0)