Skip to content

Commit 44791e1

Browse files
lcianadinauer
andauthored
Use java.net.URI for parsing URLs in UrlUtils (#4210)
* refactor: use `java.net.URI` for parsing in `UrlUtils` * reorganize tests * add tests * tests * changelog * Update sentry/src/main/java/io/sentry/util/UrlUtils.java Co-authored-by: Alexander Dinauer <adinauer@users.noreply.github.com> * Update CHANGELOG.md --------- Co-authored-by: Alexander Dinauer <adinauer@users.noreply.github.com>
1 parent 1a52aa2 commit 44791e1

File tree

3 files changed

+192
-109
lines changed

3 files changed

+192
-109
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@
44

55
- The SDK now automatically propagates the trace-context to the native layer. This allows to connect errors on different layers of the application. ([#4137](https://github.com/getsentry/sentry-java/pull/4137))
66

7+
### Behavioural Changes
8+
9+
- Use `java.net.URI` for parsing URLs in `UrlUtils` ([#4210](https://github.com/getsentry/sentry-java/pull/4210))
10+
- This could affect grouping for issues with messages containing URLs that fall in known corner cases that were handled incorrectly previously (e.g. email in URL path)
11+
712
### Dependencies
813

914
- Bump Native SDK from v0.7.20 to v0.8.1 ([#4137](https://github.com/getsentry/sentry-java/pull/4137))

sentry/src/main/java/io/sentry/util/UrlUtils.java

Lines changed: 32 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,7 @@
33
import io.sentry.ISpan;
44
import io.sentry.SpanDataConvention;
55
import io.sentry.protocol.Request;
6-
import java.net.MalformedURLException;
7-
import java.net.URL;
8-
import java.util.regex.Matcher;
9-
import java.util.regex.Pattern;
6+
import java.net.URI;
107
import org.jetbrains.annotations.ApiStatus;
118
import org.jetbrains.annotations.NotNull;
129
import org.jetbrains.annotations.Nullable;
@@ -15,123 +12,55 @@
1512
public final class UrlUtils {
1613

1714
public static final @NotNull String SENSITIVE_DATA_SUBSTITUTE = "[Filtered]";
18-
private static final @NotNull Pattern AUTH_REGEX = Pattern.compile("(.+://)(.*@)(.*)");
1915

2016
public static @Nullable UrlDetails parseNullable(final @Nullable String url) {
21-
if (url == null) {
22-
return null;
23-
}
24-
25-
return parse(url);
17+
return url == null ? null : parse(url);
2618
}
2719

2820
public static @NotNull UrlDetails parse(final @NotNull String url) {
29-
if (isAbsoluteUrl(url)) {
30-
return splitAbsoluteUrl(url);
31-
} else {
32-
return splitRelativeUrl(url);
33-
}
34-
}
35-
36-
private static boolean isAbsoluteUrl(@NotNull String url) {
37-
return url.contains("://");
38-
}
39-
40-
private static @NotNull UrlDetails splitRelativeUrl(final @NotNull String url) {
41-
final int queryParamSeparatorIndex = url.indexOf("?");
42-
final int fragmentSeparatorIndex = url.indexOf("#");
43-
44-
final @Nullable String baseUrl =
45-
extractBaseUrl(url, queryParamSeparatorIndex, fragmentSeparatorIndex);
46-
final @Nullable String query =
47-
extractQuery(url, queryParamSeparatorIndex, fragmentSeparatorIndex);
48-
final @Nullable String fragment = extractFragment(url, fragmentSeparatorIndex);
21+
try {
22+
URI uri = new URI(url);
23+
if (uri.isAbsolute() && !isValidAbsoluteUrl(uri)) {
24+
return new UrlDetails(null, null, null);
25+
}
4926

50-
return new UrlDetails(baseUrl, query, fragment);
51-
}
27+
final @NotNull String schemeAndSeparator =
28+
uri.getScheme() == null ? "" : (uri.getScheme() + "://");
29+
final @NotNull String authority = uri.getRawAuthority() == null ? "" : uri.getRawAuthority();
30+
final @NotNull String path = uri.getRawPath() == null ? "" : uri.getRawPath();
31+
final @Nullable String query = uri.getRawQuery();
32+
final @Nullable String fragment = uri.getRawFragment();
5233

53-
private static @Nullable String extractBaseUrl(
54-
final @NotNull String url,
55-
final int queryParamSeparatorIndex,
56-
final int fragmentSeparatorIndex) {
57-
if (queryParamSeparatorIndex >= 0) {
58-
return url.substring(0, queryParamSeparatorIndex).trim();
59-
} else if (fragmentSeparatorIndex >= 0) {
60-
return url.substring(0, fragmentSeparatorIndex).trim();
61-
} else {
62-
return url;
63-
}
64-
}
34+
final @NotNull String filteredUrl = schemeAndSeparator + filterUserInfo(authority) + path;
6535

66-
private static @Nullable String extractQuery(
67-
final @NotNull String url,
68-
final int queryParamSeparatorIndex,
69-
final int fragmentSeparatorIndex) {
70-
if (queryParamSeparatorIndex > 0) {
71-
if (fragmentSeparatorIndex > 0 && fragmentSeparatorIndex > queryParamSeparatorIndex) {
72-
return url.substring(queryParamSeparatorIndex + 1, fragmentSeparatorIndex).trim();
73-
} else {
74-
return url.substring(queryParamSeparatorIndex + 1).trim();
75-
}
76-
} else {
77-
return null;
78-
}
79-
}
80-
81-
private static @Nullable String extractFragment(
82-
final @NotNull String url, final int fragmentSeparatorIndex) {
83-
if (fragmentSeparatorIndex > 0) {
84-
return url.substring(fragmentSeparatorIndex + 1).trim();
85-
} else {
86-
return null;
36+
return new UrlDetails(filteredUrl, query, fragment);
37+
} catch (Exception e) {
38+
return new UrlDetails(null, null, null);
8739
}
8840
}
8941

90-
private static @NotNull UrlDetails splitAbsoluteUrl(final @NotNull String url) {
42+
private static boolean isValidAbsoluteUrl(final @NotNull URI uri) {
9143
try {
92-
final @NotNull String filteredUrl = urlWithAuthRemoved(url);
93-
final @NotNull URL urlObj = new URL(url);
94-
final @NotNull String baseUrl = baseUrlOnly(filteredUrl);
95-
if (baseUrl.contains("#")) {
96-
// url considered malformed because it has fragment
97-
return new UrlDetails(null, null, null);
98-
} else {
99-
final @Nullable String query = urlObj.getQuery();
100-
final @Nullable String fragment = urlObj.getRef();
101-
return new UrlDetails(baseUrl, query, fragment);
102-
}
103-
} catch (MalformedURLException e) {
104-
return new UrlDetails(null, null, null);
44+
uri.toURL();
45+
} catch (Exception e) {
46+
return false;
10547
}
48+
return true;
10649
}
10750

108-
private static @NotNull String urlWithAuthRemoved(final @NotNull String url) {
109-
final @NotNull Matcher userInfoMatcher = AUTH_REGEX.matcher(url);
110-
if (userInfoMatcher.matches() && userInfoMatcher.groupCount() == 3) {
111-
final @NotNull String userInfoString = userInfoMatcher.group(2);
112-
final @NotNull String replacementString =
113-
userInfoString.contains(":")
114-
? (SENSITIVE_DATA_SUBSTITUTE + ":" + SENSITIVE_DATA_SUBSTITUTE + "@")
115-
: (SENSITIVE_DATA_SUBSTITUTE + "@");
116-
return userInfoMatcher.group(1) + replacementString + userInfoMatcher.group(3);
117-
} else {
51+
private static @NotNull String filterUserInfo(final @NotNull String url) {
52+
if (!url.contains("@")) {
11853
return url;
11954
}
120-
}
121-
122-
private static @NotNull String baseUrlOnly(final @NotNull String url) {
123-
final int queryParamSeparatorIndex = url.indexOf("?");
124-
125-
if (queryParamSeparatorIndex >= 0) {
126-
return url.substring(0, queryParamSeparatorIndex).trim();
127-
} else {
128-
final int fragmentSeparatorIndex = url.indexOf("#");
129-
if (fragmentSeparatorIndex >= 0) {
130-
return url.substring(0, fragmentSeparatorIndex).trim();
131-
} else {
132-
return url;
133-
}
55+
if (url.startsWith("@")) {
56+
return SENSITIVE_DATA_SUBSTITUTE + url;
13457
}
58+
final @NotNull String userInfo = url.substring(0, url.indexOf('@'));
59+
final @NotNull String filteredUserInfo =
60+
userInfo.contains(":")
61+
? (SENSITIVE_DATA_SUBSTITUTE + ":" + SENSITIVE_DATA_SUBSTITUTE)
62+
: SENSITIVE_DATA_SUBSTITUTE;
63+
return filteredUserInfo + url.substring(url.indexOf('@'));
13564
}
13665

13766
public static final class UrlDetails {

sentry/src/test/java/io/sentry/util/UrlUtilsTest.kt

Lines changed: 155 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ class UrlUtilsTest {
142142
}
143143

144144
@Test
145-
fun `splits url without query or fragment and no authority`() {
145+
fun `splits url without query or fragment and no user info`() {
146146
val urlDetails = UrlUtils.parse(
147147
"https://sentry.io"
148148
)
@@ -161,30 +161,179 @@ class UrlUtilsTest {
161161
assertEquals("top", urlDetails.fragment)
162162
}
163163

164+
// Fragment is allowed to contain '?' according to RFC 3986
164165
@Test
165-
fun `no details extracted with query after fragment`() {
166+
fun `extracts details with question mark after fragment`() {
166167
val urlDetails = UrlUtils.parse(
167168
"https://user:password@sentry.io#fragment?q=1&s=2&token=secret"
168169
)
170+
assertEquals("https://[Filtered]:[Filtered]@sentry.io", urlDetails.url)
171+
assertNull(urlDetails.query)
172+
assertEquals("fragment?q=1&s=2&token=secret", urlDetails.fragment)
173+
}
174+
175+
@Test
176+
fun `extracts details with question mark after fragment without user info`() {
177+
val urlDetails = UrlUtils.parse(
178+
"https://sentry.io#fragment?q=1&s=2&token=secret"
179+
)
180+
assertEquals("https://sentry.io", urlDetails.url)
181+
assertNull(urlDetails.query)
182+
assertEquals("fragment?q=1&s=2&token=secret", urlDetails.fragment)
183+
}
184+
185+
@Test
186+
fun `no details extracted from malformed url due to invalid protocol`() {
187+
val urlDetails = UrlUtils.parse(
188+
"htps://user@sentry.io#fragment?q=1&s=2&token=secret"
189+
)
169190
assertNull(urlDetails.url)
170191
assertNull(urlDetails.query)
171192
assertNull(urlDetails.fragment)
172193
}
173194

174195
@Test
175-
fun `no details extracted with query after fragment without authority`() {
196+
fun `no details extracted from malformed url due to # symbol in fragment`() {
176197
val urlDetails = UrlUtils.parse(
177-
"https://sentry.io#fragment?q=1&s=2&token=secret"
198+
"https://example.com#hello#fragment"
178199
)
179200
assertNull(urlDetails.url)
180201
assertNull(urlDetails.query)
181202
assertNull(urlDetails.fragment)
182203
}
183204

184205
@Test
185-
fun `no details extracted from malformed url`() {
206+
fun `strips empty user info`() {
186207
val urlDetails = UrlUtils.parse(
187-
"htps://user@sentry.io#fragment?q=1&s=2&token=secret"
208+
"https://@sentry.io?query=a#fragment?q=1&s=2&token=secret"
209+
)
210+
assertEquals("https://[Filtered]@sentry.io", urlDetails.url)
211+
assertEquals("query=a", urlDetails.query)
212+
assertEquals("fragment?q=1&s=2&token=secret", urlDetails.fragment)
213+
}
214+
215+
@Test
216+
fun `extracts details from relative url with leading @ symbol`() {
217+
val urlDetails = UrlUtils.parse(
218+
"@@sentry.io/pages/10?query=a#fragment?q=1&s=2&token=secret"
219+
)
220+
assertEquals("@@sentry.io/pages/10", urlDetails.url)
221+
assertEquals("query=a", urlDetails.query)
222+
assertEquals("fragment?q=1&s=2&token=secret", urlDetails.fragment)
223+
}
224+
225+
@Test
226+
fun `extracts details from relative url with leading question mark`() {
227+
val urlDetails = UrlUtils.parse(
228+
"?query=a#fragment?q=1&s=2&token=secret"
229+
)
230+
assertEquals("", urlDetails.url)
231+
assertEquals("query=a", urlDetails.query)
232+
assertEquals("fragment?q=1&s=2&token=secret", urlDetails.fragment)
233+
}
234+
235+
@Test
236+
fun `does not filter email address in path`() {
237+
val urlDetails = UrlUtils.parseNullable(
238+
"https://staging.server.com/api/v4/auth/password/reset/email@example.com"
239+
)!!
240+
assertEquals("https://staging.server.com/api/v4/auth/password/reset/email@example.com", urlDetails.url)
241+
assertNull(urlDetails.query)
242+
assertNull(urlDetails.fragment)
243+
}
244+
245+
@Test
246+
fun `does not filter email address in path with fragment`() {
247+
val urlDetails = UrlUtils.parseNullable(
248+
"https://staging.server.com/api/v4/auth/password/reset/email@example.com#top"
249+
)!!
250+
assertEquals("https://staging.server.com/api/v4/auth/password/reset/email@example.com", urlDetails.url)
251+
assertNull(urlDetails.query)
252+
assertEquals("top", urlDetails.fragment)
253+
}
254+
255+
@Test
256+
fun `does not filter email address in path with query and fragment`() {
257+
val urlDetails = UrlUtils.parseNullable(
258+
"https://staging.server.com/api/v4/auth/password/reset/email@example.com?a=b&c=d#top"
259+
)!!
260+
assertEquals("https://staging.server.com/api/v4/auth/password/reset/email@example.com", urlDetails.url)
261+
assertEquals("a=b&c=d", urlDetails.query)
262+
assertEquals("top", urlDetails.fragment)
263+
}
264+
265+
@Test
266+
fun `does not filter email address in query`() {
267+
val urlDetails = UrlUtils.parseNullable(
268+
"https://staging.server.com/?email=someone@example.com"
269+
)!!
270+
assertEquals("https://staging.server.com/", urlDetails.url)
271+
assertEquals("email=someone@example.com", urlDetails.query)
272+
}
273+
274+
@Test
275+
fun `does not filter email address in fragment`() {
276+
val urlDetails = UrlUtils.parseNullable(
277+
"https://staging.server.com#email=someone@example.com"
278+
)!!
279+
assertEquals("https://staging.server.com", urlDetails.url)
280+
assertEquals("email=someone@example.com", urlDetails.fragment)
281+
}
282+
283+
@Test
284+
fun `does not filter email address in fragment with query`() {
285+
val urlDetails = UrlUtils.parseNullable(
286+
"https://staging.server.com?q=a&b=c#email=someone@example.com"
287+
)!!
288+
assertEquals("https://staging.server.com", urlDetails.url)
289+
assertEquals("q=a&b=c", urlDetails.query)
290+
assertEquals("email=someone@example.com", urlDetails.fragment)
291+
}
292+
293+
@Test
294+
fun `extracts details from relative url with email in path`() {
295+
val urlDetails = UrlUtils.parse(
296+
"/emails/user@sentry.io?query=a&b=c#fragment?q=1&s=2&token=secret"
297+
)
298+
assertEquals("/emails/user@sentry.io", urlDetails.url)
299+
assertEquals("query=a&b=c", urlDetails.query)
300+
assertEquals("fragment?q=1&s=2&token=secret", urlDetails.fragment)
301+
}
302+
303+
@Test
304+
fun `extracts details from relative url with email in query`() {
305+
val urlDetails = UrlUtils.parse(
306+
"users/10?email=user@sentry.io&b=c#fragment?q=1&s=2&token=secret"
307+
)
308+
assertEquals("users/10", urlDetails.url)
309+
assertEquals("email=user@sentry.io&b=c", urlDetails.query)
310+
assertEquals("fragment?q=1&s=2&token=secret", urlDetails.fragment)
311+
}
312+
313+
@Test
314+
fun `extracts details from relative url with email in fragment`() {
315+
val urlDetails = UrlUtils.parse(
316+
"users/10?email=user@sentry.io&b=c#fragment?q=1&s=2&email=user@sentry.io"
317+
)
318+
assertEquals("users/10", urlDetails.url)
319+
assertEquals("email=user@sentry.io&b=c", urlDetails.query)
320+
assertEquals("fragment?q=1&s=2&email=user@sentry.io", urlDetails.fragment)
321+
}
322+
323+
@Test
324+
fun `extracts path from file url`() {
325+
val urlDetails = UrlUtils.parse(
326+
"file:///users/sentry/text.txt"
327+
)
328+
assertEquals("file:///users/sentry/text.txt", urlDetails.url)
329+
assertNull(urlDetails.query)
330+
assertNull(urlDetails.fragment)
331+
}
332+
333+
@Test
334+
fun `does not extract details from websockets uri`() {
335+
val urlDetails = UrlUtils.parse(
336+
"wss://example.com/socket"
188337
)
189338
assertNull(urlDetails.url)
190339
assertNull(urlDetails.query)

0 commit comments

Comments
 (0)