diff --git a/scim-sdk-common/src/main/java/de/captaingoldfish/scim/sdk/common/constants/JavaStringUnescaper.java b/scim-sdk-common/src/main/java/de/captaingoldfish/scim/sdk/common/constants/JavaStringUnescaper.java new file mode 100644 index 000000000..cf5120a28 --- /dev/null +++ b/scim-sdk-common/src/main/java/de/captaingoldfish/scim/sdk/common/constants/JavaStringUnescaper.java @@ -0,0 +1,171 @@ +package de.captaingoldfish.scim.sdk.common.constants; + +/** + * Utility class for unescaping Java/JSON-style escape sequences within strings. + *
+ * This implementation is intentionally lightweight and independent of external libraries. It is designed for + * use cases such as SCIM filter parsing, where quoted string values may contain escaped characters that must + * be converted back into their literal form before comparison. + *
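+ * <p>
+ * The following escape sequences are supported:
+ * </p>
+ * <ul>
+ * <li>{@code \b} (backspace), {@code \t} (horizontal tab), {@code \n} (newline), {@code \f} (form feed) and
+ * {@code \r} (carriage return)</li>
+ * <li>{@code \"}, {@code \'} and {@code \\} (double quote, single quote and backslash)</li>
+ * <li>{@code \/} (JSON-style escaped forward slash)</li>
+ * <li>a backslash followed by {@code u} and exactly four hexadecimal digits, decoded to the single
+ * {@code char} with that value</li>
+ * </ul>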
+ *+ * Unknown escape sequences (for example {@code \x}) are preserved as-is to avoid unintended data loss. A + * trailing backslash is also preserved literally. + *
+ *+ * This class does not aim to be a full drop-in replacement for {@code StringEscapeUtils.unescapeJava(...)}. + * Instead, it deliberately supports the escape sequences required by the filter grammar and a few closely + * related variants that are commonly expected by developers. + *
+ */ +public final class JavaStringUnescaper +{ + + private JavaStringUnescaper() + { + // Utility class + } + + /** + * Unescapes supported Java/JSON-style escape sequences in the given input string. + *+ * If the input is {@code null}, this method returns {@code null}. + *
+ *+ * Supported examples: + *
+ * + *
+ * <pre>{@code
+ * unescapeJava("hello\\nworld") -> "hello\nworld"
+ * unescapeJava("\\\"test\\\"") -> "\"test\""
+ * unescapeJava("foo\\/bar") -> "foo/bar"
+ * unescapeJava("\\u0041") -> "A"
+ * }</pre>
+ *
+ *
+ * @param input the input string that may contain escape sequences
+ * @return the unescaped string, or {@code null} if the input is {@code null}
+ * @throws IllegalArgumentException if an incomplete or invalid unicode escape sequence is encountered
+ */
+  public static String unescapeJava(String input)
+  {
+    // Preserve null semantics so callers do not need an additional null check.
+    if (input == null)
+    {
+      return null;
+    }
+
+    final int length = input.length();
+
+    // The result is never longer than the input, so the input length is a safe initial capacity.
+    StringBuilder result = new StringBuilder(length);
+
+    for ( int i = 0 ; i < length ; i++ )
+    {
+      char current = input.charAt(i);
+
+      // Fast path for ordinary characters: append directly.
+      if (current != '\\')
+      {
+        result.append(current);
+        continue;
+      }
+
+      // A trailing backslash cannot form a valid escape sequence.
+      // It is preserved literally instead of throwing an exception.
+      if (i + 1 >= length)
+      {
+        result.append('\\');
+        break;
+      }
+
+      // Consume the character after the backslash; it selects the escape sequence.
+      char next = input.charAt(++i);
+
+      switch (next)
+      {
+        case 'b':
+          result.append('\b'); // backspace
+          break;
+
+        case 't':
+          result.append('\t'); // horizontal tab
+          break;
+
+        case 'n':
+          result.append('\n'); // newline
+          break;
+
+        case 'f':
+          result.append('\f'); // form feed
+          break;
+
+        case 'r':
+          result.append('\r'); // carriage return
+          break;
+
+        case '"':
+        case '\'':
+        case '\\':
+        case '/':
+          // Quotes, backslash and the JSON-style forward slash all unescape to themselves.
+          result.append(next);
+          break;
+
+        case 'u':
+        {
+          // Unicode escape sequence: \\uXXXX
+          // 'i' currently points at the 'u', so the digits occupy i + 1 .. i + 4.
+          if (i + 4 >= length)
+          {
+            throw new IllegalArgumentException("Incomplete unicode escape sequence at index " + (i - 1));
+          }
+
+          String hex = input.substring(i + 1, i + 5);
+
+          // Each digit is validated individually. Integer.parseInt(hex, 16) is not strict enough here: it
+          // accepts a leading '+' or '-' and non-ASCII digits, so inputs such as "+041" or "-001" would be
+          // decoded instead of being rejected.
+          int codeUnit = 0;
+          for ( int k = 0 ; k < hex.length() ; k++ )
+          {
+            int digit = asciiHexValue(hex.charAt(k));
+            if (digit < 0)
+            {
+              throw new IllegalArgumentException("Invalid unicode escape sequence: \\u" + hex);
+            }
+            codeUnit = (codeUnit << 4) | digit;
+          }
+          result.append((char)codeUnit);
+
+          // Skip the four hex digits because they were already consumed.
+          i += 4;
+          break;
+        }
+
+        default:
+          // Preserve unknown escape sequences literally.
+          // Example: "\x" remains "\x".
+          result.append('\\').append(next);
+          break;
+      }
+    }
+
+    return result.toString();
+  }
+
+  /**
+   * Returns the numeric value of an ASCII hexadecimal digit ({@code 0-9}, {@code a-f}, {@code A-F}).
+   *
+   * @param c the character to inspect
+   * @return the value in the range 0..15, or {@code -1} if the character is not an ASCII hexadecimal digit
+   */
+  private static int asciiHexValue(char c)
+  {
+    if (c >= '0' && c <= '9')
+    {
+      return c - '0';
+    }
+    if (c >= 'a' && c <= 'f')
+    {
+      return c - 'a' + 10;
+    }
+    if (c >= 'A' && c <= 'F')
+    {
+      return c - 'A' + 10;
+    }
+    return -1;
+  }
+}
diff --git a/scim-sdk-common/src/test/java/de/captaingoldfish/scim/sdk/common/constants/JavaStringUnescaperTest.java b/scim-sdk-common/src/test/java/de/captaingoldfish/scim/sdk/common/constants/JavaStringUnescaperTest.java
new file mode 100644
index 000000000..0a0ffc7cc
--- /dev/null
+++ b/scim-sdk-common/src/test/java/de/captaingoldfish/scim/sdk/common/constants/JavaStringUnescaperTest.java
@@ -0,0 +1,181 @@
+package de.captaingoldfish.scim.sdk.common.constants;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.Test;
+
+
+/**
+ * Test suite for {@link JavaStringUnescaper}.
+ * <p>
+ * These tests validate the supported escape sequences and the explicitly defined edge-case behavior of the
+ * custom unescape implementation.
+ * </p>
+ * <p>
+ * The goal is to verify the behavior required by the filter grammar and matching logic, not to replicate
+ * every detail of Apache Commons Text.
+ * </p>
+ */
+class JavaStringUnescaperTest
+{
+
+  /**
+   * Verifies that {@code null} input is returned unchanged.
+   * <p>
+   * This ensures callers do not need to add explicit null checks.
+   * </p>
+   */
+  @Test
+  @DisplayName("Should return null when input is null")
+  void shouldReturnNull()
+  {
+    Assertions.assertNull(JavaStringUnescaper.unescapeJava(null));
+  }
+
+  /**
+   * Ensures that strings without escape sequences remain unchanged.
+   */
+  @Test
+  @DisplayName("Should return unchanged string when no escape sequences are present")
+  void shouldReturnUnchangedString()
+  {
+    String input = "hello world";
+
+    String result = JavaStringUnescaper.unescapeJava(input);
+
+    Assertions.assertEquals("hello world", result);
+  }
+
+  /**
+   * Validates common control character escapes.
+   */
+  @Test
+  @DisplayName("Should correctly unescape common control sequences")
+  void shouldUnescapeControlSequences()
+  {
+    Assertions.assertEquals("hello\nworld", JavaStringUnescaper.unescapeJava("hello\\nworld"));
+    Assertions.assertEquals("a\tb", JavaStringUnescaper.unescapeJava("a\\tb"));
+    Assertions.assertEquals("line1\rline2", JavaStringUnescaper.unescapeJava("line1\\rline2"));
+  }
+
+  /**
+   * Validates less frequently used control characters.
+   */
+  @Test
+  @DisplayName("Should correctly unescape backspace and form feed")
+  void shouldUnescapeBackspaceAndFormFeed()
+  {
+    Assertions.assertEquals("a\bb", JavaStringUnescaper.unescapeJava("a\\bb"));
+    Assertions.assertEquals("a\fb", JavaStringUnescaper.unescapeJava("a\\fb"));
+  }
+
+  /**
+   * Ensures correct handling of quotes and backslashes.
+   */
+  @Test
+  @DisplayName("Should correctly unescape quotes and backslash")
+  void shouldUnescapeQuotesAndBackslash()
+  {
+    Assertions.assertEquals("\"test\"", JavaStringUnescaper.unescapeJava("\\\"test\\\""));
+    Assertions.assertEquals("'", JavaStringUnescaper.unescapeJava("\\'"));
+    Assertions.assertEquals("\\", JavaStringUnescaper.unescapeJava("\\\\"));
+  }
+
+  /**
+   * Ensures JSON-style escaped forward slashes are supported.
+   */
+  @Test
+  @DisplayName("Should correctly unescape forward slash")
+  void shouldUnescapeForwardSlash()
+  {
+    Assertions.assertEquals("foo/bar", JavaStringUnescaper.unescapeJava("foo\\/bar"));
+    Assertions.assertEquals("/", JavaStringUnescaper.unescapeJava("\\/"));
+  }
+
+  /**
+   * Verifies correct decoding of unicode escape sequences.
+   */
+  @Test
+  @DisplayName("Should correctly unescape unicode sequences")
+  void shouldUnescapeUnicode()
+  {
+    Assertions.assertEquals("A", JavaStringUnescaper.unescapeJava("\\u0041"));
+    // The expected value is written as a source-level unicode escape (o with diaeresis) so that the
+    // assertion does not depend on the encoding this file is stored or compiled with.
+    Assertions.assertEquals("\u00F6", JavaStringUnescaper.unescapeJava("\\u00F6"));
+    Assertions.assertEquals("!", JavaStringUnescaper.unescapeJava("\\u0021"));
+  }
+
+  /**
+   * Ensures incomplete unicode escapes fail fast.
+   */
+  @Test
+  @DisplayName("Should throw exception on incomplete unicode escape sequence")
+  void shouldThrowOnIncompleteUnicode()
+  {
+    IllegalArgumentException exception = Assertions.assertThrows(IllegalArgumentException.class,
+                                                                 () -> JavaStringUnescaper.unescapeJava("\\u12"));
+
+    Assertions.assertTrue(exception.getMessage().contains("Incomplete unicode"));
+  }
+
+  /**
+   * Ensures invalid unicode escapes fail fast.
+   */
+  @Test
+  @DisplayName("Should throw exception on invalid unicode escape sequence")
+  void shouldThrowOnInvalidUnicode()
+  {
+    IllegalArgumentException exception = Assertions.assertThrows(IllegalArgumentException.class,
+                                                                 () -> JavaStringUnescaper.unescapeJava("\\uZZZZ"));
+
+    Assertions.assertTrue(exception.getMessage().contains("Invalid unicode"));
+  }
+
+  /**
+   * Verifies that unknown escape sequences are preserved.
+   */
+  @Test
+  @DisplayName("Should preserve unknown escape sequences as-is")
+  void shouldPreserveUnknownEscapes()
+  {
+    Assertions.assertEquals("\\q", JavaStringUnescaper.unescapeJava("\\q"));
+    Assertions.assertEquals("\\x", JavaStringUnescaper.unescapeJava("\\x"));
+  }
+
+  /**
+   * Ensures trailing backslashes are preserved.
+   */
+  @Test
+  @DisplayName("Should preserve trailing backslash")
+  void shouldPreserveTrailingBackslash()
+  {
+    Assertions.assertEquals("test\\", JavaStringUnescaper.unescapeJava("test\\"));
+  }
+
+  /**
+   * Validates mixed escape usage in a realistic input.
+   */
+  @Test
+  @DisplayName("Should handle mixed content with multiple escape types")
+  void shouldHandleMixedContent()
+  {
+    String input = "Hello\\nWorld\\t\\u0021 \\\"test\\\" foo\\/bar";
+
+    String result = JavaStringUnescaper.unescapeJava(input);
+
+    Assertions.assertEquals("Hello\nWorld\t! \"test\" foo/bar", result);
+  }
+
+  /**
+   * Verifies SCIM filter-style usage: escaped quotes are correctly converted before matching.
+   */
+  @Test
+  @DisplayName("Should unescape filter-style quoted content for matching")
+  void shouldUnescapeFilterStyleQuotedContentForMatching()
+  {
+    String input = "This is \\\"test\\\" user";
+
+    String result = JavaStringUnescaper.unescapeJava(input);
+
+    Assertions.assertEquals("This is \"test\" user", result);
+  }
+}
diff --git a/scim-sdk-server/src/main/antlr4/de/captaingoldfish/scim/sdk/server/filter/antlr/ScimFilter.g4 b/scim-sdk-server/src/main/antlr4/de/captaingoldfish/scim/sdk/server/filter/antlr/ScimFilter.g4
index e8f643d1e..9fedfaebe 100644
--- a/scim-sdk-server/src/main/antlr4/de/captaingoldfish/scim/sdk/server/filter/antlr/ScimFilter.g4
+++ b/scim-sdk-server/src/main/antlr4/de/captaingoldfish/scim/sdk/server/filter/antlr/ScimFilter.g4
@@ -50,14 +50,21 @@ DECIMAL: '-'? INTEGER '.' DIGIT+ | '-'? 
INTEGER; ATTRIBUTE_NAME: ALPHA (NAMECHAR)*; NAMECHAR: '_' | DIGIT | ALPHA; NAME_URI: ALPHA (NAMECHAR | ':' | '.')* NAMECHAR+ ':'; -TEXT: '"' STRING '"'; +TEXT: '"' (ESC | SAFE_CODE_POINT)* '"'; EXCLUDE: [ \b\t\n]+ -> skip ; - fragment ALPHA: ([a-zA-Z_]); fragment INTEGER: '0' | [1-9] DIGIT*; fragment DIGIT: [0-9] ; -fragment STRING: .+?; +fragment ESC + : '\\' (["\\/bfnrt] | UNICODE); +fragment UNICODE + : 'u' HEX HEX HEX HEX; +fragment HEX + : [0-9a-fA-F]; +fragment SAFE_CODE_POINT + : ~ ["\\\u0000-\u001F]; + fragment A : [aA]; fragment B : [bB]; diff --git a/scim-sdk-server/src/main/java/de/captaingoldfish/scim/sdk/server/filter/antlr/CompareValue.java b/scim-sdk-server/src/main/java/de/captaingoldfish/scim/sdk/server/filter/antlr/CompareValue.java index 53b2b90bd..3111dd810 100644 --- a/scim-sdk-server/src/main/java/de/captaingoldfish/scim/sdk/server/filter/antlr/CompareValue.java +++ b/scim-sdk-server/src/main/java/de/captaingoldfish/scim/sdk/server/filter/antlr/CompareValue.java @@ -5,6 +5,7 @@ import java.util.Optional; import java.util.function.Supplier; +import de.captaingoldfish.scim.sdk.common.constants.JavaStringUnescaper; import de.captaingoldfish.scim.sdk.common.exceptions.InvalidDateTimeRepresentationException; import de.captaingoldfish.scim.sdk.common.exceptions.InvalidFilterException; import de.captaingoldfish.scim.sdk.common.schemas.SchemaAttribute; @@ -51,7 +52,9 @@ public CompareValue(ScimFilterParser.CompareValueContext compareValueContext, Sc } else { - this.value = compareValueContext.getText().replaceFirst("^\"", "").replaceFirst("\"$", ""); + this.value = JavaStringUnescaper.unescapeJava(compareValueContext.getText()) + .replaceFirst("^\"", "") + .replaceFirst("\"$", ""); } validateCompareValue(schemaAttribute); } diff --git a/scim-sdk-server/src/test/java/de/captaingoldfish/scim/sdk/server/endpoints/ResourceEndpointTest.java b/scim-sdk-server/src/test/java/de/captaingoldfish/scim/sdk/server/endpoints/ResourceEndpointTest.java index 
9e399a1e9..09a15afc5 100644 --- a/scim-sdk-server/src/test/java/de/captaingoldfish/scim/sdk/server/endpoints/ResourceEndpointTest.java +++ b/scim-sdk-server/src/test/java/de/captaingoldfish/scim/sdk/server/endpoints/ResourceEndpointTest.java @@ -12,6 +12,7 @@ import java.util.Optional; import java.util.Set; import java.util.UUID; +import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Consumer; import java.util.function.Supplier; @@ -665,6 +666,69 @@ public void testQueryResourcesWithGet() Mockito.notNull()); } + @Test + public void testQueryResourcesWithGetAndFilterContainsDoubleQuotes() + { + int maxUsers = 150; + serviceProvider.getFilterConfig().setSupported(true); + serviceProvider.getFilterConfig().setMaxResults(maxUsers); + resourceEndpoint.getResourceTypeFactory() + .getResourceType(EndpointPaths.USERS) + .setFeatures(ResourceTypeFeatures.builder().autoFiltering(true).build()); + + int counter = 0; + for ( int i = 0 ; i < maxUsers ; i++ ) + { + final String id = UUID.randomUUID().toString(); + Meta meta = Meta.builder() + .resourceType(ResourceTypeNames.USER) + .created(LocalDateTime.now()) + .lastModified(LocalDateTime.now()) + .build(); + final String username = ThreadLocalRandom.current().nextBoolean() ? "This is \"test\" user " + i + : "This is test user " + i; + + if (username.startsWith("This is \"test\"")) + { + counter++; + } + + User user = User.builder().id(id).userName(username).meta(meta).build(); + + userHandler.getInMemoryMap().put(id, user); + } + final String url = BASE_URI + EndpointPaths.USERS + + String.format("?startIndex=1&count=%d&filter=%s", + maxUsers, + "userName sw \"This is \\\"test\\\" user\""); + ScimResponse scimResponse = resourceEndpoint.handleRequest(url, + HttpMethod.GET, + null, + httpHeaders, + new Context(null)); + MatcherAssert.assertThat(scimResponse.getClass(), Matchers.typeCompatibleWith(ListResponse.class)); + ListResponse