Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
package de.captaingoldfish.scim.sdk.common.constants;

/**
 * Utility class for unescaping Java/JSON-style escape sequences within strings.
 * <p>
 * This implementation is intentionally lightweight and independent of external libraries. It is designed for
 * use cases such as SCIM filter parsing, where quoted string values may contain escaped characters that must
 * be converted back into their literal form before comparison.
 * </p>
 * <p>
 * The following escape sequences are supported:
 * </p>
 * <ul>
 * <li>{@code \b}: backspace</li>
 * <li>{@code \t}: tab</li>
 * <li>{@code \n}: newline</li>
 * <li>{@code \f}: form feed</li>
 * <li>{@code \r}: carriage return</li>
 * <li>{@code \"}: double quote</li>
 * <li>{@code \'}: single quote</li>
 * <li>{@code \\}: backslash</li>
 * <li>{@code \/}: forward slash</li>
 * <li>{@code \\uXXXX}: unicode escape with exactly four ASCII hexadecimal digits ({@code 0-9}, {@code a-f},
 * {@code A-F}); each escape yields one UTF-16 code unit</li>
 * </ul>
 * <p>
 * Unknown escape sequences (for example {@code \x}) are preserved as-is to avoid unintended data loss. A
 * trailing backslash is also preserved literally.
 * </p>
 * <p>
 * This class does not aim to be a full drop-in replacement for {@code StringEscapeUtils.unescapeJava(...)}.
 * Instead, it deliberately supports the escape sequences required by the filter grammar and a few closely
 * related variants that are commonly expected by developers.
 * </p>
 */
public final class JavaStringUnescaper
{

  /**
   * Number of hexadecimal digits that must follow the {@code u} of a unicode escape.
   */
  private static final int UNICODE_HEX_LENGTH = 4;

  private JavaStringUnescaper()
  {
    // Utility class
  }

  /**
   * Unescapes supported Java/JSON-style escape sequences in the given input string.
   * <p>
   * If the input is {@code null}, this method returns {@code null}.
   * </p>
   * <p>
   * Supported examples:
   * </p>
   *
   * <pre>
   * {@code
   * unescapeJava("hello\\nworld") -> "hello\nworld"
   * unescapeJava("\\\"test\\\"") -> "\"test\""
   * unescapeJava("foo\\/bar") -> "foo/bar"
   * unescapeJava("\\u0041") -> "A"
   * }
   * </pre>
   *
   * @param input the input string that may contain escape sequences
   * @return the unescaped string, or {@code null} if the input is {@code null}
   * @throws IllegalArgumentException if an incomplete or invalid unicode escape sequence is encountered. A
   *           sequence is invalid if any of its four characters is not an ASCII hexadecimal digit; sign
   *           characters (for example {@code \\u+041}) are rejected as well.
   */
  public static String unescapeJava(String input)
  {
    // Preserve null semantics so callers do not need an additional null check.
    if (input == null)
    {
      return null;
    }

    final int length = input.length();

    // The unescaped result is never longer than the input, so the input length is a safe capacity.
    StringBuilder result = new StringBuilder(length);

    for ( int i = 0 ; i < length ; i++ )
    {
      char current = input.charAt(i);

      // Fast path for ordinary characters: append directly.
      if (current != '\\')
      {
        result.append(current);
        continue;
      }

      // A trailing backslash cannot form a valid escape sequence.
      // It is preserved literally instead of throwing an exception.
      if (i + 1 >= length)
      {
        result.append('\\');
        break;
      }

      // Consume the character after the backslash; it selects the escape sequence.
      char next = input.charAt(++i);

      switch (next)
      {
        case 'b':
          result.append('\b'); // backspace
          break;

        case 't':
          result.append('\t'); // horizontal tab
          break;

        case 'n':
          result.append('\n'); // newline
          break;

        case 'f':
          result.append('\f'); // form feed
          break;

        case 'r':
          result.append('\r'); // carriage return
          break;

        case '"':
          result.append('"'); // escaped double quote
          break;

        case '\'':
          result.append('\''); // escaped single quote
          break;

        case '\\':
          result.append('\\'); // escaped backslash
          break;

        case '/':
          result.append('/'); // escaped forward slash (JSON-style)
          break;

        case 'u':
          // Unicode escape sequence: \\uXXXX, where i currently points at the 'u'.
          result.append(decodeUnicodeEscape(input, i));
          // Skip the four hex digits because they were already consumed by the decoder.
          i += UNICODE_HEX_LENGTH;
          break;

        default:
          // Preserve unknown escape sequences literally.
          // Example: "\x" remains "\x".
          result.append('\\').append(next);
          break;
      }
    }

    return result.toString();
  }

  /**
   * Decodes the four hexadecimal digits that follow the {@code u} of a unicode escape.
   * <p>
   * The digits are validated one by one instead of being handed to {@code Integer.parseInt(hex, 16)},
   * because that method also accepts a leading sign and non-ASCII Unicode digits, which are not valid inside
   * a unicode escape.
   * </p>
   *
   * @param input the complete string being unescaped
   * @param uIndex the index of the {@code u} character within {@code input}
   * @return the UTF-16 code unit described by the four hexadecimal digits
   * @throws IllegalArgumentException if fewer than four characters follow the {@code u} or if any of them is
   *           not an ASCII hexadecimal digit
   */
  private static char decodeUnicodeEscape(String input, int uIndex)
  {
    final int firstDigit = uIndex + 1;
    final int end = firstDigit + UNICODE_HEX_LENGTH;

    if (end > input.length())
    {
      // uIndex - 1 is the position of the backslash that started the escape.
      throw new IllegalArgumentException("Incomplete unicode escape sequence at index " + (uIndex - 1));
    }

    int codeUnit = 0;
    for ( int k = firstDigit ; k < end ; k++ )
    {
      int digit = hexValue(input.charAt(k));
      if (digit < 0)
      {
        throw new IllegalArgumentException("Invalid unicode escape sequence: \\u"
                                           + input.substring(firstDigit, end));
      }
      codeUnit = (codeUnit << 4) | digit;
    }

    // Four hex digits always fit into 16 bits, so the cast cannot truncate.
    return (char)codeUnit;
  }

  /**
   * Returns the numeric value of an ASCII hexadecimal digit.
   *
   * @param c the character to inspect
   * @return a value between 0 and 15, or {@code -1} if {@code c} is not one of {@code 0-9}, {@code a-f} or
   *         {@code A-F}
   */
  private static int hexValue(char c)
  {
    if (c >= '0' && c <= '9')
    {
      return c - '0';
    }
    if (c >= 'a' && c <= 'f')
    {
      return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F')
    {
      return c - 'A' + 10;
    }
    return -1;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
package de.captaingoldfish.scim.sdk.common.constants;

import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Test;


/**
 * Unit tests for {@link JavaStringUnescaper}.
 * <p>
 * Each test pins one documented behavior of the custom unescape implementation: the supported escape
 * sequences and the explicitly defined edge cases.
 * </p>
 * <p>
 * The tests cover what the filter grammar and matching logic need. They do not try to mirror every detail
 * of Apache Commons Text.
 * </p>
 */
class JavaStringUnescaperTest
{

  /**
   * Shorthand for the method under test.
   *
   * @param raw the escaped input
   * @return whatever {@link JavaStringUnescaper#unescapeJava(String)} returns for {@code raw}
   */
  private static String unescape(String raw)
  {
    return JavaStringUnescaper.unescapeJava(raw);
  }

  /**
   * A {@code null} input must come back as {@code null} so that callers can skip their own null check.
   */
  @Test
  @DisplayName("Should return null when input is null")
  void shouldReturnNull()
  {
    String unescaped = unescape(null);

    Assertions.assertNull(unescaped);
  }

  /**
   * Text that contains no backslash must pass through untouched.
   */
  @Test
  @DisplayName("Should return unchanged string when no escape sequences are present")
  void shouldReturnUnchangedString()
  {
    Assertions.assertEquals("hello world", unescape("hello world"));
  }

  /**
   * Newline, tab and carriage return escapes are translated to their control characters.
   */
  @Test
  @DisplayName("Should correctly unescape common control sequences")
  void shouldUnescapeControlSequences()
  {
    String newline = unescape("hello\\nworld");
    Assertions.assertEquals("hello\nworld", newline);

    String tab = unescape("a\\tb");
    Assertions.assertEquals("a\tb", tab);

    String carriageReturn = unescape("line1\\rline2");
    Assertions.assertEquals("line1\rline2", carriageReturn);
  }

  /**
   * Backspace and form feed escapes are translated to their control characters.
   */
  @Test
  @DisplayName("Should correctly unescape backspace and form feed")
  void shouldUnescapeBackspaceAndFormFeed()
  {
    String backspace = unescape("a\\bb");
    Assertions.assertEquals("a\bb", backspace);

    String formFeed = unescape("a\\fb");
    Assertions.assertEquals("a\fb", formFeed);
  }

  /**
   * Escaped double quotes, single quotes and backslashes lose their leading backslash.
   */
  @Test
  @DisplayName("Should correctly unescape quotes and backslash")
  void shouldUnescapeQuotesAndBackslash()
  {
    String doubleQuoted = unescape("\\\"test\\\"");
    Assertions.assertEquals("\"test\"", doubleQuoted);

    String singleQuote = unescape("\\'");
    Assertions.assertEquals("'", singleQuote);

    String backslash = unescape("\\\\");
    Assertions.assertEquals("\\", backslash);
  }

  /**
   * The JSON-style escaped forward slash is reduced to a plain slash.
   */
  @Test
  @DisplayName("Should correctly unescape forward slash")
  void shouldUnescapeForwardSlash()
  {
    String embedded = unescape("foo\\/bar");
    Assertions.assertEquals("foo/bar", embedded);

    String standalone = unescape("\\/");
    Assertions.assertEquals("/", standalone);
  }

  /**
   * Four-digit unicode escapes are decoded to the character they denote.
   */
  @Test
  @DisplayName("Should correctly unescape unicode sequences")
  void shouldUnescapeUnicode()
  {
    String latinCapitalA = unescape("\\u0041");
    Assertions.assertEquals("A", latinCapitalA);

    String umlaut = unescape("\\u00F6");
    Assertions.assertEquals("ö", umlaut);

    String exclamationMark = unescape("\\u0021");
    Assertions.assertEquals("!", exclamationMark);
  }

  /**
   * A unicode escape with fewer than four digits is rejected immediately.
   */
  @Test
  @DisplayName("Should throw exception on incomplete unicode escape sequence")
  void shouldThrowOnIncompleteUnicode()
  {
    IllegalArgumentException thrown = Assertions.assertThrows(IllegalArgumentException.class,
                                                              () -> unescape("\\u12"));

    String message = thrown.getMessage();
    Assertions.assertTrue(message.contains("Incomplete unicode"));
  }

  /**
   * A unicode escape whose digits are not hexadecimal is rejected immediately.
   */
  @Test
  @DisplayName("Should throw exception on invalid unicode escape sequence")
  void shouldThrowOnInvalidUnicode()
  {
    IllegalArgumentException thrown = Assertions.assertThrows(IllegalArgumentException.class,
                                                              () -> unescape("\\uZZZZ"));

    String message = thrown.getMessage();
    Assertions.assertTrue(message.contains("Invalid unicode"));
  }

  /**
   * Escape sequences the implementation does not know keep both the backslash and the following character.
   */
  @Test
  @DisplayName("Should preserve unknown escape sequences as-is")
  void shouldPreserveUnknownEscapes()
  {
    String unknownQ = unescape("\\q");
    Assertions.assertEquals("\\q", unknownQ);

    String unknownX = unescape("\\x");
    Assertions.assertEquals("\\x", unknownX);
  }

  /**
   * A backslash at the very end of the input is kept literally.
   */
  @Test
  @DisplayName("Should preserve trailing backslash")
  void shouldPreserveTrailingBackslash()
  {
    Assertions.assertEquals("test\\", unescape("test\\"));
  }

  /**
   * Several different escape types inside one realistic input are all decoded.
   */
  @Test
  @DisplayName("Should handle mixed content with multiple escape types")
  void shouldHandleMixedContent()
  {
    String expected = "Hello\nWorld\t! \"test\" foo/bar";

    String actual = unescape("Hello\\nWorld\\t\\u0021 \\\"test\\\" foo\\/bar");

    Assertions.assertEquals(expected, actual);
  }

  /**
   * SCIM filter-style usage: escaped quotes inside a filter value are converted before matching.
   */
  @Test
  @DisplayName("Should unescape filter-style quoted content for matching")
  void shouldUnescapeFilterStyleQuotedContentForMatching()
  {
    String expected = "This is \"test\" user";

    String actual = unescape("This is \\\"test\\\" user");

    Assertions.assertEquals(expected, actual);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,21 @@ DECIMAL: '-'? INTEGER '.' DIGIT+ | '-'? INTEGER;
ATTRIBUTE_NAME: ALPHA (NAMECHAR)*;
NAMECHAR: '_' | DIGIT | ALPHA;
NAME_URI: ALPHA (NAMECHAR | ':' | '.')* NAMECHAR+ ':';
TEXT: '"' STRING '"';
TEXT: '"' (ESC | SAFE_CODE_POINT)* '"';
EXCLUDE: [ \b\t\n]+ -> skip ;


fragment ALPHA: ([a-zA-Z_]);
fragment INTEGER: '0' | [1-9] DIGIT*;
fragment DIGIT: [0-9] ;
fragment STRING: .+?;
fragment ESC
: '\\' (["\\/bfnrt] | UNICODE);
fragment UNICODE
: 'u' HEX HEX HEX HEX;
fragment HEX
: [0-9a-fA-F];
fragment SAFE_CODE_POINT
: ~ ["\\\u0000-\u001F];


fragment A : [aA];
fragment B : [bB];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import java.util.Optional;
import java.util.function.Supplier;

import de.captaingoldfish.scim.sdk.common.constants.JavaStringUnescaper;
import de.captaingoldfish.scim.sdk.common.exceptions.InvalidDateTimeRepresentationException;
import de.captaingoldfish.scim.sdk.common.exceptions.InvalidFilterException;
import de.captaingoldfish.scim.sdk.common.schemas.SchemaAttribute;
Expand Down Expand Up @@ -51,7 +52,9 @@ public CompareValue(ScimFilterParser.CompareValueContext compareValueContext, Sc
}
else
{
this.value = compareValueContext.getText().replaceFirst("^\"", "").replaceFirst("\"$", "");
this.value = JavaStringUnescaper.unescapeJava(compareValueContext.getText())
.replaceFirst("^\"", "")
.replaceFirst("\"$", "");
}
validateCompareValue(schemaAttribute);
}
Expand Down
Loading