Skip to content

Commit ddbe45b

Browse files
committed
add new method.
1 parent 93f6018 commit ddbe45b

3 files changed

Lines changed: 75 additions & 0 deletions

File tree

pom.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,11 @@
7979
<artifactId>iban4j</artifactId>
8080
<version>3.2.11-RELEASE</version>
8181
</dependency>
82+
<dependency>
83+
<groupId>com.googlecode.owasp-java-html-sanitizer</groupId>
84+
<artifactId>owasp-java-html-sanitizer</artifactId>
85+
<version>20240325.1</version>
86+
</dependency>
8287
<!-- TEST ONLY PURPOSE -->
8388
<dependency>
8489
<groupId>org.junit.jupiter</groupId>

src/main/java/eu/righettod/SecurityUtils.java

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
3232
import org.apache.tika.mime.MediaType;
3333
import org.apache.tika.mime.MimeTypes;
3434
import org.iban4j.IbanUtil;
35+
import org.owasp.html.HtmlPolicyBuilder;
36+
import org.owasp.html.PolicyFactory;
3537
import org.w3c.dom.Document;
3638
import org.xml.sax.EntityResolver;
3739
import org.xml.sax.InputSource;
@@ -1560,6 +1562,7 @@ public static Map<SensitiveInformationType, Set<String>> extractAllSensitiveInfo
15601562
*/
15611563
public static boolean isGZIPCompressedDataSafe(byte[] compressedBytes, long maxCountOfDecompressedBytesAllowed) {
15621564
boolean isSafe = false;
1565+
15631566
try {
15641567
long limit = maxCountOfDecompressedBytesAllowed;
15651568
long totalRead = 0L;
@@ -1580,6 +1583,52 @@ public static boolean isGZIPCompressedDataSafe(byte[] compressedBytes, long maxC
15801583
} catch (Exception e) {
15811584
isSafe = false;
15821585
}
1586+
15831587
return isSafe;
15841588
}
1589+
1590+
/**
1591+
* Process a string, intended to be written in a log, to remove as much as possible information that can lead to an exposure to a log injection vulnerability.<br><br>
1592+
* <b>Log injection</b> is also called <b>log forging</b>.<br><br>
1593+
* The following information are removed:
1594+
* <ul>
1595+
* <li>Characters: Carriage Return (CR), Linefeed (LF) and Tabulation (TAB).</li>
1596+
* <li>Leading and trailing spaces.</li>
1597+
* <li>Any HTML tags.</li>
1598+
* </ul><br><br>
1599+
* A parameter is also used to limit the maximum length of the sanitized message.
1600+
* To remove any HTML tags, the OWASP project <a href="https://owasp.org/www-project-java-html-sanitizer/">Java HTML Sanitizer</a> is leveraged.<br>
1601+
* I delegated such removal to a dedicated library to prevent missing of edge cases as well as potential bypasses.
1602+
*
1603+
* @param message The original string message intended to be written in a log.
1604+
* @param maxMessageLength The maximum number of characters after which the sanitized message must be truncated. If inferior to 1 then default to the value of 500.
1605+
* @return The string message cleaned.
1606+
* @see "https://www.wallarm.com/what/log-forging-attack"
1607+
* @see "https://www.invicti.com/learn/crlf-injection"
1608+
* @see "https://capec.mitre.org/data/definitions/93.html"
1609+
* @see "https://codeql.github.com/codeql-query-help/javascript/js-log-injection/"
1610+
* @see "https://owasp.org/www-project-java-html-sanitizer/"
1611+
* @see "https://github.com/OWASP/java-html-sanitizer"
1612+
*/
1613+
public static String sanitizeLogMessage(String message, int maxMessageLength) {
1614+
String sanitized = message;
1615+
int maxSanitizedMessageLength = maxMessageLength;
1616+
1617+
if (sanitized != null && !sanitized.isBlank()) {
1618+
if (maxSanitizedMessageLength < 1) {
1619+
maxSanitizedMessageLength = 500;
1620+
}
1621+
//Step 1: Remove any CR/LR/TAB characters as well as leading and trailing spaces
1622+
sanitized = sanitized.replaceAll("[\\n\\r\\t]", "").trim();
1623+
//Step 2: Remove any HTML tags
1624+
PolicyFactory htmlSanitizerPolicy = new HtmlPolicyBuilder().toFactory();
1625+
sanitized = htmlSanitizerPolicy.sanitize(sanitized);
1626+
//Step 3: Truncate the string in case of need
1627+
if (sanitized.length() > maxSanitizedMessageLength) {
1628+
sanitized = sanitized.substring(0, maxSanitizedMessageLength);
1629+
}
1630+
}
1631+
1632+
return sanitized;
1633+
}
15851634
}

src/test/java/eu/righettod/TestSecurityUtils.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -785,5 +785,26 @@ public void isGZIPCompressedDataSafe() throws Exception {
785785
isSafe = SecurityUtils.isGZIPCompressedDataSafe(testData, limit);
786786
assertTrue(isSafe, String.format(falsePositiveMsgTemplate, testData.length, limit));
787787
}
788+
789+
@Test
790+
public void sanitizeLogMessage() {
791+
//Case format is the following
792+
//[0]: The maximum number of characters after which the sanitized message must be truncated
793+
//[1]: The original string message intended to be written in a log
794+
//[2]: The expected sanitized message
795+
final List<String[]> cases = new ArrayList<>();
796+
cases.add(new String[]{"1000", "<b>test msg</b><script>alert(1)</script>", "test msg"});
797+
cases.add(new String[]{"1000", "test<xss>msg</xss>\n1\r2\t3\t4\n5\r6\t7", "testmsg1234567"});
798+
cases.add(new String[]{"1000", " test<xss>msg</xss>\n1\r2\t3\t4\n5\r6\t7\t\r\n ", "testmsg1234567"});
799+
cases.add(new String[]{"0", "<b>test msg</b><script>alert(1)</script>", "test msg"});
800+
cases.add(new String[]{"10", "AAAAAAAAAACCC<script src='https://evil.com/a.js'></script>BBBBBBBBBB", "AAAAAAAAAA"});
801+
cases.forEach(caseData -> {
802+
int maxMessageLength = Integer.parseInt(caseData[0].trim());
803+
String originalMessage = caseData[1];
804+
String expectedSanitizedMessage = caseData[2];
805+
String sanitizedMessage = SecurityUtils.sanitizeLogMessage(originalMessage, maxMessageLength);
806+
assertEquals(expectedSanitizedMessage, sanitizedMessage);
807+
});
808+
}
788809
}
789810

0 commit comments

Comments
 (0)