diff --git a/docs/ingestion/data-formats.md b/docs/ingestion/data-formats.md
index 6a2aedc39764..1427ce0164d4 100644
--- a/docs/ingestion/data-formats.md
+++ b/docs/ingestion/data-formats.md
@@ -73,6 +73,74 @@ Besides text formats, Druid also supports binary formats such as [Orc](#orc) and
Druid supports custom text data formats and can use the Regex input format to parse them. However, be aware doing this to
parse data is less efficient than writing a native Java `InputFormat` extension, or using an external stream processor. We welcome contributions of new input formats.
+## Regex engine configuration
+
+To choose which regex engine the `regex` input format uses, set the following runtime property:
+
+```properties
+druid.regex.engine=JAVA
+```
+
+Supported values:
+
+| Value | Description |
+|----------|--------------------------------------------------------------------------|
+| `JAVA` | Uses Java's built-in `java.util.regex.Pattern` engine. |
+| `RE2J` | Uses Google's RE2/J regex engine with linear-time matching guarantees.   |
+
+Default value:
+
+```properties
+druid.regex.engine=JAVA
+```
+
+### RE2/J engine
+
+Setting:
+
+```properties
+druid.regex.engine=RE2J
+```
+
+enables the RE2/J regex engine for the `regex` input format used by ingestion tasks.
+
+RE2/J helps protect against catastrophic backtracking and Regular Expression Denial of Service (ReDoS) attacks by guaranteeing that matching time grows linearly with the size of the input.
+
+### Compatibility differences
+
+RE2/J does not support all Java regex features.
+
+Unsupported or partially supported features include:
+- backreferences
+- lookahead and lookbehind assertions
+- constructs that depend on backtracking, such as possessive quantifiers and atomic groups
+
+Patterns using unsupported constructs will fail during regex compilation.
+
+### Example of catastrophic backtracking
+
+With the `JAVA` engine, the following pattern may cause catastrophic backtracking:
+
+```regex
+^(.*a){20}$
+```
+
+against input such as:
+
+```text
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaX
+```
+
+Using `RE2J` avoids this issue because it evaluates the pattern without backtracking.
+
+### Choosing an engine
+
+- `JAVA` supports a richer regex syntax, including constructs that `RE2J` rejects, such as backreferences.
+- `RE2J` provides safer and more predictable runtime characteristics.
+- For trusted internal ingestion specs, `JAVA` may be preferred for compatibility.
+- For externally supplied regex patterns, `RE2J` is recommended.
+
+
## Input format
You can use the `inputFormat` field to specify the data format for your input data.
diff --git a/extensions-core/druid-kerberos/pom.xml b/extensions-core/druid-kerberos/pom.xml
index f1843314434d..5700d5e24da2 100644
--- a/extensions-core/druid-kerberos/pom.xml
+++ b/extensions-core/druid-kerberos/pom.xml
@@ -248,6 +248,10 @@
org.eclipse.jetty
jetty-servlet
+
+ com.google.re2j
+ re2j
+
diff --git a/licenses.yaml b/licenses.yaml
index 23f58a0d3b3d..d70eab9e1ebb 100644
--- a/licenses.yaml
+++ b/licenses.yaml
@@ -5239,7 +5239,7 @@ name: RE2/J
license_category: binary
module: java-core
license_name: The Go license
-version: 1.1
+version: 1.7
license_file_path: licenses/bin/re2j.GO
libraries:
- com.google.re2j: re2j
diff --git a/processing/pom.xml b/processing/pom.xml
index 48a4d4999ef0..4469d67fb518 100644
--- a/processing/pom.xml
+++ b/processing/pom.xml
@@ -292,6 +292,11 @@
io.timeandspace
cron-scheduler
+
+ com.google.re2j
+ re2j
+ 1.7
+
diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/RegexInputFormat.java b/processing/src/main/java/org/apache/druid/data/input/impl/RegexInputFormat.java
index a3057fa212b2..e807427a6862 100644
--- a/processing/src/main/java/org/apache/druid/data/input/impl/RegexInputFormat.java
+++ b/processing/src/main/java/org/apache/druid/data/input/impl/RegexInputFormat.java
@@ -19,6 +19,7 @@
package org.apache.druid.data.input.impl;
+import com.fasterxml.jackson.annotation.JacksonInject;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonInclude;
@@ -29,12 +30,14 @@
import org.apache.druid.data.input.InputEntityReader;
import org.apache.druid.data.input.InputFormat;
import org.apache.druid.data.input.InputRowSchema;
+import org.apache.druid.regex.RegexConfig;
+import org.apache.druid.regex.RegexPattern;
+import org.apache.druid.regex.RegexPatternFactory;
import org.apache.druid.utils.CompressionUtils;
import javax.annotation.Nullable;
import java.io.File;
import java.util.List;
-import java.util.regex.Pattern;
public class RegexInputFormat implements InputFormat
{
@@ -44,10 +47,11 @@ public class RegexInputFormat implements InputFormat
private final String listDelimiter;
private final List columns;
@JsonIgnore
- private final Supplier compiledPatternSupplier;
+ private final Supplier compiledPatternSupplier;
@JsonCreator
public RegexInputFormat(
+ @JacksonInject RegexConfig regexConfig,
@JsonProperty("pattern") String pattern,
@JsonProperty("listDelimiter") @Nullable String listDelimiter,
@JsonProperty("columns") @Nullable List columns
@@ -56,7 +60,7 @@ public RegexInputFormat(
this.pattern = pattern;
this.listDelimiter = listDelimiter;
this.columns = columns;
- this.compiledPatternSupplier = Suppliers.memoize(() -> Pattern.compile(pattern));
+ this.compiledPatternSupplier = Suppliers.memoize(() -> RegexPatternFactory.compile(regexConfig.getEngine(), pattern));
}
@JsonProperty
diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/RegexReader.java b/processing/src/main/java/org/apache/druid/data/input/impl/RegexReader.java
index 66f4d25b474e..5b6dbee41809 100644
--- a/processing/src/main/java/org/apache/druid/data/input/impl/RegexReader.java
+++ b/processing/src/main/java/org/apache/druid/data/input/impl/RegexReader.java
@@ -30,19 +30,19 @@
import org.apache.druid.java.util.common.parsers.ParseException;
import org.apache.druid.java.util.common.parsers.ParserUtils;
import org.apache.druid.java.util.common.parsers.Parsers;
+import org.apache.druid.regex.RegexMatcher;
+import org.apache.druid.regex.RegexPattern;
import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
public class RegexReader extends TextReader.Strings
{
private final String pattern;
- private final Pattern compiledPattern;
+ private final RegexPattern compiledPattern;
private final Function transformationFunction;
private List columns;
@@ -51,7 +51,7 @@ public class RegexReader extends TextReader.Strings
InputRowSchema inputRowSchema,
InputEntity source,
String pattern,
- Pattern compiledPattern,
+ RegexPattern compiledPattern,
@Nullable String listDelimiter,
@Nullable List columns
)
@@ -83,7 +83,7 @@ protected List