Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,11 @@
<artifactId>javacpp</artifactId>
<version>1.5.12</version>
</dependency>
<dependency>
<groupId>com.github.ben-manes.caffeine</groupId>
<artifactId>caffeine</artifactId>
<version>3.1.8</version>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-api</artifactId>
Expand Down
44 changes: 44 additions & 0 deletions src/main/java/com/gliwka/hyperscan/util/ExpressionUtil.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package com.gliwka.hyperscan.util;

import com.gliwka.hyperscan.wrapper.Expression;
import com.gliwka.hyperscan.wrapper.ExpressionFlag;

import java.util.EnumSet;
import java.util.regex.Pattern;

/**
 * Package-private helpers for turning {@link Pattern} instances into Hyperscan
 * {@link Expression} objects.
 */
final class ExpressionUtil {

    /**
     * {@code java.util.regex} flags that change how the pattern <em>text</em> is read.
     *
     * <p>{@link Pattern#LITERAL} makes every character of the pattern literal and
     * {@link Pattern#COMMENTS} makes whitespace and {@code #} comments insignificant. The
     * expression built here receives {@link Pattern#pattern()} unchanged, so a pattern compiled
     * with either flag would be interpreted differently from the way Java interprets it, and
     * the resulting filter could rule out inputs that the Java pattern actually matches
     * (e.g. the literal {@code a{3}} versus the regex {@code a{3}}).
     */
    private static final int UNTRANSLATABLE_FLAGS = Pattern.LITERAL | Pattern.COMMENTS;

    /** Not instantiable; throws if invoked (e.g. reflectively). */
    private ExpressionUtil() {
        throw new IllegalStateException("Utility class");
    }

    /**
     * Builds an {@link Expression} for the given pattern.
     *
     * <p>The expression always carries the {@code UTF8}, {@code PREFILTER}, {@code ALLOWEMPTY}
     * and {@code SINGLEMATCH} flags; {@code CASELESS}, {@code MULTILINE} and {@code DOTALL} are
     * added when the pattern has the corresponding {@code java.util.regex} flag set.
     *
     * @param pattern the Java pattern to translate
     * @param id      the id to assign to the resulting expression
     * @return the expression, or {@code null} when the pattern cannot be represented faithfully
     *         (it uses {@link Pattern#LITERAL} or {@link Pattern#COMMENTS}) or when the
     *         expression fails validation; callers treat {@code null} as "not filterable"
     */
    static Expression mapToExpression(Pattern pattern, int id) {
        // Bail out before building anything: these flags alter the meaning of the pattern
        // text itself, so handing the raw text over could produce false negatives.
        if ((pattern.flags() & UNTRANSLATABLE_FLAGS) != 0) {
            return null;
        }

        EnumSet<ExpressionFlag> flags = EnumSet.of(
                ExpressionFlag.UTF8,
                ExpressionFlag.PREFILTER,
                ExpressionFlag.ALLOWEMPTY,
                ExpressionFlag.SINGLEMATCH);

        if (hasFlag(pattern, Pattern.CASE_INSENSITIVE)) {
            flags.add(ExpressionFlag.CASELESS);
        }

        if (hasFlag(pattern, Pattern.MULTILINE)) {
            flags.add(ExpressionFlag.MULTILINE);
        }

        if (hasFlag(pattern, Pattern.DOTALL)) {
            flags.add(ExpressionFlag.DOTALL);
        }

        Expression expression = new Expression(pattern.pattern(), flags, id);

        // An expression that does not validate is reported as "not filterable".
        if (!expression.validate().isValid()) {
            return null;
        }

        return expression;
    }

    /**
     * Tests whether every bit of {@code flag} is set in the pattern's flags.
     *
     * @param pattern the pattern whose flags are inspected
     * @param flag    a {@code java.util.regex.Pattern} flag constant (or a combination of them)
     * @return {@code true} if all bits of {@code flag} are present in {@code pattern.flags()}
     */
    static boolean hasFlag(Pattern pattern, int flag) {
        return (pattern.flags() & flag) == flag;
    }

}
35 changes: 1 addition & 34 deletions src/main/java/com/gliwka/hyperscan/util/PatternFilter.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public PatternFilter(List<Pattern> patterns) throws CompileErrorException {
int id = 0;

for(Pattern pattern : patterns) {
Expression expression = mapToExpression(pattern, id);
Expression expression = ExpressionUtil.mapToExpression(pattern, id);

if(expression == null) {
//can't be compiled to expression -> not filterable
Expand Down Expand Up @@ -91,39 +91,6 @@ public List<Matcher> filter(String input) {
return matchedMatchers;
}

/**
 * Builds an {@link Expression} for the given pattern.
 *
 * <p>The expression always carries the {@code UTF8}, {@code PREFILTER}, {@code ALLOWEMPTY}
 * and {@code SINGLEMATCH} flags; {@code CASELESS}, {@code MULTILINE} and {@code DOTALL} are
 * added when the pattern has the corresponding {@code java.util.regex} flag set.
 *
 * @param pattern the Java pattern to translate
 * @param id      the id to assign to the resulting expression
 * @return the expression, or {@code null} when the pattern uses {@link Pattern#LITERAL} or
 *         {@link Pattern#COMMENTS} (its raw text would be read differently than Java reads it)
 *         or when the expression fails validation
 */
private Expression mapToExpression(Pattern pattern, int id) {
    // LITERAL and COMMENTS change the meaning of the pattern text itself; passing the raw
    // text on could make the filter reject inputs the Java pattern really matches.
    if(hasFlag(pattern, Pattern.LITERAL) || hasFlag(pattern, Pattern.COMMENTS)) {
        return null;
    }

    EnumSet<ExpressionFlag> flags = EnumSet.of(
            ExpressionFlag.UTF8,
            ExpressionFlag.PREFILTER,
            ExpressionFlag.ALLOWEMPTY,
            ExpressionFlag.SINGLEMATCH
    );

    if(hasFlag(pattern, Pattern.CASE_INSENSITIVE)) {
        flags.add(ExpressionFlag.CASELESS);
    }

    if(hasFlag(pattern, Pattern.MULTILINE)) {
        flags.add(ExpressionFlag.MULTILINE);
    }

    if(hasFlag(pattern, Pattern.DOTALL)) {
        flags.add(ExpressionFlag.DOTALL);
    }

    Expression expression = new Expression(pattern.pattern(), flags, id);

    // An expression that does not validate is reported as "not filterable".
    if(!expression.validate().isValid()) {
        return null;
    }

    return expression;
}

/**
 * Tests whether every bit of {@code flag} is set in the pattern's flags.
 *
 * @param pattern the pattern whose flags are inspected
 * @param flag    a {@code java.util.regex.Pattern} flag constant
 * @return {@code true} if all bits of {@code flag} are present in {@code pattern.flags()}
 */
private boolean hasFlag(Pattern pattern, int flag) {
    final int masked = pattern.flags() & flag;
    return masked == flag;
}

@Override
public void close() throws IOException {
scanner.close();
Expand Down
27 changes: 27 additions & 0 deletions src/main/java/com/gliwka/hyperscan/util/PatternFilterCleaner.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package com.gliwka.hyperscan.util;


import java.lang.ref.PhantomReference;
import java.lang.ref.ReferenceQueue;

/**
 * Phantom reference to a {@link ScopedPatternFilter} that captures the filter's close action
 * at construction time so it can be run later via {@link #clean()}.
 */
final class PatternFilterCleaner extends PhantomReference<ScopedPatternFilter<?>> {

    /** Close action obtained from the referent when this reference was created. */
    private final Runnable closeAction;

    /**
     * Registers the reference with the given queue and stores the referent's close action.
     *
     * @param referent the filter to track; its {@code getCloseAction()} is called here
     * @param q        the queue this reference is registered with
     */
    PatternFilterCleaner(
            ScopedPatternFilter<?> referent, ReferenceQueue<? super ScopedPatternFilter<?>> q) {
        super(referent, q);
        closeAction = referent.getCloseAction();
    }

    /**
     * Runs the stored close action, if there is one. Any {@link Exception} it throws is
     * deliberately discarded.
     */
    public void clean() {
        if (closeAction == null) {
            return;
        }
        try {
            closeAction.run();
        } catch (Exception ignored) {
            // Swallow exceptions to avoid disrupting the cleaner thread
        }
    }
}

75 changes: 75 additions & 0 deletions src/main/java/com/gliwka/hyperscan/util/ScopedPatternFilter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package com.gliwka.hyperscan.util;

import java.io.Closeable;
import java.util.List;
import java.util.function.Function;

/**
 * A pre-compiled filter over a set of regular expression patterns, intended for fast
 * scanning with the Hyperscan library.
 *
 * <p>The filter is an optimization layer: it narrows a large collection of patterns down to
 * the candidates worth checking before the more expensive full regex match is run. In other
 * words it is a "pre-filter" / "candidate selection" step, not a matcher.
 *
 * <h3>Usage Pattern and Contract</h3>
 * {@link #filter(String)} accepts an input string and returns the list of "potential
 * matches", which consists of:
 * <ol>
 * <li>every pattern the Hyperscan engine reported as matching, and</li>
 * <li>every pattern that Hyperscan could not compile (for example patterns using
 * lookarounds or other unsupported features). These are always returned because the
 * filter has no way to rule them out.</li>
 * </ol>
 *
 * <p><b>The caller must still confirm each returned candidate with a precise match using a
 * regular regex engine such as {@link java.util.regex.Matcher}.</b>
 *
 * <h3>Resource Management</h3>
 * The interface extends {@link Closeable} because implementations hold native resources
 * (a compiled Hyperscan database and scratch space) that have to be released. Use it in a
 * try-with-resources statement so cleanup always happens.
 *
 * <p>Example usage:
 * <pre>{@code
 * ScopedPatternFilterFactory<Pattern> factory = ...;
 *
 * try (ScopedPatternFilter<Pattern> filter = factory.get()) {
 *     List<Pattern> potentialMatches = filter.filter("Some input string to test");
 *     for (Pattern candidate : potentialMatches) {
 *         if (candidate.matcher("Some input string to test").find()) {
 *             // This is a confirmed match.
 *         }
 *     }
 * }
 * }</pre>
 *
 * @param <T> the type of the original object associated with a pattern, so the filter can be
 *            used with custom classes and not only {@link java.util.regex.Pattern} objects
 * @see ScopedPatternFilterFactory
 * @see java.util.regex.Pattern
 */
public interface ScopedPatternFilter<T> extends Closeable, Function<String, List<T>> {

    /**
     * Filters the given input and returns the patterns that may match it. Compatible patterns
     * are checked with the Hyperscan library; patterns that could not be compiled for
     * Hyperscan are always part of the result, since they remain potential matches that need
     * further checking.
     *
     * @param input the input to filter
     * @return the patterns that either matched via Hyperscan or could not be filtered by it
     */
    List<T> filter(String input);

    /**
     * {@link Function} view of this filter; simply delegates to {@link #filter(String)}.
     *
     * @param input the input to filter
     * @return the result of {@code filter(input)}
     */
    @Override
    default List<T> apply(String input) {
        return this.filter(input);
    }

    /**
     * Returns the action that releases this filter's resources. The default implementation
     * returns a {@link Runnable} that does nothing.
     *
     * <p>NOTE(review): {@code PatternFilterCleaner} stores this action while tracking the
     * filter through a phantom reference, so an action that strongly references the filter
     * itself would presumably keep it reachable; confirm that implementations avoid this.
     *
     * @return the close action; a no-op by default
     */
    default Runnable getCloseAction() {
        final Runnable noOp = () -> {
        };
        return noOp;
    }
}

Loading