+ * <p>This interface is designed as an optimization layer to quickly eliminate non-matching
+ * patterns from a large collection before performing more expensive, full regex matches.
+ * It functions as a "pre-filter" or "candidate selection" tool.
+ *
+ * <h2>Usage Pattern and Contract</h2>
+ * The core method, {@link #filter(String)}, takes an input string and returns a list
+ * of "potential matches". This list includes:
+ * <ul>
+ *   <li>All patterns that were successfully matched by the high-performance Hyperscan engine.</li>
+ *   <li>All patterns that could not be compiled by Hyperscan (e.g., those containing
+ *       lookarounds or other unsupported features). These are always included as they
+ *       cannot be definitively ruled out by this filter.</li>
+ * </ul>
+ *
+ * <p><b>Crucially</b>, the caller is responsible for performing a final, precise match on the
+ * returned candidates using a standard regex engine like {@link java.util.regex.Matcher}.
+ *
+ * <h2>Resource Management</h2>
+ * As this interface extends {@link Closeable}, it holds native resources (a compiled
+ * Hyperscan database and scratch space) that must be released. It is intended for use
+ * within a try-with-resources statement to ensure proper cleanup.
+ *
+ * <p>Example usage:
+ * <pre>{@code
+ * ScopedPatternFilterFactory<Pattern> factory = ...;
+ *
+ * try (ScopedPatternFilter<Pattern> filter = factory.get()) {
+ *     List<Pattern> potentialMatches = filter.filter("Some input string to test");
+ *     for (Pattern candidate : potentialMatches) {
+ *         if (candidate.matcher("Some input string to test").find()) {
+ *             // This is a confirmed match.
+ *         }
+ *     }
+ * }
+ * }</pre>
+ *
+ * @param <T> The type of the original object associated with a pattern. This allows the
+ * filter to be used with custom classes, not just {@link java.util.regex.Pattern} objects.
+ * @see ScopedPatternFilterFactory
+ * @see java.util.regex.Pattern
+ */
+public interface ScopedPatternFilter<T> extends Closeable, Function<String, List<T>> {
+
+    /**
+     * Filters the provided input and returns a list of potentially matching patterns. This method
+     * uses the high-performance Hyperscan library for compatible patterns. Any patterns that could
+     * not be compiled for Hyperscan are always included in the returned list, as they are considered
+     * potential matches that require further checking.
+     *
+     * @param input Input to be filtered
+     * @return A list of patterns that either matched via Hyperscan or could not be filtered by it.
+     */
+    List<T> filter(String input);
+
+    /**
+     * {@link Function} view of this filter; simply delegates to {@link #filter(String)} so that a
+     * filter can be passed wherever a {@code Function<String, List<T>>} is expected.
+     *
+     * @param s Input to be filtered
+     * @return the result of {@code filter(s)}
+     */
+    @Override
+    default List<T> apply(String s) {
+        return filter(s);
+    }
+
+    /**
+     * Returns an action that releases whatever resources this filter holds.
+     *
+     * <p>The default implementation returns a no-op, which is appropriate for filters that own
+     * no resources of their own. Implementations that own native resources should override it.
+     *
+     * @return a runnable performing the cleanup; never {@code null}
+     */
+    default Runnable getCloseAction() {
+        return () -> {
+        };
+    }
+}
+
diff --git a/src/main/java/com/gliwka/hyperscan/util/ScopedPatternFilterFactory.java b/src/main/java/com/gliwka/hyperscan/util/ScopedPatternFilterFactory.java
new file mode 100644
index 0000000..4eafce4
--- /dev/null
+++ b/src/main/java/com/gliwka/hyperscan/util/ScopedPatternFilterFactory.java
@@ -0,0 +1,227 @@
+package com.gliwka.hyperscan.util;
+
+import com.github.benmanes.caffeine.cache.Caffeine;
+import com.github.benmanes.caffeine.cache.RemovalCause;
+import com.gliwka.hyperscan.wrapper.CompileErrorException;
+import lombok.AccessLevel;
+import lombok.Getter;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.lang.ref.ReferenceQueue;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.ScheduledFuture;
+import java.util.concurrent.ThreadFactory;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.Function;
+import java.util.function.Supplier;
+import java.util.regex.Pattern;
+
+/**
+ * A factory for creating and managing thread-local instances of {@link ScopedPatternFilter}.
+ *
+ * <p>This class is the primary entry point for using the Hyperscan filtering mechanism.
+ * It is designed to be created once and shared across an application. It addresses two
+ * key challenges:
+ * <ul>
+ *   <li><b>Performance:</b> The high cost of compiling Hyperscan databases is amortized
+ *       by creating a single, thread-local filter instance that is reused for all
+ *       subsequent operations on that thread.</li>
+ *   <li><b>Thread Safety:</b> Hyperscan's scanning context (scratch space) is not
+ *       thread-safe. This factory ensures each thread gets its own isolated instance,
+ *       preventing concurrent access issues.</li>
+ * </ul>
+ *
+ * <h2>Usage Pattern</h2>
+ * A single factory instance should be created and retained for the lifetime of the
+ * application. In methods that require filtering, {@link #get()} should be called within
+ * a try-with-resources block to obtain a thread-safe filter instance.
+ *
+ * <p>Example usage:
+ * <pre>{@code
+ * // In application initialization:
+ * List<Pattern> myPatterns = loadPatterns();
+ * ScopedPatternFilterFactory<Pattern> filterFactory = ScopedPatternFilterFactory.ofPatterns(myPatterns);
+ *
+ * // In a service method (called by multiple threads):
+ * public void processText(String text) {
+ *     try (ScopedPatternFilter<Pattern> filter = filterFactory.get()) {
+ *         List<Pattern> candidates = filter.filter(text);
+ *         // ... perform final matching on candidates ...
+ *     }
+ * }
+ *
+ * // In application shutdown:
+ * filterFactory.close();
+ * }</pre>
+ *
+ * <h2>Lifecycle and Resource Management</h2>
+ * The factory manages a complex lifecycle:
+ * <ul>
+ *   <li><b>Thread-Local Caching:</b> Calling {@link #get()} returns a lightweight proxy to a
+ *       thread-local {@code ScopedPatternFilter} instance. The actual filter implementation is
+ *       cached and reused for the lifetime of the thread. The proxy prevents callers from
+ *       accidentally closing the shared, thread-local instance.</li>
+ *   <li><b>Automatic Cleanup:</b> The factory automatically manages the cleanup of resources
+ *       for threads that have terminated. It uses a background cleaner thread to release the
+ *       native Hyperscan resources associated with a dead thread, preventing memory leaks.</li>
+ *   <li><b>Factory Closure:</b> The factory itself is {@link Closeable}. When the factory is no
+ *       longer needed (e.g., during application shutdown), its {@link #close()} method
+ *       must be called. This will explicitly release all active filter resources it has
+ *       created and shut down its background cleanup task. Failure to close the factory
+ *       will result in resource leaks.</li>
+ * </ul>
+ *
+ * @param <T> The type of the original object from which a pattern can be derived.
+ * @see ScopedPatternFilter
+ */
+public final class ScopedPatternFilterFactory implements Supplier>, Closeable {
+
+
+ // --- Instance-specific fields ---
+ private final ReferenceQueue> referenceQueue = new ReferenceQueue<>();
+ private final AtomicBoolean closed = new AtomicBoolean(false);
+
+ /**
+ * This set holds strong references to the PatternFilterCleaner objects.
+ * This is necessary because if the PhantomReference objects themselves were only weakly
+ * reachable, they could be garbage collected before they are enqueued, and the
+ * cleanup logic would never run.
+ */
+ @Getter(AccessLevel.PACKAGE)
+ @SuppressWarnings("MismatchedQueryAndUpdateOfCollection")
+ private final Set refKeeper = ConcurrentHashMap.newKeySet();
+
+ @Getter(AccessLevel.PACKAGE)
+ private final ConcurrentMap> threadLocalFilters = Caffeine.newBuilder().weakKeys().removalListener(this::handleRemoval).build().asMap();
+ private final ScheduledFuture> cleanerTaskFuture; // Handle to this instance's cleanup task.
+ private final List patterns;
+ private final Function super T, ? extends Pattern> patternMapper;
+
+ public ScopedPatternFilterFactory(Iterable patterns, Function super T, ? extends Pattern> patternMapper) {
+ Objects.requireNonNull(patternMapper, "patternMapper cannot be null");
+ Objects.requireNonNull(patterns, "patterns cannot be null");
+ this.patterns = new ArrayList<>();
+ for (T pattern : patterns) {
+ Objects.requireNonNull(pattern, "patterns cannot contain null elements");
+ this.patterns.add(pattern);
+ }
+ if (this.patterns.isEmpty()) {
+ throw new IllegalArgumentException("patterns cannot be empty");
+ }
+ this.patternMapper = patternMapper;
+
+ // Schedule this instance's cleanup task on the shared executor.
+ this.cleanerTaskFuture = ExecutorHolder.CLEANER_SERVICE.scheduleWithFixedDelay(this::cleanUp, 1, 1, TimeUnit.SECONDS);
+ }
+
+ public static ScopedPatternFilterFactory ofPatterns(Iterable patterns) {
+ return new ScopedPatternFilterFactory<>(patterns, Function.identity());
+ }
+
+ private void handleRemoval(Thread thread, ScopedPatternFilter filter, RemovalCause cause) {
+ if (filter != null) {
+ try {
+ filter.close();
+ } catch (IOException e) {
+ // Log this error.
+ }
+ }
+ }
+
+ // This is an instance method that knows about this instance's queue and refKeeper.
+ private void cleanUp() {
+ try {
+ PatternFilterCleaner ref;
+ while ((ref = (PatternFilterCleaner) referenceQueue.poll()) != null) {
+ refKeeper.remove(ref);
+ ref.clean();
+ }
+ } catch (Exception e) {
+ // Log or handle exception
+ }
+ }
+
+ private void ensureOpen() {
+ if (closed.get()) {
+ throw new IllegalStateException("ScopedPatternFilterFactory is closed.");
+ }
+ }
+
+ private ScopedPatternFilter createFilter() {
+ ensureOpen();
+ try {
+ ScopedPatternFilterImpl filter = new ScopedPatternFilterImpl<>(patterns, patternMapper);
+ // Use this instance's referenceQueue.
+ PatternFilterCleaner cleaner = new PatternFilterCleaner(filter, referenceQueue);
+ refKeeper.add(cleaner);
+ return filter;
+ } catch (CompileErrorException e) {
+ throw new RuntimeException("Failed to compile patterns into ScopedPatternFilter", e);
+ }
+ }
+
+ @Override
+ public ScopedPatternFilter get() {
+ ensureOpen();
+ ScopedPatternFilter filter = threadLocalFilters.computeIfAbsent(Thread.currentThread(), t -> createFilter());
+ return new ScopedPatternFilterProxy<>(filter);
+ }
+
+ @Override
+ public void close() {
+ if (closed.compareAndSet(false, true)) {
+
+ // 1. Explicitly close all still-active filter instances.
+ for (Map.Entry> entry : threadLocalFilters.entrySet()) {
+ try {
+ entry.getValue().close();
+ } catch (IOException e) {
+ // Log this error.
+ }
+ }
+
+ // 2. Clear the map to release references.
+ threadLocalFilters.clear();
+
+ // 3. Cancel this instance's scheduled cleanup task.
+ // Other factory instances' tasks on the shared executor are unaffected.
+ this.cleanerTaskFuture.cancel(false);
+
+ // 4. Perform a final cleanup pass and clear the reference keeper.
+ cleanUp();
+ refKeeper.clear();
+ }
+ }
+
+ private enum ExecutorHolder {
+ ;
+ // A single, shared, daemon cleaner thread for all factory instances.
+ static final ScheduledExecutorService CLEANER_SERVICE = Executors.newSingleThreadScheduledExecutor(new NamedDaemonThreadFactory());
+ }
+
+ private static final class NamedDaemonThreadFactory implements ThreadFactory {
+ private static final String NAME_FORMAT = "ScopedPatternFilter-Shared-Cleaner-%d";
+ private final ThreadFactory delegate = Executors.defaultThreadFactory();
+ private final AtomicInteger counter = new AtomicInteger(0);
+
+ @Override
+ public Thread newThread(Runnable r) {
+ Thread t = delegate.newThread(r);
+ t.setName(String.format(NAME_FORMAT, counter.getAndIncrement()));
+ t.setDaemon(true);
+ return t;
+ }
+ }
+}
+
diff --git a/src/main/java/com/gliwka/hyperscan/util/ScopedPatternFilterImpl.java b/src/main/java/com/gliwka/hyperscan/util/ScopedPatternFilterImpl.java
new file mode 100644
index 0000000..e45f726
--- /dev/null
+++ b/src/main/java/com/gliwka/hyperscan/util/ScopedPatternFilterImpl.java
@@ -0,0 +1,115 @@
+package com.gliwka.hyperscan.util;
+
+
+import com.gliwka.hyperscan.wrapper.CompileErrorException;
+import com.gliwka.hyperscan.wrapper.Database;
+import com.gliwka.hyperscan.wrapper.Expression;
+import com.gliwka.hyperscan.wrapper.Match;
+import com.gliwka.hyperscan.wrapper.Scanner;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.function.Function;
+import java.util.regex.Pattern;
+
+final class ScopedPatternFilterImpl implements ScopedPatternFilter {
+
+ private final AtomicBoolean closed = new AtomicBoolean(false);
+ private final Database database;
+ private final Scanner scanner;
+ private final List filterable;
+ private final List notFilterable;
+
+ /**
+ * Create a pattern filter for the provided patterns
+ *
+ * @param patterns Patterns to be filtered
+ * @throws CompileErrorException in case the compilation of the hyperscan representation fails
+ */
+ ScopedPatternFilterImpl(List patterns, Function super T, ? extends Pattern> patternMapper) throws CompileErrorException {
+ List expressions = new ArrayList<>();
+ List notFilterable = new ArrayList<>();
+ List filterable = new ArrayList<>();
+
+ for (T pattern : patterns) {
+ Pattern p = patternMapper.apply(pattern);
+ Objects.requireNonNull(p, "a patternMapper returned null for " + pattern);
+ Expression expression = ExpressionUtil.mapToExpression(p, filterable.size());
+
+ if (expression == null) {
+ // can't be compiled to expression -> not filterable
+ notFilterable.add(pattern);
+ } else {
+ expressions.add(expression);
+ filterable.add(pattern);
+ }
+ }
+
+ this.database = Database.compile(expressions);
+ this.scanner = new Scanner();
+ this.scanner.allocScratch(database);
+ this.filterable = filterable;
+ this.notFilterable = notFilterable;
+ }
+
+ @SuppressWarnings("SynchronizationOnLocalVariableOrMethodParameter")
+ private static void close(AtomicBoolean closed, Scanner scanner, Database database) throws IOException {
+ if (closed.compareAndSet(false, true)) {
+ // Ensure scanner and database are closed in a thread-safe manner
+ synchronized (scanner) {
+ scanner.close();
+ database.close();
+ }
+ }
+ }
+
+ private void ensureOpen() {
+ if (closed.get()) {
+ throw new IllegalStateException("Pattern filter is closed.");
+ }
+ }
+
+ @Override
+ public List filter(String input) {
+ Objects.requireNonNull(input, "input cannot be null");
+ ensureOpen();
+ List matches;
+ // Close is performed by another thread, so we need to synchronize access to the scanner
+ // In a single-threaded context because of the bias locking mechanism by JVM, the performance
+ // impact should be minimal
+ synchronized (scanner) {
+ ensureOpen();
+ matches = scanner.scan(database, input);
+ }
+ List potentialMatches = new ArrayList<>(matches.size() + notFilterable.size());
+ for (Match match : matches) {
+ potentialMatches.add(filterable.get(match.getMatchedExpression().getId()));
+ }
+ potentialMatches.addAll(notFilterable);
+ return potentialMatches;
+ }
+
+ @Override
+ public void close() throws IOException {
+ close(closed, scanner, database);
+ }
+
+ @Override
+ public Runnable getCloseAction() {
+ AtomicBoolean closed = this.closed;
+ Database database = this.database;
+ Scanner scanner = this.scanner;
+ // Use local copies to avoid lambda capturing the whole instance, which could prevent GC
+ return () -> {
+ try {
+ close(closed, scanner, database);
+ } catch (IOException e) {
+ // Log or handle exception if needed
+ }
+ };
+ }
+}
+
diff --git a/src/main/java/com/gliwka/hyperscan/util/ScopedPatternFilterProxy.java b/src/main/java/com/gliwka/hyperscan/util/ScopedPatternFilterProxy.java
new file mode 100644
index 0000000..ea41bcc
--- /dev/null
+++ b/src/main/java/com/gliwka/hyperscan/util/ScopedPatternFilterProxy.java
@@ -0,0 +1,23 @@
+package com.gliwka.hyperscan.util;
+
+import java.util.List;
+
+/**
+ * A view of another {@link ScopedPatternFilter} whose {@link #close()} does nothing, so that
+ * callers using try-with-resources cannot close the wrapped filter.
+ *
+ * @param <T> the element type of the wrapped filter
+ */
+final class ScopedPatternFilterProxy<T> implements ScopedPatternFilter<T> {
+
+    private final ScopedPatternFilter<T> delegate;
+
+    /**
+     * @param delegate the filter that performs the actual work; must not be {@code null}
+     * @throws NullPointerException if {@code delegate} is {@code null}
+     */
+    ScopedPatternFilterProxy(ScopedPatternFilter<T> delegate) {
+        if (delegate == null) {
+            // Fail at construction instead of on the first filter() call.
+            throw new NullPointerException("delegate cannot be null");
+        }
+        this.delegate = delegate;
+    }
+
+    /** Intentionally a no-op: the wrapped filter is not owned by this proxy. */
+    @Override
+    public void close() {
+        // No operation performed on close
+    }
+
+    /** Forwards to the wrapped filter. */
+    @Override
+    public List<T> filter(String input) {
+        return delegate.filter(input);
+    }
+}
+
diff --git a/src/test/java/com/gliwka/hyperscan/util/PatternFilterCleanerTest.java b/src/test/java/com/gliwka/hyperscan/util/PatternFilterCleanerTest.java
new file mode 100644
index 0000000..0d9487a
--- /dev/null
+++ b/src/test/java/com/gliwka/hyperscan/util/PatternFilterCleanerTest.java
@@ -0,0 +1,74 @@
+package com.gliwka.hyperscan.util;
+
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.lang.ref.ReferenceQueue;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+
+class PatternFilterCleanerTest {
+
+    /** Verifies that {@code clean()} runs the close action supplied by the referent. */
+    @Test
+    void clean_shouldExecuteTheCloseAction() {
+        // Setup: a close action that records its own execution
+        final AtomicBoolean closeActionWasRun = new AtomicBoolean(false);
+        Runnable thunk = () -> closeActionWasRun.set(true);
+
+        FakeScopedPatternFilter referent = new FakeScopedPatternFilter(thunk);
+        ReferenceQueue<ScopedPatternFilter<?>> queue = new ReferenceQueue<>();
+        PatternFilterCleaner cleaner = new PatternFilterCleaner(referent, queue);
+
+        // Execute
+        cleaner.clean();
+
+        // Verify
+        assertThat(closeActionWasRun.get()).isTrue();
+    }
+
+    /** Verifies that {@code clean()} does not propagate an exception thrown by the close action. */
+    @Test
+    void clean_shouldSwallowExceptionsThrownByTheCloseAction() {
+        // Setup: A close action that always throws an exception
+        Runnable thunk = () -> {
+            throw new IllegalStateException("Test exception");
+        };
+        FakeScopedPatternFilter referent = new FakeScopedPatternFilter(thunk);
+        ReferenceQueue<ScopedPatternFilter<?>> queue = new ReferenceQueue<>();
+        PatternFilterCleaner cleaner = new PatternFilterCleaner(referent, queue);
+
+        // Execute & Verify
+        // The test passes if clean() does not throw an exception
+        assertDoesNotThrow(cleaner::clean);
+    }
+
+ /**
+ * A simple, concrete implementation of ScopedPatternFilter for testing purposes.
+ */
+ private static class FakeScopedPatternFilter implements ScopedPatternFilter