Skip to content

Commit e269fc1

Browse files
authored
New PromptGuardFilter to mitigate prompt injection attacks (#148)
1 parent 80e6892 commit e269fc1

13 files changed

Lines changed: 1727 additions & 0 deletions

File tree

openig-ai/src/main/java/org/openidentityplatform/openig/ai/AiClassAliasResolver.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import org.forgerock.openig.alias.ClassAliasResolver;
2020
import org.openidentityplatform.openig.ai.filter.LLMProxyFilter;
2121
import org.openidentityplatform.openig.ai.filter.MCPServerFeaturesFilter;
22+
import org.openidentityplatform.openig.ai.filter.LLMPromptGuardFilter;
2223

2324
import java.util.HashMap;
2425
import java.util.Map;
@@ -30,6 +31,7 @@ public class AiClassAliasResolver implements ClassAliasResolver {
3031
private static final Map<String, Class<?>> ALIASES = new HashMap<>();
3132

3233
static {
34+
ALIASES.put("LLMPromptGuardFilter", LLMPromptGuardFilter.class);
3335
ALIASES.put("LLMProxyFilter", LLMProxyFilter.class);
3436
ALIASES.put("MCPServerFeaturesFilter", MCPServerFeaturesFilter.class);
3537
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
/*
2+
* The contents of this file are subject to the terms of the Common Development and
3+
* Distribution License (the License). You may not use this file except in compliance with the
4+
* License.
5+
*
6+
* You can obtain a copy of the License at legal/CDDLv1.0.txt. See the License for the
7+
* specific language governing permission and limitations under the License.
8+
*
9+
* When distributing Covered Software, include this CDDL Header Notice in each file and include
10+
* the License file at legal/CDDLv1.0.txt. If applicable, add the following below the CDDL
11+
* Header, with the fields enclosed by brackets [] replaced by your own identifying
12+
* information: "Portions copyright [year] [name of copyright owner]".
13+
*
14+
* Copyright 2026 3A Systems LLC.
15+
*/
16+
17+
package org.openidentityplatform.openig.ai.filter;
18+
19+
import org.slf4j.Logger;
20+
import org.slf4j.LoggerFactory;
21+
22+
import java.util.List;
23+
import java.util.Objects;
24+
25+
/**
26+
* Composite injection detector that chains multiple {@link InjectionDetector}
27+
* implementations in priority order, short-circuiting on the first positive.
28+
*/
29+
public final class CompositeDetector implements InjectionDetector {
30+
31+
private static final Logger logger = LoggerFactory.getLogger(CompositeDetector.class);
32+
33+
private final List<InjectionDetector> detectors;
34+
35+
public CompositeDetector(InjectionDetector... detectors) {
36+
this(List.of(detectors));
37+
}
38+
39+
public CompositeDetector(List<InjectionDetector> detectors) {
40+
Objects.requireNonNull(detectors, "detectors must not be null");
41+
if (detectors.isEmpty()) {
42+
throw new IllegalArgumentException("At least one detector is required");
43+
}
44+
this.detectors = List.copyOf(detectors);
45+
}
46+
@Override
47+
public DetectionResult scan(String prompt) {
48+
for (InjectionDetector detector : detectors) {
49+
DetectionResult result = detector.scan(prompt);
50+
if (result.isInjection()) {
51+
logger.info("Injection confirmed by detector={} reason={} score={}",
52+
result.getDetector(), result.getReason(), result.getScore());
53+
return result;
54+
}
55+
}
56+
return DetectionResult.clean();
57+
}
58+
59+
@Override
60+
public void destroy() {
61+
detectors.forEach(d -> {
62+
try {
63+
d.destroy();
64+
} catch (Exception e) {
65+
logger.warn("Error destroying detector {}: {}", d.getClass().getSimpleName(), e.getMessage());
66+
}
67+
});
68+
}
69+
}
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/*
2+
* The contents of this file are subject to the terms of the Common Development and
3+
* Distribution License (the License). You may not use this file except in compliance with the
4+
* License.
5+
*
6+
* You can obtain a copy of the License at legal/CDDLv1.0.txt. See the License for the
7+
* specific language governing permission and limitations under the License.
8+
*
9+
* When distributing Covered Software, include this CDDL Header Notice in each file and include
10+
* the License file at legal/CDDLv1.0.txt. If applicable, add the following below the CDDL
11+
* Header, with the fields enclosed by brackets [] replaced by your own identifying
12+
* information: "Portions copyright [year] [name of copyright owner]".
13+
*
14+
* Copyright 2026 3A Systems LLC.
15+
*/
16+
17+
package org.openidentityplatform.openig.ai.filter;
18+
19+
/**
 * Immutable outcome of a single {@link InjectionDetector} scan.
 *
 * <p>Each instance records:
 * <ul>
 *   <li>a flag telling whether an injection was detected</li>
 *   <li>the confidence score (0.0 – 1.0; -1 when unavailable) – the range is a
 *       convention and is not validated by this class</li>
 *   <li>a machine-readable reason code for structured audit logging</li>
 *   <li>the detector layer that made the final determination</li>
 * </ul>
 *
 * <p>Instances are obtained through {@link #clean()} and
 * {@link #injection(double, String, String)}; the constructor is private.
 */
public final class DetectionResult {

    /** Shared negative result: no injection, score 0.0, reason and detector both "none". */
    public static final DetectionResult CLEAN = new DetectionResult(false, 0.0, "none", "none");

    /** {@code true} when an injection was detected. */
    private final boolean injection;

    /** Confidence score supplied by the detector. */
    private final double score;

    /** Reason code, e.g. "override_instruction". */
    private final String reason;

    /** Detector name, e.g. "regex". */
    private final String detector;

    /**
     * Returns the shared negative result.
     *
     * @return {@link #CLEAN}; the same instance on every call
     */
    public static DetectionResult clean() {
        return CLEAN;
    }

    /**
     * Builds a positive result.
     *
     * @param score    the confidence score reported by the detector
     * @param reason   the machine-readable reason code
     * @param detector the name of the detector that raised the finding
     * @return a new result whose {@link #isInjection()} is {@code true}
     */
    public static DetectionResult injection(double score, String reason, String detector) {
        return new DetectionResult(true, score, reason, detector);
    }

    private DetectionResult(boolean injection, double score, String reason, String detector) {
        this.injection = injection;
        this.score = score;
        this.reason = reason;
        this.detector = detector;
    }

    /**
     * @return {@code true} when this result reports an injection
     */
    public boolean isInjection() {
        return injection;
    }

    /**
     * @return the confidence score stored in this result
     */
    public double getScore() {
        return score;
    }

    /**
     * @return the reason code stored in this result
     */
    public String getReason() {
        return reason;
    }

    /**
     * @return the detector name stored in this result
     */
    public String getDetector() {
        return detector;
    }

    /**
     * Renders all four fields, e.g.
     * {@code DetectionResult{injection=false, score=0.0, reason='none', detector='none'}}.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("DetectionResult{");
        text.append("injection=").append(injection);
        text.append(", score=").append(score);
        text.append(", reason='").append(reason).append('\'');
        text.append(", detector='").append(detector).append('\'');
        text.append('}');
        return text.toString();
    }
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/*
2+
* The contents of this file are subject to the terms of the Common Development and
3+
* Distribution License (the License). You may not use this file except in compliance with the
4+
* License.
5+
*
6+
* You can obtain a copy of the License at legal/CDDLv1.0.txt. See the License for the
7+
* specific language governing permission and limitations under the License.
8+
*
9+
* When distributing Covered Software, include this CDDL Header Notice in each file and include
10+
* the License file at legal/CDDLv1.0.txt. If applicable, add the following below the CDDL
11+
* Header, with the fields enclosed by brackets [] replaced by your own identifying
12+
* information: "Portions copyright [year] [name of copyright owner]".
13+
*
14+
* Copyright 2026 3A Systems LLC.
15+
*/
16+
17+
package org.openidentityplatform.openig.ai.filter;
18+
19+
/**
20+
* Strategy interface for prompt-injection detection.
21+
*
22+
* <p>Implementations must be <strong>thread-safe</strong>: a single detector
23+
* instance is shared across all concurrent requests.
24+
*
25+
* <p>Known implementations:
26+
* <ul>
27+
* <li>{@link RegexDetector} – fast, deterministic regex pre-filter</li>
28+
* <li>{@link TypoglycemiaDetector} – fast, catches injection keywords whose interior
29+
* letters have been transposed to evade exactmatching</li>
30+
* <li>{@link CompositeDetector} – chains the above with short-circuit logic</li>
31+
* </ul>
32+
*/
33+
public interface InjectionDetector {
34+
35+
/**
36+
* Scan {@code prompt} for injection signals.
37+
*
38+
* @param prompt the normalized prompt text extracted from the LLM request body
39+
* @return a {@link DetectionResult}; never {@code null}
40+
*/
41+
DetectionResult scan(String prompt);
42+
43+
default void destroy() {}
44+
}

0 commit comments

Comments
 (0)