Skip to content

Commit 1e60798

Browse files
authored
fix: honor default mask strategy in rust pii filter (#23)
* fix: honor default mask strategy in rust pii filter
  Signed-off-by: lucarlig <luca.carlig@ibm.com>
* fix: format rust pii filter sources
  Signed-off-by: lucarlig <luca.carlig@ibm.com>
* fix: inherit default mask strategy for custom patterns
  Signed-off-by: lucarlig <luca.carlig@ibm.com>
* fix: normalize generated pii filter stubs
  Signed-off-by: lucarlig <luca.carlig@ibm.com>
* fix: stabilize generated stub newlines
  Signed-off-by: lucarlig <luca.carlig@ibm.com>
* fix: handle none mask strategy in model configs
  Signed-off-by: lucarlig <luca.carlig@ibm.com>
* fix: validate pii filter mask strategy configs
  Signed-off-by: lucarlig <luca.carlig@ibm.com>
* fix: bump pii_filter to 0.2.1
  Signed-off-by: lucarlig <luca.carlig@ibm.com>
* test: update pii filter release-info kind check
  Signed-off-by: lucarlig <luca.carlig@ibm.com>
* fix(pii_filter): add payload clone regression coverage
  Signed-off-by: lucarlig <luca.carlig@ibm.com>
* fix(pii_filter): isolate prompt post-fetch payloads
  Signed-off-by: lucarlig <luca.carlig@ibm.com>
---------
Signed-off-by: lucarlig <luca.carlig@ibm.com>
1 parent ebcdc97 commit 1e60798

12 files changed

Lines changed: 676 additions & 99 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

plugins/rust/python-package/pii_filter/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "pii_filter"
3-
version = "0.2.0"
3+
version = "0.2.1"
44
edition.workspace = true
55
authors.workspace = true
66
license.workspace = true

plugins/rust/python-package/pii_filter/README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ High-performance PII detection and masking library for ContextForge.
55
## Features
66

77
- Detects 12+ PII types (SSN, email, credit cards, phone numbers, and more)
8-
- Built-in detectors default to `redact` masking
8+
- Built-in detectors follow `default_mask_strategy`, which defaults to `redact`
99
- Multiple masking strategies (redact, partial, hash, tokenize, remove)
1010
- Parallel regex matching with RegexSet (5-10x faster than Python)
1111
- Zero-copy operations for nested JSON/dict traversal
@@ -28,8 +28,12 @@ The Python plugin requires the compiled Rust extension and uses it for all detec
2828

2929
Version `0.2.0` intentionally changes the built-in default masking policy from partial masking to `redact`. Set `default_mask_strategy: "partial"` explicitly if you need the previous behavior.
3030

31+
Version `0.2.1` changes custom-pattern inheritance: when `custom_patterns[].mask_strategy` is omitted or set to `null`/`None`, the pattern inherits `default_mask_strategy` instead of forcing `redact`.
32+
3133
Version `0.2.0` also tightens the default privacy posture for observability: detection logging and detection-detail metadata are now disabled unless you opt in with `log_detections: true` or `include_detection_details: true`.
3234

35+
Version `0.2.1` validates `default_mask_strategy` and `custom_patterns[].mask_strategy` strictly. Invalid values that older builds silently treated as `redact` now fail fast during plugin initialization.
36+
3337
## Detection Coverage
3438

3539
This section describes the current Rust detector behavior so users know what is intentionally matched and what is intentionally left alone. The detector is optimized to reduce noisy false positives, which means some generic identifiers are only matched when they appear with clear context labels.

plugins/rust/python-package/pii_filter/cpex_pii_filter/pii_filter_rust/__init__.pyi

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,29 +12,29 @@ __all__ = [
1212
class PIIDetectorRust:
1313
r"""
1414
Main PII detector exposed to Python
15-
15+
1616
# Example (Python)
1717
```python
1818
from cpex_pii_filter import PIIDetectorRust
19-
19+
2020
config = {"detect_ssn": True, "detect_email": True}
2121
detector = PIIDetectorRust(config)
22-
22+
2323
text = "My SSN is 123-45-6789 and email is john@example.com"
2424
detections = detector.detect(text)
2525
print(detections) # {"ssn": [...], "email": [...]}
26-
26+
2727
masked = detector.mask(text, detections)
2828
print(masked) # "My SSN is [REDACTED] and email is [REDACTED]"
2929
```
3030
"""
3131
def __new__(cls, config: typing.Any) -> PIIDetectorRust:
3232
r"""
3333
Create a new PII detector
34-
34+
3535
# Arguments
3636
* `config` - Python dictionary or Pydantic model with configuration
37-
37+
3838
# Configuration Keys
3939
* `detect_ssn` (bool): Detect Social Security Numbers
4040
* `detect_bsn` (bool): Detect Dutch citizen service numbers
@@ -55,16 +55,17 @@ class PIIDetectorRust:
5555
* `max_text_bytes` (int): Maximum text payload size to inspect
5656
* `max_nested_depth` (int): Maximum nested container depth to inspect
5757
* `max_collection_items` (int): Maximum items to inspect per collection
58-
* `custom_patterns` (list[dict]): Additional regex-based PII patterns
58+
* `custom_patterns` (list[dict]): Additional regex-based PII patterns.
59+
`mask_strategy` is optional and inherits `default_mask_strategy` when omitted or `None`.
5960
* `whitelist_patterns` (list[str]): Regex patterns to exclude from detection
6061
"""
6162
def detect(self, text: builtins.str) -> typing.Any:
6263
r"""
6364
Detect PII in text
64-
65+
6566
# Arguments
6667
* `text` - Text to scan for PII
67-
68+
6869
# Returns
6970
Dictionary mapping PII type to list of detections:
7071
```python
@@ -81,22 +82,22 @@ class PIIDetectorRust:
8182
def mask(self, text: builtins.str, detections: typing.Any) -> builtins.str:
8283
r"""
8384
Mask detected PII in text
84-
85+
8586
# Arguments
8687
* `text` - Original text
8788
* `detections` - Detection results from detect()
88-
89+
8990
# Returns
9091
Masked text with PII replaced
9192
"""
9293
def process_nested(self, data: typing.Any, path: builtins.str) -> tuple[builtins.bool, typing.Any, typing.Any]:
9394
r"""
9495
Process nested data structures (dicts, lists, strings)
95-
96+
9697
# Arguments
9798
* `data` - Python object (dict, list, str, or other)
9899
* `path` - Current path in the structure (for logging)
99-
100+
100101
# Returns
101102
Tuple of (modified: bool, new_data: Any, detections: dict)
102103
"""
@@ -108,4 +109,3 @@ class PIIFilterPluginCore:
108109
def prompt_post_fetch(self, payload: typing.Any, context: typing.Any) -> typing.Any: ...
109110
def tool_pre_invoke(self, payload: typing.Any, context: typing.Any) -> typing.Any: ...
110111
def tool_post_invoke(self, payload: typing.Any, context: typing.Any) -> typing.Any: ...
111-

plugins/rust/python-package/pii_filter/cpex_pii_filter/plugin-manifest.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
description: "Rust-backed PII detection and masking for prompt arguments, tool inputs, and tool outputs"
22
author: "ContextForge Contributors"
3-
version: "0.2.0"
3+
version: "0.2.1"
44
kind: "cpex_pii_filter.pii_filter.PIIFilterPlugin"
55
available_hooks:
66
- "prompt_pre_fetch"

plugins/rust/python-package/pii_filter/src/bin/stub_gen.rs

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,17 @@ fn curate_extension_stub_content(content: &str) -> String {
3333
"curated extension stub is missing PIIFilterPluginCore class definition",
3434
);
3535

36-
curated
36+
normalize_trailing_whitespace(&curated)
37+
}
38+
39+
/// Normalizes whitespace in stub text.
///
/// Trims trailing whitespace from every line, rejoins the lines with `\n`,
/// and ends the result with exactly one newline. Empty input yields `"\n"`.
fn normalize_trailing_whitespace(content: &str) -> String {
40+
let normalized = content
41+
.lines()
42+
.map(str::trim_end)
43+
.collect::<Vec<_>>()
44+
.join("\n");
45+
// Drop blank lines left at the end so exactly one newline is appended below.
let normalized = normalized.trim_end_matches('\n');
46+
format!("{normalized}\n")
3747
}
3848

3949
fn curate_extension_stub() {
@@ -91,4 +101,17 @@ mod tests {
91101

92102
let _ = curate_extension_stub_content(original);
93103
}
104+
105+
// Checks that curating stub text removes trailing whitespace, ends the output
// with a single newline, and that curating the result a second time changes nothing.
#[test]
106+
fn test_curate_extension_stub_normalizes_whitespace_and_is_idempotent() {
107+
let generated = "# This file is automatically generated by pyo3_stub_gen\n# ruff: noqa: E501, F401, F403, F405\n\nimport typing \n\n__all__ = [\n \"PIIDetectorRust\",\n]\n\n";
108+
109+
let curated = curate_extension_stub_content(generated);
110+
let curated_again = curate_extension_stub_content(&curated);
111+
112+
assert_eq!(curated, curated_again);
113+
assert!(curated.ends_with('\n'));
114+
assert!(!curated.ends_with("\n\n"));
115+
assert!(!curated.contains("typing "));
116+
}
94117
}

0 commit comments

Comments
 (0)