Skip to content

Commit 9c3c326

Browse files
authored
Fix secrets detector model dump counting (#65)
* Fix secrets detector model dump counting Signed-off-by: lucarlig <luca.carlig@ibm.com> * Harden secrets model dump dedupe Signed-off-by: lucarlig <luca.carlig@ibm.com> * Address model dump review edge cases Signed-off-by: lucarlig <luca.carlig@ibm.com> * Harden serialized duplicate gate Signed-off-by: lucarlig <luca.carlig@ibm.com> * Harden safe scalar duplicate checks Signed-off-by: lucarlig <luca.carlig@ibm.com> * Guard root duplicate key lookup Signed-off-by: lucarlig <luca.carlig@ibm.com> * Filter mixed-key object state safely Signed-off-by: lucarlig <luca.carlig@ibm.com> * Fix serialized object scan regressions Signed-off-by: lucarlig <luca.carlig@ibm.com> * Fix scan-state redaction regressions Signed-off-by: lucarlig <luca.carlig@ibm.com> * Preserve clean scan-state values Signed-off-by: lucarlig <luca.carlig@ibm.com> * Preserve scan-state through serialized redaction Signed-off-by: lucarlig <luca.carlig@ibm.com> * Scope pending scan-state restoration Signed-off-by: lucarlig <luca.carlig@ibm.com> * Redact overlapping secrets once Signed-off-by: lucarlig <luca.carlig@ibm.com> * Bump secrets detection version Signed-off-by: lucarlig <luca.carlig@ibm.com> * Centralize secrets detection test helpers Signed-off-by: lucarlig <luca.carlig@ibm.com> * Address secrets detection review follow-ups Signed-off-by: lucarlig <luca.carlig@ibm.com> * Use existing secrets detection version bump Signed-off-by: lucarlig <luca.carlig@ibm.com> * Cover clean scan state with clean model dump Signed-off-by: lucarlig <luca.carlig@ibm.com> --------- Signed-off-by: lucarlig <luca.carlig@ibm.com>
1 parent 1770855 commit 9c3c326

18 files changed

Lines changed: 3591 additions & 1668 deletions

File tree

AGENTS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ make check-all # fmt-check + clippy + Rust tests
3333

3434
## Versioning
3535

36+
Every change to a core plugin must include a plugin version bump.
37+
3638
When bumping a plugin version, update all of these:
3739

3840
1. `Cargo.toml` — the single source of truth for the version number.

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

DEVELOPING.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,18 @@ make test-all
3232

3333
Swap `rate_limiter` for any other managed plugin slug.
3434

35+
## Secrets Detection Count Semantics
36+
37+
`secrets_detection` reports one finding per non-overlapping secret span. When
38+
multiple enabled patterns match the same bytes, or overlapping bytes, the scanner
39+
redacts the merged span once and reports the most specific matching detector
40+
type. Distinct non-overlapping secrets in the same payload still count
41+
separately.
42+
43+
This changed older behavior that could count overlapping broad and specific
44+
pattern matches as multiple findings. Operators using `min_findings_to_block`
45+
values greater than `1` should audit thresholds when upgrading.
46+
3547
## Repo-Level Commands
3648

3749
```bash

plugins/rust/python-package/secrets_detection/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "secrets_detection"
3-
version = "0.2.0"
3+
version = "0.2.1"
44
edition.workspace = true
55
authors.workspace = true
66
license.workspace = true

plugins/rust/python-package/secrets_detection/cpex_secrets_detection/plugin-manifest.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
description: "Detect likely credentials and secrets in prompt input, tool output, and resource content"
22
author: "ContextForge Contributors"
3-
version: "0.2.0"
3+
version: "0.2.1"
44
kind: "cpex_secrets_detection.secrets_detection.SecretsDetectionPlugin"
55
available_hooks:
66
- "prompt_pre_fetch"

plugins/rust/python-package/secrets_detection/src/object_model.rs

Lines changed: 158 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2,58 +2,66 @@
22
// SPDX-License-Identifier: Apache-2.0
33

44
use pyo3::prelude::*;
5-
use pyo3::types::{PyAny, PyDict, PyFrozenSet, PyList, PySet, PyTuple};
5+
use pyo3::types::{PyAny, PyDict, PyFrozenSet, PyList, PySet, PyString, PyTuple};
66

77
pub struct InspectedObjectState<'py> {
88
pub rebuild_state: Option<Bound<'py, PyDict>>,
99
pub serialized_state: Option<Bound<'py, PyAny>>,
10+
pub scan_state: Option<Bound<'py, PyDict>>,
1011
}
1112

1213
pub fn inspect_object_state<'py>(
1314
py: Python<'py>,
1415
container: &Bound<'py, PyAny>,
16+
) -> PyResult<InspectedObjectState<'py>> {
17+
inspect_object_state_inner(py, container, true)
18+
}
19+
20+
pub(crate) fn inspect_object_state_without_model_dump<'py>(
21+
py: Python<'py>,
22+
container: &Bound<'py, PyAny>,
23+
) -> PyResult<InspectedObjectState<'py>> {
24+
inspect_object_state_inner(py, container, false)
25+
}
26+
27+
fn inspect_object_state_inner<'py>(
28+
py: Python<'py>,
29+
container: &Bound<'py, PyAny>,
30+
include_model_dump: bool,
1531
) -> PyResult<InspectedObjectState<'py>> {
1632
let mut mappings = MappingStateAccumulator::new(py);
1733
let mut serialized_state = None;
1834

19-
if let Ok(model_dump) = container.call_method0("model_dump") {
35+
if include_model_dump && let Ok(model_dump) = container.call_method0("model_dump") {
2036
if let Ok(model_state) = model_dump.cast::<PyDict>() {
2137
serialized_state = Some(model_state.clone().into_any());
22-
mappings.push(model_state)?;
38+
if dict_has_only_exact_string_keys(model_state) {
39+
mappings.push(model_state)?;
40+
}
2341
} else if !model_dump.is(container) {
2442
serialized_state = Some(model_dump);
2543
}
2644
}
2745

46+
let mut scan_state = None;
2847
if let Ok(dict_state) = container.getattr("__dict__")
2948
&& let Ok(dict_state) = dict_state.cast::<PyDict>()
3049
{
31-
mappings.push(dict_state)?;
50+
let split_state = split_dict_state(py, dict_state)?;
51+
if let Some(dict_state) = split_state.string_keys {
52+
mappings.push(&dict_state)?;
53+
}
54+
scan_state = split_state.non_string_keys;
3255
}
3356

3457
if let Some(slot_state) = extract_slot_state(py, container)? {
3558
mappings.push(&slot_state)?;
3659
}
3760

38-
let rebuild_state = mappings.finish();
39-
if container.hasattr("root")?
40-
&& serialized_state.is_some()
41-
&& let Some(rebuild_state) = rebuild_state.as_ref()
42-
&& rebuild_state.contains("root")?
43-
&& let Some(serialized) = serialized_state.as_ref()
44-
&& rebuild_state
45-
.get_item("root")?
46-
.is_some_and(|root| root.eq(serialized).unwrap_or(false))
47-
{
48-
return Ok(InspectedObjectState {
49-
rebuild_state: Some(rebuild_state.clone()),
50-
serialized_state: None,
51-
});
52-
}
53-
5461
Ok(InspectedObjectState {
55-
rebuild_state,
62+
rebuild_state: mappings.finish(),
5663
serialized_state,
64+
scan_state,
5765
})
5866
}
5967

@@ -126,7 +134,30 @@ pub fn apply_object_state(
126134
let builtins = py.import("builtins")?;
127135
let object_type = builtins.getattr("object")?;
128136
for (key, value) in state.iter() {
129-
set_attr_without_hooks(&object_type, target, &key.extract::<String>()?, &value)?;
137+
set_dict_or_attr_without_hooks(&object_type, target, &key.extract::<String>()?, &value)?;
138+
}
139+
Ok(())
140+
}
141+
142+
pub fn apply_extra_dict_state(
143+
py: Python<'_>,
144+
target: &Bound<'_, PyAny>,
145+
updates: &Bound<'_, PyDict>,
146+
) -> PyResult<()> {
147+
let dict = target.getattr("__dict__")?.cast_into::<PyDict>()?;
148+
let builtins = py.import("builtins")?;
149+
let object_type = builtins.getattr("object")?;
150+
for (key, value) in updates.iter() {
151+
if key.is_exact_instance_of::<PyString>() {
152+
set_dict_or_attr_without_hooks(
153+
&object_type,
154+
target,
155+
&key.extract::<String>()?,
156+
&value,
157+
)?;
158+
} else {
159+
dict.set_item(key, value)?;
160+
}
130161
}
131162
Ok(())
132163
}
@@ -148,6 +179,25 @@ fn set_attr_without_hooks(
148179
Ok(())
149180
}
150181

182+
fn set_dict_or_attr_without_hooks(
183+
object_type: &Bound<'_, PyAny>,
184+
target: &Bound<'_, PyAny>,
185+
name: &str,
186+
value: &Bound<'_, PyAny>,
187+
) -> PyResult<()> {
188+
if name.starts_with("__")
189+
&& name.ends_with("__")
190+
&& let Ok(dict) = target.getattr("__dict__")
191+
&& let Ok(dict) = dict.cast_into::<PyDict>()
192+
{
193+
// Keep dunder keys as inert instance-dict entries instead of routing
194+
// them through object.__setattr__ on a blank reconstructed object.
195+
dict.set_item(name, value)?;
196+
return Ok(());
197+
}
198+
set_attr_without_hooks(object_type, target, name, value)
199+
}
200+
151201
fn extract_slot_state<'py>(
152202
py: Python<'py>,
153203
container: &Bound<'py, PyAny>,
@@ -244,6 +294,53 @@ fn merge_state_into(target: &Bound<'_, PyDict>, source: &Bound<'_, PyDict>) -> P
244294
Ok(())
245295
}
246296

297+
pub fn dict_has_only_exact_string_keys(dict: &Bound<'_, PyDict>) -> bool {
298+
dict.iter()
299+
.all(|(key, _)| key.is_exact_instance_of::<PyString>())
300+
}
301+
302+
struct SplitDictState<'py> {
303+
string_keys: Option<Bound<'py, PyDict>>,
304+
non_string_keys: Option<Bound<'py, PyDict>>,
305+
}
306+
307+
fn split_dict_state<'py>(
308+
py: Python<'py>,
309+
dict: &Bound<'py, PyDict>,
310+
) -> PyResult<SplitDictState<'py>> {
311+
if dict_has_only_exact_string_keys(dict) {
312+
return Ok(SplitDictState {
313+
string_keys: Some(dict.clone()),
314+
non_string_keys: None,
315+
});
316+
}
317+
318+
let string_state = PyDict::new(py);
319+
let non_string_state = PyDict::new(py);
320+
for (key, value) in dict.iter() {
321+
if key.is_exact_instance_of::<PyString>() {
322+
string_state.set_item(key, value)?;
323+
} else {
324+
non_string_state.set_item(key, value)?;
325+
}
326+
}
327+
328+
let string_state = if string_state.is_empty() {
329+
None
330+
} else {
331+
Some(string_state)
332+
};
333+
let non_string_state = if non_string_state.is_empty() {
334+
None
335+
} else {
336+
Some(non_string_state)
337+
};
338+
Ok(SplitDictState {
339+
string_keys: string_state,
340+
non_string_keys: non_string_state,
341+
})
342+
}
343+
247344
struct MappingStateAccumulator<'py> {
248345
py: Python<'py>,
249346
state: Option<Bound<'py, PyDict>>,
@@ -338,7 +435,7 @@ class StateObject:
338435
}
339436

340437
#[test]
341-
fn inspect_root_model_uses_rebuild_state_only_when_root_matches_serialized_state() {
438+
fn inspect_root_model_exposes_rebuild_and_serialized_state() {
342439
Python::initialize();
343440
Python::attach(|py| -> PyResult<()> {
344441
let code = CString::new(
@@ -358,7 +455,7 @@ class RootObject:
358455
let inspected = inspect_object_state(py, &instance)?;
359456

360457
assert!(inspected.rebuild_state.is_some());
361-
assert!(inspected.serialized_state.is_none());
458+
assert!(inspected.serialized_state.is_some());
362459
Ok(())
363460
})
364461
.unwrap();
@@ -448,6 +545,43 @@ class KwargsOnly:
448545
.unwrap();
449546
}
450547

548+
#[test]
549+
fn apply_state_keeps_dunder_names_in_instance_dict() {
550+
Python::initialize();
551+
Python::attach(|py| -> PyResult<()> {
552+
let code = CString::new(
553+
r#"
554+
class DunderState:
555+
pass
556+
"#,
557+
)
558+
.unwrap();
559+
let module =
560+
PyModule::from_code(py, code.as_c_str(), c"dunder_test.py", c"dunder_test")?;
561+
let instance = module.getattr("DunderState")?.call0()?;
562+
let target = prepare_rebuild_target(py, &instance)?;
563+
let state = PyDict::new(py);
564+
state.set_item("__reduce__", "safe")?;
565+
state.set_item("value", "normal")?;
566+
567+
apply_object_state(py, &target, &state.into_any())?;
568+
569+
assert_eq!(target.getattr("value")?.extract::<String>()?, "normal");
570+
assert_eq!(
571+
target
572+
.getattr("__dict__")?
573+
.cast_into::<PyDict>()?
574+
.get_item("__reduce__")?
575+
.unwrap()
576+
.extract::<String>()?,
577+
"safe"
578+
);
579+
assert!(!target.getattr("__reduce__")?.is(&PyString::new(py, "safe")));
580+
Ok(())
581+
})
582+
.unwrap();
583+
}
584+
451585
#[test]
452586
fn append_slot_names_accepts_collection_shapes() {
453587
Python::initialize();

plugins/rust/python-package/secrets_detection/src/patterns.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,10 @@ pub static PATTERNS: LazyLock<HashMap<&'static str, Regex>> = LazyLock::new(|| {
3939
);
4040
patterns.insert(
4141
"slack_token",
42-
Regex::new(r"\bxox[abpqr]-[0-9A-Za-z\-]{10,48}\b").expect("valid slack_token regex"),
42+
// Slack documents token prefixes such as `xoxb-` and warns that token
43+
// lengths are not fixed:
44+
// https://docs.slack.dev/changelog/2016/08/23/token-lengthening/
45+
Regex::new(r"\bxox[abpqr]-[0-9A-Za-z\-]{10,80}\b").expect("valid slack_token regex"),
4346
);
4447
patterns.insert(
4548
"private_key_block",

0 commit comments

Comments
 (0)