Skip to content

Commit f4c9cdf

Browse files
fix: bound native picklescan state simulation (#1501)
* fix: bound picklescan tracked state * fix: harden picklescan tracked state bounds * fix: preserve bounded picklescan coverage * fix: bound memo read stack materialization * fix: account for duplicated memoized values * fix: preserve bounded picklescan lookup semantics
1 parent 6d1ba2e commit f4c9cdf

3 files changed

Lines changed: 1741 additions & 215 deletions

File tree

packages/modelaudit-picklescan/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ and this package adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
5252
### Bug Fixes
5353

5454
- detect dynamic `type()` namespaces that can hide callable protocol hooks
55+
- bound native pickle state simulation for tracked dictionaries, memo clones,
56+
dotted globals, and recursive mappings
5557
- fail closed when encoded nested-pickle probe candidates exhaust the analysis cap
5658
- prevent call-graph alias cycles from hanging scans
5759
- detect nested brace-format lookups that reach tracked `defaultdict` factories

packages/modelaudit-picklescan/rust/src/policy.rs

Lines changed: 86 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,16 @@
1+
const MAX_DOTTED_GLOBAL_BYTES: usize = 4096;
2+
const MAX_DOTTED_GLOBAL_SEGMENTS: usize = 64;
3+
14
pub(crate) fn global_severity(module: &str, name: &str) -> Option<&'static str> {
2-
direct_global_severity(module, name).or_else(|| dotted_global_tail_severity(name))
5+
let direct = direct_global_severity(module, name);
6+
if direct == Some("critical") {
7+
return direct;
8+
}
9+
let tail = dotted_global_tail_severity(name);
10+
if tail == Some("critical") {
11+
return tail;
12+
}
13+
direct.or(tail)
314
}
415

516
pub(crate) fn callable_severity(module: &str, name: &str) -> Option<&'static str> {
@@ -21,6 +32,7 @@ fn direct_global_severity(module: &str, name: &str) -> Option<&'static str> {
2132
direct_global_severity_without_wrapper_alias(module, name)
2233
.or_else(|| wrapper_global_alias_severity(module, name))
2334
.or_else(|| dangerous_global_prefix_severity(module, name))
35+
.or_else(|| dotted_global_policy_budget_exceeded(name).then_some("warning"))
2436
}
2537

2638
fn wrapper_global_alias_severity(module: &str, name: &str) -> Option<&'static str> {
@@ -44,26 +56,26 @@ fn strip_wrapper_global_suffix(name: &str) -> Option<&str> {
4456
}
4557

4658
fn dangerous_global_prefix_severity(module: &str, name: &str) -> Option<&'static str> {
47-
let parts = name.split('.').collect::<Vec<_>>();
48-
for split in 1..parts.len() {
49-
if is_inert_global_metadata_tail(&parts[split..]) {
59+
for (segments_seen, (split, _)) in name.match_indices('.').enumerate() {
60+
if segments_seen >= MAX_DOTTED_GLOBAL_SEGMENTS || split > MAX_DOTTED_GLOBAL_BYTES {
61+
break;
62+
}
63+
let tail = &name[split + 1..];
64+
if is_inert_global_metadata_tail(tail) {
5065
continue;
5166
}
52-
let candidate = parts[..split].join(".");
53-
if let Some(severity) = direct_global_severity_without_wrapper_alias(module, &candidate)
54-
.or_else(|| wrapper_global_alias_severity(module, &candidate))
67+
let candidate = &name[..split];
68+
if let Some(severity) = direct_global_severity_without_wrapper_alias(module, candidate)
69+
.or_else(|| wrapper_global_alias_severity(module, candidate))
5570
{
5671
return Some(severity);
5772
}
5873
}
5974
None
6075
}
6176

62-
fn is_inert_global_metadata_tail(parts: &[&str]) -> bool {
63-
matches!(
64-
parts,
65-
["__doc__"] | ["__name__"] | ["__module__"] | ["__qualname__"]
66-
)
77+
fn is_inert_global_metadata_tail(tail: &str) -> bool {
78+
matches!(tail, "__doc__" | "__name__" | "__module__" | "__qualname__")
6779
}
6880

6981
fn direct_global_severity_without_wrapper_alias(module: &str, name: &str) -> Option<&'static str> {
@@ -118,17 +130,46 @@ fn dotted_global_tail_severity(name: &str) -> Option<&'static str> {
118130
return None;
119131
}
120132

121-
let parts = name.split('.').collect::<Vec<_>>();
133+
let parts = bounded_leading_dotted_parts(name);
134+
let mut warning = None;
122135
for start in 0..parts.len().saturating_sub(1) {
123136
for split in start + 1..parts.len() {
124137
let candidate_module = parts[start..split].join(".");
125138
let candidate_name = parts[split..].join(".");
126139
if let Some(severity) = direct_global_severity(&candidate_module, &candidate_name) {
127-
return Some(severity);
140+
if severity == "critical" {
141+
return Some(severity);
142+
}
143+
warning = Some(severity);
128144
}
129145
}
130146
}
131-
None
147+
warning.or_else(|| dotted_global_policy_budget_exceeded(name).then_some("warning"))
148+
}
149+
150+
fn dotted_global_policy_budget_exceeded(name: &str) -> bool {
151+
name.len() > MAX_DOTTED_GLOBAL_BYTES
152+
|| name.split('.').take(MAX_DOTTED_GLOBAL_SEGMENTS + 1).count() > MAX_DOTTED_GLOBAL_SEGMENTS
153+
}
154+
155+
fn bounded_leading_dotted_parts(name: &str) -> Vec<&str> {
156+
let mut parts = Vec::new();
157+
let mut bytes: usize = 0;
158+
for part in name.split('.') {
159+
let separator_bytes = usize::from(!parts.is_empty());
160+
let Some(next_bytes) = bytes
161+
.checked_add(separator_bytes)
162+
.and_then(|total| total.checked_add(part.len()))
163+
else {
164+
break;
165+
};
166+
if parts.len() >= MAX_DOTTED_GLOBAL_SEGMENTS || next_bytes > MAX_DOTTED_GLOBAL_BYTES {
167+
break;
168+
}
169+
parts.push(part);
170+
bytes = next_bytes;
171+
}
172+
parts
132173
}
133174

134175
#[derive(Clone, Copy)]
@@ -1145,6 +1186,36 @@ mod tests {
11451186
assert_eq!(global_severity("linecache", "clearcache"), None);
11461187
}
11471188

1189+
#[test]
1190+
fn dotted_global_lookup_is_segment_bounded() {
1191+
let long_dotted_name = (0..(MAX_DOTTED_GLOBAL_SEGMENTS + 4))
1192+
.map(|index| format!("part{index}"))
1193+
.collect::<Vec<_>>()
1194+
.join(".");
1195+
1196+
assert_eq!(global_severity("safe", &long_dotted_name), Some("warning"));
1197+
assert_eq!(
1198+
global_severity("os", &format!("system.{long_dotted_name}")),
1199+
Some("critical")
1200+
);
1201+
let long_dotted_tail = format!("pkg.os.system.{long_dotted_name}");
1202+
let bounded_tail = bounded_leading_dotted_parts(&long_dotted_tail);
1203+
assert_eq!(&bounded_tail[..3], &["pkg", "os", "system"]);
1204+
assert_eq!(
1205+
direct_global_severity("os", &bounded_tail[2..].join(".")),
1206+
Some("critical")
1207+
);
1208+
assert_eq!(
1209+
global_severity("safe", &format!("pkg.os.system.{long_dotted_name}")),
1210+
Some("critical")
1211+
);
1212+
assert_eq!(
1213+
global_severity("safe", "wrapper.os.system"),
1214+
Some("critical")
1215+
);
1216+
assert_eq!(global_severity("safe", "wrapper.custom.loader"), None);
1217+
}
1218+
11481219
#[test]
11491220
fn pathlib_filesystem_access_methods_are_dangerous_without_wildcarding_pathlib() {
11501221
assert_eq!(

0 commit comments

Comments
 (0)