|
5 | 5 |
|
6 | 6 | use log::{debug, warn}; |
7 | 7 | use pyo3::prelude::*; |
8 | | -use pyo3::types::{PyAny, PyDict, PyList, PySet, PyString, PyTuple}; |
| 8 | +use pyo3::types::{PyAny, PyDict, PyList, PyMapping, PySet, PyString, PyTuple}; |
9 | 9 | use pyo3_stub_gen::derive::*; |
10 | 10 | use std::collections::HashMap; |
11 | 11 |
|
@@ -357,24 +357,28 @@ impl PIIDetectorRust { |
357 | 357 | } |
358 | 358 | } |
359 | 359 |
|
360 | | - // Handle dictionaries |
361 | | - if let Ok(dict) = data.cast::<PyDict>() { |
362 | | - let mut entries: Vec<(Py<PyAny>, Py<PyAny>)> = Vec::with_capacity(dict.len()); |
| 360 | + // Handle mappings through the Python protocol. CPEX isolation wraps |
| 361 | + // dicts in copy-on-write dict subclasses whose visible entries are not |
| 362 | + // stored in the underlying PyDict table. |
| 363 | + if let Ok(mapping) = data.cast::<PyMapping>() { |
| 364 | + let mapping_len = mapping.len()?; |
| 365 | + let mut entries: Vec<(Py<PyAny>, Py<PyAny>)> = Vec::with_capacity(mapping_len); |
363 | 366 | let mut all_detections = HashMap::new(); |
364 | | - if dict.len() > self.config.max_collection_items { |
| 367 | + if mapping_len > self.config.max_collection_items { |
365 | 368 | warn!( |
366 | 369 | "Rejected nested mapping at path '{}' because size {} exceeds max {}", |
367 | | - path, |
368 | | - dict.len(), |
369 | | - self.config.max_collection_items |
| 370 | + path, mapping_len, self.config.max_collection_items |
370 | 371 | ); |
371 | 372 | return Err(PyErr::new::<pyo3::exceptions::PyValueError, _>(format!( |
372 | 373 | "Nested mapping exceeds maximum size of {} items", |
373 | 374 | self.config.max_collection_items |
374 | 375 | ))); |
375 | 376 | } |
376 | 377 |
|
377 | | - for (key, value) in dict.iter() { |
| 378 | + for item in mapping.items()?.iter() { |
| 379 | + let item = item.cast::<PyTuple>()?; |
| 380 | + let key = item.get_item(0)?; |
| 381 | + let value = item.get_item(1)?; |
378 | 382 | let key_str = key.str()?.to_string_lossy().into_owned(); |
379 | 383 | let new_path = if path.is_empty() { |
380 | 384 | key_str.clone() |
@@ -1659,6 +1663,57 @@ class ConfigModel: |
1659 | 1663 | }); |
1660 | 1664 | } |
1661 | 1665 |
|
| 1666 | + #[test] |
| 1667 | + fn test_process_nested_mapping_allows_collection_limit_boundary() { |
| 1668 | + Python::initialize(); |
| 1669 | + Python::attach(|py| { |
| 1670 | + let config = PyDict::new(py); |
| 1671 | + config.set_item("detect_email", true).unwrap(); |
| 1672 | + config.set_item("max_collection_items", 1).unwrap(); |
| 1673 | + |
| 1674 | + let detector = PIIDetectorRust::new(&config.into_any()).unwrap(); |
| 1675 | + let data = PyDict::new(py); |
| 1676 | + data.set_item("email", "alice@example.com").unwrap(); |
| 1677 | + |
| 1678 | + let (modified, new_data, _) = |
| 1679 | + detector.process_nested(py, &data.into_any(), "").unwrap(); |
| 1680 | + |
| 1681 | + assert!(modified); |
| 1682 | + assert_eq!( |
| 1683 | + new_data |
| 1684 | + .bind(py) |
| 1685 | + .cast::<PyDict>() |
| 1686 | + .unwrap() |
| 1687 | + .get_item("email") |
| 1688 | + .unwrap() |
| 1689 | + .unwrap() |
| 1690 | + .extract::<String>() |
| 1691 | + .unwrap(), |
| 1692 | + "[REDACTED]" |
| 1693 | + ); |
| 1694 | + }); |
| 1695 | + } |
| 1696 | + |
| 1697 | + #[test] |
| 1698 | + fn test_process_nested_mapping_rejects_over_collection_limit() { |
| 1699 | + Python::initialize(); |
| 1700 | + Python::attach(|py| { |
| 1701 | + let config = PyDict::new(py); |
| 1702 | + config.set_item("detect_email", true).unwrap(); |
| 1703 | + config.set_item("max_collection_items", 1).unwrap(); |
| 1704 | + |
| 1705 | + let detector = PIIDetectorRust::new(&config.into_any()).unwrap(); |
| 1706 | + let data = PyDict::new(py); |
| 1707 | + data.set_item("first", "alice@example.com").unwrap(); |
| 1708 | + data.set_item("second", "bob@example.com").unwrap(); |
| 1709 | + |
| 1710 | + let err = detector |
| 1711 | + .process_nested(py, &data.into_any(), "") |
| 1712 | + .unwrap_err(); |
| 1713 | + assert!(err.is_instance_of::<pyo3::exceptions::PyValueError>(py)); |
| 1714 | + }); |
| 1715 | + } |
| 1716 | + |
1662 | 1717 | #[test] |
1663 | 1718 | fn test_detects_plus_prefixed_international_phone_number() { |
1664 | 1719 | let config = PIIConfig { |
|
0 commit comments