@@ -33,7 +33,8 @@ fn scan_container_inner<'py>(
3333) -> PyResult < ( usize , Bound < ' py , PyAny > , Bound < ' py , PyList > ) > {
3434 let findings = PyList :: empty ( py) ;
3535
36- if let Ok ( text) = container. extract :: < String > ( ) {
36+ if container. is_exact_instance_of :: < PyString > ( ) {
37+ let text = container. extract :: < String > ( ) ?;
3738 let ( matches, redacted) = detect_and_redact ( & text, config) ;
3839 for finding in & matches {
3940 let finding_dict = PyDict :: new ( py) ;
@@ -137,7 +138,7 @@ fn scan_container_inner<'py>(
137138 for finding in child_findings. iter ( ) {
138139 findings. append ( finding) ?;
139140 }
140- if count > 0 || !redacted_state . eq ( & state_any) ? {
141+ if count > 0 || !same_safe_value ( & redacted_state , & state_any, & mut HashSet :: new ( ) ) ? {
141142 apply_object_state ( py, & target, & redacted_state) ?;
142143 rebuilt = Some ( target. into_any ( ) ) ;
143144 }
@@ -245,15 +246,19 @@ fn serialized_dict_duplicates_rebuild_state(
245246 let Some ( rebuild_value) = rebuild_dict. get_item ( & key) ? else {
246247 return Ok ( false ) ;
247248 } ;
248- if !same_safe_value ( & serialized_value, & rebuild_value) ? {
249+ if !same_safe_value ( & serialized_value, & rebuild_value, & mut HashSet :: new ( ) ) ? {
249250 return Ok ( false ) ;
250251 }
251252 }
252253
253254 Ok ( true )
254255}
255256
256- fn same_safe_value ( left : & Bound < ' _ , PyAny > , right : & Bound < ' _ , PyAny > ) -> PyResult < bool > {
257+ fn same_safe_value (
258+ left : & Bound < ' _ , PyAny > ,
259+ right : & Bound < ' _ , PyAny > ,
260+ seen : & mut HashSet < ( usize , usize ) > ,
261+ ) -> PyResult < bool > {
257262 if left. is ( right) {
258263 return Ok ( true ) ;
259264 }
@@ -262,31 +267,44 @@ fn same_safe_value(left: &Bound<'_, PyAny>, right: &Bound<'_, PyAny>) -> PyResul
262267 return Ok ( left. extract :: < String > ( ) ? == right. extract :: < String > ( ) ?) ;
263268 }
264269
270+ if is_exact_safe_scalar_pair ( left, right) {
271+ return left. eq ( right) ;
272+ }
273+
265274 if let ( Ok ( left_list) , Ok ( right_list) ) = ( left. cast :: < PyList > ( ) , right. cast :: < PyList > ( ) ) {
275+ if !seen. insert ( ( left. as_ptr ( ) as usize , right. as_ptr ( ) as usize ) ) {
276+ return Ok ( false ) ;
277+ }
266278 if left_list. len ( ) != right_list. len ( ) {
267279 return Ok ( false ) ;
268280 }
269281 for ( left_item, right_item) in left_list. iter ( ) . zip ( right_list. iter ( ) ) {
270- if !same_safe_value ( & left_item, & right_item) ? {
282+ if !same_safe_value ( & left_item, & right_item, seen ) ? {
271283 return Ok ( false ) ;
272284 }
273285 }
274286 return Ok ( true ) ;
275287 }
276288
277289 if let ( Ok ( left_tuple) , Ok ( right_tuple) ) = ( left. cast :: < PyTuple > ( ) , right. cast :: < PyTuple > ( ) ) {
290+ if !seen. insert ( ( left. as_ptr ( ) as usize , right. as_ptr ( ) as usize ) ) {
291+ return Ok ( false ) ;
292+ }
278293 if left_tuple. len ( ) != right_tuple. len ( ) {
279294 return Ok ( false ) ;
280295 }
281296 for ( left_item, right_item) in left_tuple. iter ( ) . zip ( right_tuple. iter ( ) ) {
282- if !same_safe_value ( & left_item, & right_item) ? {
297+ if !same_safe_value ( & left_item, & right_item, seen ) ? {
283298 return Ok ( false ) ;
284299 }
285300 }
286301 return Ok ( true ) ;
287302 }
288303
289304 if let ( Ok ( left_dict) , Ok ( right_dict) ) = ( left. cast :: < PyDict > ( ) , right. cast :: < PyDict > ( ) ) {
305+ if !seen. insert ( ( left. as_ptr ( ) as usize , right. as_ptr ( ) as usize ) ) {
306+ return Ok ( false ) ;
307+ }
290308 if left_dict. len ( ) != right_dict. len ( )
291309 || !dict_has_only_exact_string_keys ( left_dict)
292310 || !dict_has_only_exact_string_keys ( right_dict)
@@ -297,7 +315,7 @@ fn same_safe_value(left: &Bound<'_, PyAny>, right: &Bound<'_, PyAny>) -> PyResul
297315 let Some ( right_value) = right_dict. get_item ( & key) ? else {
298316 return Ok ( false ) ;
299317 } ;
300- if !same_safe_value ( & left_value, & right_value) ? {
318+ if !same_safe_value ( & left_value, & right_value, seen ) ? {
301319 return Ok ( false ) ;
302320 }
303321 }
@@ -312,6 +330,36 @@ fn dict_has_only_exact_string_keys(dict: &Bound<'_, PyDict>) -> bool {
312330 . all ( |( key, _) | key. is_exact_instance_of :: < PyString > ( ) )
313331}
314332
333+ fn is_exact_safe_scalar_pair ( left : & Bound < ' _ , PyAny > , right : & Bound < ' _ , PyAny > ) -> bool {
334+ let left_module = left
335+ . get_type ( )
336+ . module ( )
337+ . and_then ( |module| module. extract :: < String > ( ) ) ;
338+ let left_name = left
339+ . get_type ( )
340+ . name ( )
341+ . and_then ( |name| name. extract :: < String > ( ) ) ;
342+ let right_module = right
343+ . get_type ( )
344+ . module ( )
345+ . and_then ( |module| module. extract :: < String > ( ) ) ;
346+ let right_name = right
347+ . get_type ( )
348+ . name ( )
349+ . and_then ( |name| name. extract :: < String > ( ) ) ;
350+ match (
351+ left_module. as_deref ( ) ,
352+ left_name. as_deref ( ) ,
353+ right_module. as_deref ( ) ,
354+ right_name. as_deref ( ) ,
355+ ) {
356+ ( Ok ( "builtins" ) , Ok ( left_name) , Ok ( "builtins" ) , Ok ( right_name) ) => {
357+ left_name == right_name && matches ! ( left_name, "int" | "float" | "bool" | "bytes" )
358+ }
359+ _ => false ,
360+ }
361+ }
362+
315363fn serialized_result < ' py > (
316364 py : Python < ' py > ,
317365 container : & Bound < ' py , PyAny > ,
@@ -525,6 +573,70 @@ class Model:
525573 . unwrap ( ) ;
526574 }
527575
576+ #[ test]
577+ fn scan_container_does_not_double_count_with_copied_model_dump_scalar ( ) {
578+ Python :: initialize ( ) ;
579+ Python :: attach ( |py| -> PyResult < ( ) > {
580+ let code = CString :: new (
581+ r#"
582+ class Model:
583+ def __init__(self):
584+ self.text = "AWS_ACCESS_KEY_ID=AKIAFAKE12345EXAMPLE"
585+ self.num = int("1000000000000000000000")
586+
587+ def model_dump(self):
588+ return {"text": self.text, "num": int(str(self.num))}
589+ "# ,
590+ )
591+ . unwrap ( ) ;
592+ let module =
593+ PyModule :: from_code ( py, code. as_c_str ( ) , c"test_module.py" , c"test_module" ) ?;
594+ let instance = module. getattr ( "Model" ) ?. call0 ( ) ?;
595+ let config = SecretsDetectionConfig :: default ( ) ;
596+
597+ let ( count, _, findings) = scan_container ( py, & instance, & config) ?;
598+
599+ assert_eq ! ( count, 1 ) ;
600+ assert_eq ! ( findings. len( ) , 1 ) ;
601+
602+ Ok ( ( ) )
603+ } )
604+ . unwrap ( ) ;
605+ }
606+
607+ #[ test]
608+ fn scan_container_scans_cyclic_model_dump_without_duplicate_gate_recursion ( ) {
609+ Python :: initialize ( ) ;
610+ Python :: attach ( |py| -> PyResult < ( ) > {
611+ let code = CString :: new (
612+ r#"
613+ class Model:
614+ def __init__(self):
615+ self.items = []
616+ self.items.append(self.items)
617+
618+ def model_dump(self):
619+ items = []
620+ items.append(items)
621+ return {"items": items}
622+ "# ,
623+ )
624+ . unwrap ( ) ;
625+ let module =
626+ PyModule :: from_code ( py, code. as_c_str ( ) , c"test_module.py" , c"test_module" ) ?;
627+ let instance = module. getattr ( "Model" ) ?. call0 ( ) ?;
628+ let config = SecretsDetectionConfig :: default ( ) ;
629+
630+ let ( count, _, findings) = scan_container ( py, & instance, & config) ?;
631+
632+ assert_eq ! ( count, 0 ) ;
633+ assert_eq ! ( findings. len( ) , 0 ) ;
634+
635+ Ok ( ( ) )
636+ } )
637+ . unwrap ( ) ;
638+ }
639+
528640 #[ test]
529641 fn duplicate_gate_ignores_non_string_model_dump_keys_without_lookup ( ) {
530642 Python :: initialize ( ) ;
@@ -533,7 +645,7 @@ class Model:
533645 r#"
534646class BadKey:
535647 def __hash__(self):
536- return hash("text ")
648+ return hash("bad-key ")
537649
538650 def __eq__(self, other):
539651 raise RuntimeError("duplicate gate should not compare custom keys")
@@ -557,4 +669,41 @@ class BadKey:
557669 } )
558670 . unwrap ( ) ;
559671 }
672+
673+ #[ test]
674+ fn scan_container_ignores_duplicate_gate_for_non_string_model_dump_keys ( ) {
675+ Python :: initialize ( ) ;
676+ Python :: attach ( |py| -> PyResult < ( ) > {
677+ let code = CString :: new (
678+ r#"
679+ class BadKey:
680+ def __hash__(self):
681+ return hash("bad-key")
682+
683+ def __eq__(self, other):
684+ raise RuntimeError("duplicate gate should not compare custom keys")
685+
686+ class Model:
687+ def __init__(self):
688+ self.text = "clean"
689+
690+ def model_dump(self):
691+ return {BadKey(): "clean"}
692+ "# ,
693+ )
694+ . unwrap ( ) ;
695+ let module =
696+ PyModule :: from_code ( py, code. as_c_str ( ) , c"test_module.py" , c"test_module" ) ?;
697+ let instance = module. getattr ( "Model" ) ?. call0 ( ) ?;
698+ let config = SecretsDetectionConfig :: default ( ) ;
699+
700+ let ( count, _, findings) = scan_container ( py, & instance, & config) ?;
701+
702+ assert_eq ! ( count, 0 ) ;
703+ assert_eq ! ( findings. len( ) , 0 ) ;
704+
705+ Ok ( ( ) )
706+ } )
707+ . unwrap ( ) ;
708+ }
560709}
0 commit comments