11use pyo3:: prelude:: * ;
22use pyo3:: types:: { PyAny , PyDict , PyList , PyString } ;
3+ use std:: collections:: HashMap ;
34
45const MASKED_VALUE : & str = "******" ;
56const NESTED_TOO_DEEP : & str = "<nested too deep>" ;
@@ -43,48 +44,6 @@ fn has_non_sensitive_suffix(normalized_key: &str) -> bool {
4344 . any ( |suffix| normalized_key. ends_with ( suffix) )
4445}
4546
/// Reports whether an already-normalized key contains a sensitive word or phrase.
///
/// The key is split on `'_'` and empty segments are discarded, so leading,
/// trailing, or doubled underscores do not affect the result. The key is
/// considered sensitive when either:
///
/// * any single segment equals one of `SINGLE_TOKENS`, or
/// * any two adjacent segments form one of the `DOUBLE_TOKENS` pairs
///   (for example `api` followed by `key`).
///
/// Matching is exact and case-sensitive on whole segments; substrings inside
/// a segment (such as `tokens`) do not match.
fn normalized_contains_sensitive_phrase(normalized_key: &str) -> bool {
    const SINGLE_TOKENS: &[&str] = &[
        "password",
        "passphrase",
        "secret",
        "token",
        "apikey",
        "authorization",
    ];
    // Stored as (first, second) pairs so adjacent segments can be compared
    // directly instead of being joined into a temporary `String`.
    const DOUBLE_TOKENS: &[(&str, &str)] = &[
        ("api", "key"),
        ("access", "token"),
        ("refresh", "token"),
        ("client", "secret"),
        ("auth", "token"),
        ("jwt", "token"),
        ("private", "key"),
    ];

    // Single pass over the segments: `previous` holds the last non-empty
    // segment, which is all that is needed to test adjacent pairs.
    let mut previous: Option<&str> = None;
    for token in normalized_key.split('_').filter(|part| !part.is_empty()) {
        if SINGLE_TOKENS.iter().any(|candidate| *candidate == token) {
            return true;
        }

        if let Some(prev) = previous {
            let is_sensitive_pair = DOUBLE_TOKENS
                .iter()
                .any(|&(first, second)| first == prev && second == token);
            if is_sensitive_pair {
                return true;
            }
        }

        previous = Some(token);
    }

    false
}
87-
8847fn is_sensitive_key ( key : & str ) -> bool {
8948 let normalized_key = normalize_key_for_masking ( key) ;
9049 if normalized_key. is_empty ( ) {
@@ -124,7 +83,42 @@ fn is_sensitive_key(key: &str) -> bool {
12483 return false ;
12584 }
12685
127- normalized_contains_sensitive_phrase ( & normalized_key)
86+ let mut previous = "" ;
87+ for token in normalized_key. split ( '_' ) . filter ( |part| !part. is_empty ( ) ) {
88+ if matches ! (
89+ token,
90+ "password" | "passphrase" | "secret" | "token" | "apikey" | "authorization"
91+ ) {
92+ return true ;
93+ }
94+
95+ if matches ! (
96+ ( previous, token) ,
97+ ( "api" , "key" )
98+ | ( "access" , "token" )
99+ | ( "refresh" , "token" )
100+ | ( "client" , "secret" )
101+ | ( "auth" , "token" )
102+ | ( "jwt" , "token" )
103+ | ( "private" , "key" )
104+ ) {
105+ return true ;
106+ }
107+
108+ previous = token;
109+ }
110+
111+ false
112+ }
113+
114+ fn is_sensitive_key_cached ( key : & str , cache : & mut HashMap < String , bool > ) -> bool {
115+ if let Some ( result) = cache. get ( key) {
116+ return * result;
117+ }
118+
119+ let result = is_sensitive_key ( key) ;
120+ cache. insert ( key. to_owned ( ) , result) ;
121+ result
128122}
129123
130124fn mask_cookie_header ( cookie_header : & str ) -> String {
@@ -156,6 +150,7 @@ fn mask_sensitive_data_inner(
156150 py : Python < ' _ > ,
157151 data : & Bound < ' _ , PyAny > ,
158152 max_depth : i32 ,
153+ key_cache : & mut HashMap < String , bool > ,
159154) -> PyResult < Py < PyAny > > {
160155 if max_depth <= 0 {
161156 return Ok ( PyString :: new ( py, NESTED_TOO_DEEP ) . into_any ( ) . unbind ( ) ) ;
@@ -165,10 +160,13 @@ fn mask_sensitive_data_inner(
165160 let masked = PyDict :: new ( py) ;
166161 for ( key, value) in dict. iter ( ) {
167162 let key_string = key. str ( ) ?. to_string_lossy ( ) . into_owned ( ) ;
168- if is_sensitive_key ( & key_string) {
163+ if is_sensitive_key_cached ( & key_string, key_cache ) {
169164 masked. set_item ( key, MASKED_VALUE ) ?;
170165 } else {
171- masked. set_item ( key, mask_sensitive_data_inner ( py, & value, max_depth - 1 ) ?) ?;
166+ masked. set_item (
167+ key,
168+ mask_sensitive_data_inner ( py, & value, max_depth - 1 , key_cache) ?,
169+ ) ?;
172170 }
173171 }
174172 return Ok ( masked. into_any ( ) . unbind ( ) ) ;
@@ -177,7 +175,12 @@ fn mask_sensitive_data_inner(
177175 if let Ok ( list) = data. cast :: < PyList > ( ) {
178176 let masked = PyList :: empty ( py) ;
179177 for item in list. iter ( ) {
180- masked. append ( mask_sensitive_data_inner ( py, & item, max_depth - 1 ) ?) ?;
178+ masked. append ( mask_sensitive_data_inner (
179+ py,
180+ & item,
181+ max_depth - 1 ,
182+ key_cache,
183+ ) ?) ?;
181184 }
182185 return Ok ( masked. into_any ( ) . unbind ( ) ) ;
183186 }
@@ -191,17 +194,19 @@ fn mask_sensitive_data(
191194 data : & Bound < ' _ , PyAny > ,
192195 max_depth : Option < i32 > ,
193196) -> PyResult < Py < PyAny > > {
194- mask_sensitive_data_inner ( py, data, max_depth. unwrap_or ( 10 ) )
197+ let mut key_cache = HashMap :: new ( ) ;
198+ mask_sensitive_data_inner ( py, data, max_depth. unwrap_or ( 10 ) , & mut key_cache)
195199}
196200
197201#[ pyfunction]
198202fn mask_sensitive_headers ( py : Python < ' _ > , headers : & Bound < ' _ , PyAny > ) -> PyResult < Py < PyAny > > {
199203 let source = headers. cast :: < PyDict > ( ) ?;
200204 let masked = PyDict :: new ( py) ;
205+ let mut key_cache = HashMap :: with_capacity ( source. len ( ) ) ;
201206
202207 for ( key, value) in source. iter ( ) {
203208 let key_string = key. str ( ) ?. to_string_lossy ( ) . into_owned ( ) ;
204- if is_sensitive_key ( & key_string) {
209+ if is_sensitive_key_cached ( & key_string, & mut key_cache ) {
205210 masked. set_item ( key, MASKED_VALUE ) ?;
206211 continue ;
207212 }
0 commit comments