Skip to content

Commit 413a5fd

Browse files
committed
perf: cache key sensitivity in rust masking sidecar
Signed-off-by: lucarlig <luca.carlig@ibm.com>
1 parent 21b1dc3 commit 413a5fd

1 file changed

Lines changed: 53 additions & 48 deletions

File tree

  • tools_rust/request_logging_masking_sidecar/src

tools_rust/request_logging_masking_sidecar/src/lib.rs

Lines changed: 53 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use pyo3::prelude::*;
22
use pyo3::types::{PyAny, PyDict, PyList, PyString};
3+
use std::collections::HashMap;
34

45
const MASKED_VALUE: &str = "******";
56
const NESTED_TOO_DEEP: &str = "<nested too deep>";
@@ -43,48 +44,6 @@ fn has_non_sensitive_suffix(normalized_key: &str) -> bool {
4344
.any(|suffix| normalized_key.ends_with(suffix))
4445
}
4546

46-
/// Reports whether a normalized key contains a sensitive word or word pair.
///
/// `normalized_key` is split on `_`; empty segments (from leading, trailing or
/// doubled underscores) are ignored. The key is considered sensitive when:
///
/// * any single segment equals one of `SINGLE_TOKENS`, or
/// * any two *adjacent* non-empty segments equal one of the `DOUBLE_TOKENS`
///   pairs (for example `api` followed by `key`).
///
/// Matching is exact per segment, so `tokenizer` does not match `token`.
/// Returns `false` for an empty key.
fn normalized_contains_sensitive_phrase(normalized_key: &str) -> bool {
    const SINGLE_TOKENS: &[&str] = &[
        "password",
        "passphrase",
        "secret",
        "token",
        "apikey",
        "authorization",
    ];
    // Each pair is (first segment, second segment) of a two-word phrase.
    const DOUBLE_TOKENS: &[(&str, &str)] = &[
        ("api", "key"),
        ("access", "token"),
        ("refresh", "token"),
        ("client", "secret"),
        ("auth", "token"),
        ("jwt", "token"),
        ("private", "key"),
    ];

    // Single pass over the segments, remembering only the previous one. This
    // avoids collecting into a Vec and building a String for every adjacent
    // pair, while producing the same answer.
    let mut previous: Option<&str> = None;
    for token in normalized_key.split('_').filter(|part| !part.is_empty()) {
        if SINGLE_TOKENS.iter().any(|candidate| *candidate == token) {
            return true;
        }

        if let Some(prev) = previous {
            let pair_matches = DOUBLE_TOKENS
                .iter()
                .any(|&(first, second)| first == prev && second == token);
            if pair_matches {
                return true;
            }
        }

        previous = Some(token);
    }

    false
}
87-
8847
fn is_sensitive_key(key: &str) -> bool {
8948
let normalized_key = normalize_key_for_masking(key);
9049
if normalized_key.is_empty() {
@@ -124,7 +83,42 @@ fn is_sensitive_key(key: &str) -> bool {
12483
return false;
12584
}
12685

127-
normalized_contains_sensitive_phrase(&normalized_key)
86+
let mut previous = "";
87+
for token in normalized_key.split('_').filter(|part| !part.is_empty()) {
88+
if matches!(
89+
token,
90+
"password" | "passphrase" | "secret" | "token" | "apikey" | "authorization"
91+
) {
92+
return true;
93+
}
94+
95+
if matches!(
96+
(previous, token),
97+
("api", "key")
98+
| ("access", "token")
99+
| ("refresh", "token")
100+
| ("client", "secret")
101+
| ("auth", "token")
102+
| ("jwt", "token")
103+
| ("private", "key")
104+
) {
105+
return true;
106+
}
107+
108+
previous = token;
109+
}
110+
111+
false
112+
}
113+
114+
fn is_sensitive_key_cached(key: &str, cache: &mut HashMap<String, bool>) -> bool {
115+
if let Some(result) = cache.get(key) {
116+
return *result;
117+
}
118+
119+
let result = is_sensitive_key(key);
120+
cache.insert(key.to_owned(), result);
121+
result
128122
}
129123

130124
fn mask_cookie_header(cookie_header: &str) -> String {
@@ -156,6 +150,7 @@ fn mask_sensitive_data_inner(
156150
py: Python<'_>,
157151
data: &Bound<'_, PyAny>,
158152
max_depth: i32,
153+
key_cache: &mut HashMap<String, bool>,
159154
) -> PyResult<Py<PyAny>> {
160155
if max_depth <= 0 {
161156
return Ok(PyString::new(py, NESTED_TOO_DEEP).into_any().unbind());
@@ -165,10 +160,13 @@ fn mask_sensitive_data_inner(
165160
let masked = PyDict::new(py);
166161
for (key, value) in dict.iter() {
167162
let key_string = key.str()?.to_string_lossy().into_owned();
168-
if is_sensitive_key(&key_string) {
163+
if is_sensitive_key_cached(&key_string, key_cache) {
169164
masked.set_item(key, MASKED_VALUE)?;
170165
} else {
171-
masked.set_item(key, mask_sensitive_data_inner(py, &value, max_depth - 1)?)?;
166+
masked.set_item(
167+
key,
168+
mask_sensitive_data_inner(py, &value, max_depth - 1, key_cache)?,
169+
)?;
172170
}
173171
}
174172
return Ok(masked.into_any().unbind());
@@ -177,7 +175,12 @@ fn mask_sensitive_data_inner(
177175
if let Ok(list) = data.cast::<PyList>() {
178176
let masked = PyList::empty(py);
179177
for item in list.iter() {
180-
masked.append(mask_sensitive_data_inner(py, &item, max_depth - 1)?)?;
178+
masked.append(mask_sensitive_data_inner(
179+
py,
180+
&item,
181+
max_depth - 1,
182+
key_cache,
183+
)?)?;
181184
}
182185
return Ok(masked.into_any().unbind());
183186
}
@@ -191,17 +194,19 @@ fn mask_sensitive_data(
191194
data: &Bound<'_, PyAny>,
192195
max_depth: Option<i32>,
193196
) -> PyResult<Py<PyAny>> {
194-
mask_sensitive_data_inner(py, data, max_depth.unwrap_or(10))
197+
let mut key_cache = HashMap::new();
198+
mask_sensitive_data_inner(py, data, max_depth.unwrap_or(10), &mut key_cache)
195199
}
196200

197201
#[pyfunction]
198202
fn mask_sensitive_headers(py: Python<'_>, headers: &Bound<'_, PyAny>) -> PyResult<Py<PyAny>> {
199203
let source = headers.cast::<PyDict>()?;
200204
let masked = PyDict::new(py);
205+
let mut key_cache = HashMap::with_capacity(source.len());
201206

202207
for (key, value) in source.iter() {
203208
let key_string = key.str()?.to_string_lossy().into_owned();
204-
if is_sensitive_key(&key_string) {
209+
if is_sensitive_key_cached(&key_string, &mut key_cache) {
205210
masked.set_item(key, MASKED_VALUE)?;
206211
continue;
207212
}

0 commit comments

Comments
 (0)