Skip to content

Commit db6b04e

Browse files
authored
fix(query): harden connection info masking in logs and error messages (#19889)
* fix(query): harden connection info masking in logs and error messages - Improve regex patterns in mask_connection_info to handle quoted values with embedded parentheses and SQL-escaped quotes - Replace overly broad TOKEN key with specific keys (SECURITY_TOKEN, SESSION_TOKEN, SECRET_ID, SECRET_KEY) to avoid false positives - Add AST-level masking for CreateConnection and AlterTable(ModifyConnection) - Fix raw SQL leak in execute_state.rs error context - Mask SQL in http_query 'Creating new query' log line - Mask SQL in mysql_interactive_worker log and error suffix - Apply mask_connection_info in attach_query_str (visible in SHOW PROCESSLIST) - Add 17 unit tests covering all masking scenarios and false-positive checks * fix(query): add AWS secret aliases to connection info masker Add AWS_SECRET_KEY, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_TOKEN, and AWS_SESSION_TOKEN to the bare key masking regex. These are accepted aliases in the parser/binder (parse_s3_params, operator.rs) and were previously leaking in logs when used outside a CONNECTION=(...) block. Added 4 tests covering the new aliases. * fix(query): handle backslash-escaped quotes in connection masking regex The quoted-string pattern now recognizes both '' (SQL doubled quotes) and \' (backslash-escaped quotes) as valid escape sequences inside string literals. This prevents partial masking when connection params contain backslash escapes like PASSWORD = 'abc\'tail'. Added 3 tests for backslash escape scenarios (24 total). * fix(query): add GCS CREDENTIAL key and double-quoted literal support to masker - Add CREDENTIAL to the bare-key allowlist (used by GCS parse_gcs_params) - Support double-quoted string literals (MySQL/Hive dialects) in both CONNECTION block and bare-key regexes - Added 4 tests for GCS credential and double-quoted scenarios (28 total) * fix(query): resolve clippy let_and_return warning in mask_connection_info * chore: retrigger CI * fix(query): show first/last 2 chars in masked connection secrets Replace full masking (***) with partial masking (ak***23) to aid debugging while still protecting secrets. Remove block-level CONNECTION regex in favor of unified key-value matching. * fix(query): fix typos CI failure and update integration tests for new masking - Change test value to avoid 'ue' typos false positive - Update integration tests to match new partial masking behavior - Add TOKEN to secret key list for HuggingFace credentials * fix(query): revert TOKEN from secret key list to avoid false positives TOKEN is too generic (matches WHERE token = 'abc'). HuggingFace tokens are still masked at the AST level via CreateConnection option masking. * fix(test): update test expectations for new mask_string format (first2+last2) * fix(query): add EXTERNAL_ID, AWS_EXTERNAL_ID, DELEGATION to connection masker
1 parent 0a7bb4f commit db6b04e

16 files changed

Lines changed: 419 additions & 89 deletions

File tree

src/common/base/src/base/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ pub use string::convert_number_size;
5050
pub use string::escape_for_key;
5151
pub use string::format_byte_size;
5252
pub use string::mask_connection_info;
53+
pub use string::mask_partial;
5354
pub use string::mask_string;
5455
pub use string::short_sql;
5556
pub use string::unescape_for_key;

src/common/base/src/base/string.rs

Lines changed: 298 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
// limitations under the License.
1414

1515
use std::string::FromUtf8Error;
16+
use std::sync::LazyLock;
1617

1718
use databend_common_exception::ErrorCode;
1819
use databend_common_exception::Result;
@@ -99,13 +100,14 @@ pub fn unescape_for_key(key: &str) -> std::result::Result<String, FromUtf8Error>
99100

100101
/// Mask a string by "******", but keep `unmask_len` of suffix.
101102
#[inline]
102-
pub fn mask_string(s: &str, unmask_len: usize) -> String {
103-
if s.len() <= unmask_len {
104-
s.to_string()
103+
pub fn mask_string(s: &str, _unmask_len: usize) -> String {
104+
let chars: Vec<char> = s.chars().collect();
105+
if chars.len() <= 4 {
106+
"***".to_string()
105107
} else {
106-
let mut ret = "******".to_string();
107-
ret.push_str(&s[(s.len() - unmask_len)..]);
108-
ret
108+
let head: String = chars[..2].iter().collect();
109+
let tail: String = chars[chars.len() - 2..].iter().collect();
110+
format!("{}***{}", head, tail)
109111
}
110112
}
111113

@@ -175,19 +177,52 @@ pub fn convert_number_size(num: f64) -> String {
175177
format!("{}{}{}", negative, pretty_bytes, unit)
176178
}
177179

180+
/// Mask a secret value, showing first 2 and last 2 characters.
181+
/// For values with 4 or fewer characters, fully mask.
182+
pub fn mask_partial(s: &str) -> String {
183+
let chars: Vec<char> = s.chars().collect();
184+
if chars.len() <= 4 {
185+
"***".to_string()
186+
} else {
187+
let head: String = chars[..2].iter().collect();
188+
let tail: String = chars[chars.len() - 2..].iter().collect();
189+
format!("{}***{}", head, tail)
190+
}
191+
}
192+
178193
/// Mask the connection info in the sql.
179194
pub fn mask_connection_info(sql: &str) -> String {
180-
let mut masked_sql = sql.to_string();
195+
// Quoted string pattern: handles both '' (SQL doubled) and \' (backslash escaped) quotes
196+
// Also handles other backslash escapes like \\
197+
// Supports both single-quoted and double-quoted literals (MySQL/Hive dialects)
198+
const SINGLE_QUOTED_STR: &str = r"'([^'\\]|''|\\.)*'";
199+
const DOUBLE_QUOTED_STR: &str = r#""([^"\\]|""|\\.)*""#;
181200

182-
// Regular expression to find the CONNECTION block
183-
let re_connection = Regex::new(r"CONNECTION\s*=\s*\([^)]+\)").unwrap();
201+
// Match individual secret key-value pairs
202+
// Supports both single-quoted and double-quoted values
203+
static RE_SECRET_KV: LazyLock<Regex> = LazyLock::new(|| {
204+
let pattern = format!(
205+
r"(?i)(ACCESS_KEY_ID|ACCESS_KEY_SECRET|SECRET_ACCESS_KEY|AWS_KEY_ID|AWS_KEY_SECRET|AWS_SECRET_KEY|AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_TOKEN|AWS_SESSION_TOKEN|MASTER_KEY|ACCOUNT_KEY|ACCOUNT_NAME|PASSWORD|SECURITY_TOKEN|SESSION_TOKEN|SECRET_ID|SECRET_KEY|CREDENTIAL|EXTERNAL_ID|AWS_EXTERNAL_ID|DELEGATION)\s*=\s*({}|{})",
206+
SINGLE_QUOTED_STR, DOUBLE_QUOTED_STR
207+
);
208+
Regex::new(&pattern).unwrap()
209+
});
184210

185-
// Replace the entire CONNECTION block with 'CONNECTION = (***masked***)'
186-
masked_sql = re_connection
187-
.replace_all(&masked_sql, "CONNECTION = (***masked***)")
188-
.to_string();
189-
190-
masked_sql
211+
RE_SECRET_KV
212+
.replace_all(sql, |caps: &regex::Captures| {
213+
let key = &caps[1];
214+
let quoted_val = &caps[2];
215+
let quote_char = &quoted_val[..1];
216+
let inner = &quoted_val[1..quoted_val.len() - 1];
217+
format!(
218+
"{} = {}{}{}",
219+
key,
220+
quote_char,
221+
mask_partial(inner),
222+
quote_char
223+
)
224+
})
225+
.to_string()
191226
}
192227

193228
/// Maximum length of the SQL query to be displayed or log.
@@ -218,3 +253,251 @@ pub fn short_sql(sql: String, max_length: u64) -> String {
218253
query.to_string()
219254
}
220255
}
256+
257+
#[cfg(test)]
258+
mod tests {
259+
use super::*;
260+
261+
#[test]
262+
fn test_mask_connection_eq() {
263+
let sql = "CREATE STAGE s URL='s3://b' CONNECTION = (ACCESS_KEY_ID = 'akid123' SECRET_ACCESS_KEY = 'secret456')";
264+
let masked = mask_connection_info(sql);
265+
assert_eq!(
266+
masked,
267+
"CREATE STAGE s URL='s3://b' CONNECTION = (ACCESS_KEY_ID = 'ak***23' SECRET_ACCESS_KEY = 'se***56')"
268+
);
269+
assert!(!masked.contains("akid123"));
270+
assert!(!masked.contains("secret456"));
271+
}
272+
273+
#[test]
274+
fn test_mask_connection_arrow() {
275+
let sql = "SELECT * FROM 's3://b/data.csv' (CONNECTION => (ACCESS_KEY_ID = 'akid123', SECRET_ACCESS_KEY = 'secret456'))";
276+
let masked = mask_connection_info(sql);
277+
assert!(masked.contains("ACCESS_KEY_ID = 'ak***23'"));
278+
assert!(masked.contains("SECRET_ACCESS_KEY = 'se***56'"));
279+
assert!(!masked.contains("akid123"));
280+
assert!(!masked.contains("secret456"));
281+
}
282+
283+
#[test]
284+
fn test_mask_connection_with_parens_in_value() {
285+
// Value contains ')' inside quotes — should not break the regex
286+
let sql = "CONNECTION = (PASSWORD = 'p@ss(w)rd' ACCESS_KEY_ID = 'mykey123')";
287+
let masked = mask_connection_info(sql);
288+
assert!(masked.contains("PASSWORD = 'p@***rd'"));
289+
assert!(masked.contains("ACCESS_KEY_ID = 'my***23'"));
290+
assert!(!masked.contains("p@ss(w)rd"));
291+
assert!(!masked.contains("mykey123"));
292+
}
293+
294+
#[test]
295+
fn test_mask_connection_with_escaped_quotes() {
296+
// SQL-style escaped quotes ('') inside values
297+
let sql = "CONNECTION = (PASSWORD = 'it''s_secret' ACCESS_KEY_ID = 'ak')";
298+
let masked = mask_connection_info(sql);
299+
assert!(masked.contains("PASSWORD = 'it***et'"));
300+
assert!(masked.contains("ACCESS_KEY_ID = '***'"));
301+
assert!(!masked.contains("it''s_secret"));
302+
}
303+
304+
#[test]
305+
fn test_mask_connection_with_backslash_escaped_quotes() {
306+
// Backslash-escaped quotes (\') inside values
307+
let sql = r"CONNECTION = (PASSWORD = 'abc\'tail' ACCESS_KEY_ID = 'mykey')";
308+
let masked = mask_connection_info(sql);
309+
assert!(!masked.contains("tail"));
310+
assert!(!masked.contains("mykey"));
311+
}
312+
313+
#[test]
314+
fn test_mask_bare_key_with_backslash_escaped_quote() {
315+
// Backslash-escaped quote in a bare key value
316+
let sql = r"PASSWORD = 'p@ss\'word'";
317+
let masked = mask_connection_info(sql);
318+
assert!(!masked.contains("p@ss"));
319+
}
320+
321+
#[test]
322+
fn test_mask_connection_with_backslash_in_value() {
323+
// Backslash-escaped backslash (\\) inside values
324+
let sql = r"CONNECTION = (PASSWORD = 'path\\to\\secret' ACCESS_KEY_ID = 'key123')";
325+
let masked = mask_connection_info(sql);
326+
assert!(!masked.contains("path"));
327+
assert!(!masked.contains("key123"));
328+
}
329+
330+
#[test]
331+
fn test_mask_secret_kv_standalone() {
332+
let sql = "CREATE CONNECTION myconn STORAGE_TYPE = 's3' ACCESS_KEY_ID = 'AKID123' SECRET_ACCESS_KEY = 'secret456'";
333+
let masked = mask_connection_info(sql);
334+
assert!(masked.contains("ACCESS_KEY_ID = 'AK***23'"));
335+
assert!(masked.contains("SECRET_ACCESS_KEY = 'se***56'"));
336+
assert!(masked.contains("STORAGE_TYPE = 's3'"));
337+
assert!(!masked.contains("AKID123"));
338+
assert!(!masked.contains("secret456"));
339+
}
340+
341+
#[test]
342+
fn test_mask_password() {
343+
let sql = "PASSWORD = 'my_password'";
344+
let masked = mask_connection_info(sql);
345+
assert_eq!(masked, "PASSWORD = 'my***rd'");
346+
}
347+
348+
#[test]
349+
fn test_mask_session_token() {
350+
let sql = "SESSION_TOKEN = 'tok123'";
351+
let masked = mask_connection_info(sql);
352+
assert_eq!(masked, "SESSION_TOKEN = 'to***23'");
353+
}
354+
355+
#[test]
356+
fn test_mask_security_token() {
357+
let sql = "SECURITY_TOKEN = 'sectok'";
358+
let masked = mask_connection_info(sql);
359+
assert_eq!(masked, "SECURITY_TOKEN = 'se***ok'");
360+
}
361+
362+
#[test]
363+
fn test_mask_secret_id_and_key() {
364+
let sql = "SECRET_ID = 'sid' SECRET_KEY = 'skey'";
365+
let masked = mask_connection_info(sql);
366+
assert_eq!(masked, "SECRET_ID = '***' SECRET_KEY = '***'");
367+
}
368+
369+
#[test]
370+
fn test_mask_account_key() {
371+
let sql = "ACCOUNT_KEY = 'azure_key_123'";
372+
let masked = mask_connection_info(sql);
373+
assert_eq!(masked, "ACCOUNT_KEY = 'az***23'");
374+
assert!(!masked.contains("azure_key_123"));
375+
}
376+
377+
#[test]
378+
fn test_no_false_positive_on_token() {
379+
// Plain 'token' column should NOT be masked (we only match specific prefixed tokens)
380+
let sql = "WHERE token = 'abc'";
381+
let masked = mask_connection_info(sql);
382+
assert_eq!(masked, sql);
383+
}
384+
385+
#[test]
386+
fn test_no_false_positive_on_normal_settings() {
387+
let sql = "SET max_threads = '8'";
388+
let masked = mask_connection_info(sql);
389+
assert_eq!(masked, sql);
390+
}
391+
392+
#[test]
393+
fn test_no_false_positive_on_select() {
394+
let sql = "SELECT name FROM users WHERE id = '123'";
395+
let masked = mask_connection_info(sql);
396+
assert_eq!(masked, sql);
397+
}
398+
399+
#[test]
400+
fn test_mask_case_insensitive() {
401+
let sql = "connection = (access_key_id = 'akid123' secret_access_key = 'secret456')";
402+
let masked = mask_connection_info(sql);
403+
assert!(masked.contains("access_key_id = 'ak***23'"));
404+
assert!(masked.contains("secret_access_key = 'se***56'"));
405+
}
406+
407+
#[test]
408+
fn test_mask_mixed_connection_and_bare_keys() {
409+
// Both CONNECTION block keys and bare keys get masked
410+
let sql =
411+
"COPY INTO t FROM 's3://b' CONNECTION = (ACCESS_KEY_ID = 'akid123') PASSWORD = 'pw123'";
412+
let masked = mask_connection_info(sql);
413+
assert!(masked.contains("ACCESS_KEY_ID = 'ak***23'"));
414+
assert!(masked.contains("PASSWORD = 'pw***23'"));
415+
assert!(!masked.contains("akid123"));
416+
assert!(!masked.contains("pw123"));
417+
}
418+
419+
#[test]
420+
fn test_mask_empty_value() {
421+
let sql = "PASSWORD = ''";
422+
let masked = mask_connection_info(sql);
423+
assert_eq!(masked, "PASSWORD = '***'");
424+
}
425+
426+
#[test]
427+
fn test_mask_no_space_around_eq() {
428+
let sql = "CONNECTION=(ACCESS_KEY_ID='akid123')";
429+
let masked = mask_connection_info(sql);
430+
assert!(masked.contains("ACCESS_KEY_ID = 'ak***23'"));
431+
}
432+
433+
#[test]
434+
fn test_mask_aws_secret_key() {
435+
let sql = "CREATE CONNECTION c STORAGE_TYPE='s3' AWS_SECRET_KEY='my_secret'";
436+
let masked = mask_connection_info(sql);
437+
assert!(masked.contains("AWS_SECRET_KEY = 'my***et'"));
438+
assert!(masked.contains("STORAGE_TYPE='s3'"));
439+
assert!(!masked.contains("my_secret"));
440+
}
441+
442+
#[test]
443+
fn test_mask_aws_token() {
444+
let sql = "CREATE CONNECTION c STORAGE_TYPE='s3' AWS_TOKEN='tok_val99'";
445+
let masked = mask_connection_info(sql);
446+
assert!(masked.contains("AWS_TOKEN = 'to***99'"));
447+
assert!(!masked.contains("tok_val99"));
448+
}
449+
450+
#[test]
451+
fn test_mask_aws_access_key_id() {
452+
let sql = "AWS_ACCESS_KEY_ID='AKIA1234' AWS_SECRET_ACCESS_KEY='secret_val'";
453+
let masked = mask_connection_info(sql);
454+
assert!(masked.contains("AWS_ACCESS_KEY_ID = 'AK***34'"));
455+
assert!(masked.contains("AWS_SECRET_ACCESS_KEY = 'se***al'"));
456+
assert!(!masked.contains("AKIA1234"));
457+
assert!(!masked.contains("secret_val"));
458+
}
459+
460+
#[test]
461+
fn test_mask_aws_session_token() {
462+
let sql = "AWS_SESSION_TOKEN='session_tok_123'";
463+
let masked = mask_connection_info(sql);
464+
assert_eq!(masked, "AWS_SESSION_TOKEN = 'se***23'");
465+
assert!(!masked.contains("session_tok_123"));
466+
}
467+
468+
#[test]
469+
fn test_mask_gcs_credential() {
470+
let sql =
471+
"CREATE CONNECTION c STORAGE_TYPE='gcs' CREDENTIAL='service-account-json-content'";
472+
let masked = mask_connection_info(sql);
473+
assert!(masked.contains("CREDENTIAL = 'se***nt'"));
474+
assert!(masked.contains("STORAGE_TYPE='gcs'"));
475+
assert!(!masked.contains("service-account-json-content"));
476+
}
477+
478+
#[test]
479+
fn test_mask_double_quoted_password() {
480+
// MySQL/Hive dialect uses double-quoted string literals
481+
let sql = r#"CREATE CONNECTION c STORAGE_TYPE="s3" PASSWORD="secret_val""#;
482+
let masked = mask_connection_info(sql);
483+
assert!(masked.contains(r#"PASSWORD = "se***al""#));
484+
assert!(!masked.contains("secret_val"));
485+
}
486+
487+
#[test]
488+
fn test_mask_double_quoted_connection_block() {
489+
let sql = r#"CONNECTION = (ACCESS_KEY_ID = "akid123" SECRET_ACCESS_KEY = "secret456")"#;
490+
let masked = mask_connection_info(sql);
491+
assert!(masked.contains(r#"ACCESS_KEY_ID = "ak***23""#));
492+
assert!(masked.contains(r#"SECRET_ACCESS_KEY = "se***56""#));
493+
assert!(!masked.contains("akid123"));
494+
assert!(!masked.contains("secret456"));
495+
}
496+
497+
#[test]
498+
fn test_mask_double_quoted_with_backslash_escape() {
499+
let sql = r#"PASSWORD = "pass\"word""#;
500+
let masked = mask_connection_info(sql);
501+
assert!(!masked.contains("pass"));
502+
}
503+
}

src/common/base/tests/it/string.rs

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,11 @@ fn test_progress() -> anyhow::Result<()> {
2929

3030
#[test]
3131
fn mask_string_test() {
32-
assert_eq!(mask_string("", 10), "".to_string());
33-
assert_eq!(mask_string("string", 0), "******".to_string());
34-
assert_eq!(mask_string("string", 1), "******g".to_string());
35-
assert_eq!(mask_string("string", 2), "******ng".to_string());
36-
assert_eq!(mask_string("string", 3), "******ing".to_string());
37-
assert_eq!(mask_string("string", 20), "string".to_string());
32+
assert_eq!(mask_string("", 10), "***".to_string());
33+
assert_eq!(mask_string("ab", 0), "***".to_string());
34+
assert_eq!(mask_string("abcd", 3), "***".to_string());
35+
assert_eq!(mask_string("string", 3), "st***ng".to_string());
36+
assert_eq!(mask_string("longstring", 20), "lo***ng".to_string());
3837
}
3938

4039
#[test]
@@ -94,18 +93,15 @@ fn test_mask_connection_info() {
9493
);"#;
9594

9695
let actual = mask_connection_info(sql);
97-
let expect = r#"COPY INTO table1
98-
FROM 's3://xx/yy
99-
CONNECTION = (***masked***)
100-
PATTERN = '.*[.]csv'
101-
FILE_FORMAT = (
102-
TYPE = CSV,
103-
FIELD_DELIMITER = '\t',
104-
RECORD_DELIMITER = '\n',
105-
SKIP_HEADER = 1
106-
);"#;
107-
108-
assert_eq!(expect, actual);
96+
// ACCESS_KEY_ID and SECRET_ACCESS_KEY are sensitive keys, their values get masked
97+
// 'aaa' and 'sss' are <= 4 chars so fully masked
98+
assert!(actual.contains("ACCESS_KEY_ID = '***'"));
99+
assert!(actual.contains("SECRET_ACCESS_KEY = '***'"));
100+
// REGION is not a sensitive key, should remain unchanged
101+
assert!(actual.contains("REGION = 'us-east-2'"));
102+
// Original secrets should not appear
103+
assert!(!actual.contains("= 'aaa'"));
104+
assert!(!actual.contains("= 'sss'"));
109105
}
110106

111107
#[test]

0 commit comments

Comments
 (0)