|
13 | 13 | // limitations under the License. |
14 | 14 |
|
15 | 15 | use std::string::FromUtf8Error; |
| 16 | +use std::sync::LazyLock; |
16 | 17 |
|
17 | 18 | use databend_common_exception::ErrorCode; |
18 | 19 | use databend_common_exception::Result; |
@@ -99,13 +100,14 @@ pub fn unescape_for_key(key: &str) -> std::result::Result<String, FromUtf8Error> |
99 | 100 |
|
100 | 101 | /// Mask a string by "******", but keep `unmask_len` of suffix. |
101 | 102 | #[inline] |
102 | | -pub fn mask_string(s: &str, unmask_len: usize) -> String { |
103 | | - if s.len() <= unmask_len { |
104 | | - s.to_string() |
| 103 | +pub fn mask_string(s: &str, _unmask_len: usize) -> String { |
| 104 | + let chars: Vec<char> = s.chars().collect(); |
| 105 | + if chars.len() <= 4 { |
| 106 | + "***".to_string() |
105 | 107 | } else { |
106 | | - let mut ret = "******".to_string(); |
107 | | - ret.push_str(&s[(s.len() - unmask_len)..]); |
108 | | - ret |
| 108 | + let head: String = chars[..2].iter().collect(); |
| 109 | + let tail: String = chars[chars.len() - 2..].iter().collect(); |
| 110 | + format!("{}***{}", head, tail) |
109 | 111 | } |
110 | 112 | } |
111 | 113 |
|
@@ -175,19 +177,52 @@ pub fn convert_number_size(num: f64) -> String { |
175 | 177 | format!("{}{}{}", negative, pretty_bytes, unit) |
176 | 178 | } |
177 | 179 |
|
| 180 | +/// Mask a secret value, showing first 2 and last 2 characters. |
| 181 | +/// For values with 4 or fewer characters, fully mask. |
| 182 | +pub fn mask_partial(s: &str) -> String { |
| 183 | + let chars: Vec<char> = s.chars().collect(); |
| 184 | + if chars.len() <= 4 { |
| 185 | + "***".to_string() |
| 186 | + } else { |
| 187 | + let head: String = chars[..2].iter().collect(); |
| 188 | + let tail: String = chars[chars.len() - 2..].iter().collect(); |
| 189 | + format!("{}***{}", head, tail) |
| 190 | + } |
| 191 | +} |
| 192 | + |
178 | 193 | /// Mask the connection info in the sql. |
179 | 194 | pub fn mask_connection_info(sql: &str) -> String { |
180 | | - let mut masked_sql = sql.to_string(); |
| 195 | + // Quoted string pattern: handles both '' (SQL doubled) and \' (backslash escaped) quotes |
| 196 | + // Also handles other backslash escapes like \\ |
| 197 | + // Supports both single-quoted and double-quoted literals (MySQL/Hive dialects) |
| 198 | + const SINGLE_QUOTED_STR: &str = r"'([^'\\]|''|\\.)*'"; |
| 199 | + const DOUBLE_QUOTED_STR: &str = r#""([^"\\]|""|\\.)*""#; |
181 | 200 |
|
182 | | - // Regular expression to find the CONNECTION block |
183 | | - let re_connection = Regex::new(r"CONNECTION\s*=\s*\([^)]+\)").unwrap(); |
| 201 | + // Match individual secret key-value pairs |
| 202 | + // Supports both single-quoted and double-quoted values |
| 203 | + static RE_SECRET_KV: LazyLock<Regex> = LazyLock::new(|| { |
| 204 | + let pattern = format!( |
| 205 | + r"(?i)(ACCESS_KEY_ID|ACCESS_KEY_SECRET|SECRET_ACCESS_KEY|AWS_KEY_ID|AWS_KEY_SECRET|AWS_SECRET_KEY|AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_TOKEN|AWS_SESSION_TOKEN|MASTER_KEY|ACCOUNT_KEY|ACCOUNT_NAME|PASSWORD|SECURITY_TOKEN|SESSION_TOKEN|SECRET_ID|SECRET_KEY|CREDENTIAL|EXTERNAL_ID|AWS_EXTERNAL_ID|DELEGATION)\s*=\s*({}|{})", |
| 206 | + SINGLE_QUOTED_STR, DOUBLE_QUOTED_STR |
| 207 | + ); |
| 208 | + Regex::new(&pattern).unwrap() |
| 209 | + }); |
184 | 210 |
|
185 | | - // Replace the entire CONNECTION block with 'CONNECTION = (***masked***)' |
186 | | - masked_sql = re_connection |
187 | | - .replace_all(&masked_sql, "CONNECTION = (***masked***)") |
188 | | - .to_string(); |
189 | | - |
190 | | - masked_sql |
| 211 | + RE_SECRET_KV |
| 212 | + .replace_all(sql, |caps: ®ex::Captures| { |
| 213 | + let key = &caps[1]; |
| 214 | + let quoted_val = &caps[2]; |
| 215 | + let quote_char = "ed_val[..1]; |
| 216 | + let inner = "ed_val[1..quoted_val.len() - 1]; |
| 217 | + format!( |
| 218 | + "{} = {}{}{}", |
| 219 | + key, |
| 220 | + quote_char, |
| 221 | + mask_partial(inner), |
| 222 | + quote_char |
| 223 | + ) |
| 224 | + }) |
| 225 | + .to_string() |
191 | 226 | } |
192 | 227 |
|
193 | 228 | /// Maximum length of the SQL query to be displayed or log. |
@@ -218,3 +253,251 @@ pub fn short_sql(sql: String, max_length: u64) -> String { |
218 | 253 | query.to_string() |
219 | 254 | } |
220 | 255 | } |
| 256 | + |
| 257 | +#[cfg(test)] |
| 258 | +mod tests { |
| 259 | + use super::*; |
| 260 | + |
| 261 | + #[test] |
| 262 | + fn test_mask_connection_eq() { |
| 263 | + let sql = "CREATE STAGE s URL='s3://b' CONNECTION = (ACCESS_KEY_ID = 'akid123' SECRET_ACCESS_KEY = 'secret456')"; |
| 264 | + let masked = mask_connection_info(sql); |
| 265 | + assert_eq!( |
| 266 | + masked, |
| 267 | + "CREATE STAGE s URL='s3://b' CONNECTION = (ACCESS_KEY_ID = 'ak***23' SECRET_ACCESS_KEY = 'se***56')" |
| 268 | + ); |
| 269 | + assert!(!masked.contains("akid123")); |
| 270 | + assert!(!masked.contains("secret456")); |
| 271 | + } |
| 272 | + |
| 273 | + #[test] |
| 274 | + fn test_mask_connection_arrow() { |
| 275 | + let sql = "SELECT * FROM 's3://b/data.csv' (CONNECTION => (ACCESS_KEY_ID = 'akid123', SECRET_ACCESS_KEY = 'secret456'))"; |
| 276 | + let masked = mask_connection_info(sql); |
| 277 | + assert!(masked.contains("ACCESS_KEY_ID = 'ak***23'")); |
| 278 | + assert!(masked.contains("SECRET_ACCESS_KEY = 'se***56'")); |
| 279 | + assert!(!masked.contains("akid123")); |
| 280 | + assert!(!masked.contains("secret456")); |
| 281 | + } |
| 282 | + |
| 283 | + #[test] |
| 284 | + fn test_mask_connection_with_parens_in_value() { |
| 285 | + // Value contains ')' inside quotes — should not break the regex |
| 286 | + let sql = "CONNECTION = (PASSWORD = 'p@ss(w)rd' ACCESS_KEY_ID = 'mykey123')"; |
| 287 | + let masked = mask_connection_info(sql); |
| 288 | + assert!(masked.contains("PASSWORD = 'p@***rd'")); |
| 289 | + assert!(masked.contains("ACCESS_KEY_ID = 'my***23'")); |
| 290 | + assert!(!masked.contains("p@ss(w)rd")); |
| 291 | + assert!(!masked.contains("mykey123")); |
| 292 | + } |
| 293 | + |
| 294 | + #[test] |
| 295 | + fn test_mask_connection_with_escaped_quotes() { |
| 296 | + // SQL-style escaped quotes ('') inside values |
| 297 | + let sql = "CONNECTION = (PASSWORD = 'it''s_secret' ACCESS_KEY_ID = 'ak')"; |
| 298 | + let masked = mask_connection_info(sql); |
| 299 | + assert!(masked.contains("PASSWORD = 'it***et'")); |
| 300 | + assert!(masked.contains("ACCESS_KEY_ID = '***'")); |
| 301 | + assert!(!masked.contains("it''s_secret")); |
| 302 | + } |
| 303 | + |
| 304 | + #[test] |
| 305 | + fn test_mask_connection_with_backslash_escaped_quotes() { |
| 306 | + // Backslash-escaped quotes (\') inside values |
| 307 | + let sql = r"CONNECTION = (PASSWORD = 'abc\'tail' ACCESS_KEY_ID = 'mykey')"; |
| 308 | + let masked = mask_connection_info(sql); |
| 309 | + assert!(!masked.contains("tail")); |
| 310 | + assert!(!masked.contains("mykey")); |
| 311 | + } |
| 312 | + |
| 313 | + #[test] |
| 314 | + fn test_mask_bare_key_with_backslash_escaped_quote() { |
| 315 | + // Backslash-escaped quote in a bare key value |
| 316 | + let sql = r"PASSWORD = 'p@ss\'word'"; |
| 317 | + let masked = mask_connection_info(sql); |
| 318 | + assert!(!masked.contains("p@ss")); |
| 319 | + } |
| 320 | + |
| 321 | + #[test] |
| 322 | + fn test_mask_connection_with_backslash_in_value() { |
| 323 | + // Backslash-escaped backslash (\\) inside values |
| 324 | + let sql = r"CONNECTION = (PASSWORD = 'path\\to\\secret' ACCESS_KEY_ID = 'key123')"; |
| 325 | + let masked = mask_connection_info(sql); |
| 326 | + assert!(!masked.contains("path")); |
| 327 | + assert!(!masked.contains("key123")); |
| 328 | + } |
| 329 | + |
| 330 | + #[test] |
| 331 | + fn test_mask_secret_kv_standalone() { |
| 332 | + let sql = "CREATE CONNECTION myconn STORAGE_TYPE = 's3' ACCESS_KEY_ID = 'AKID123' SECRET_ACCESS_KEY = 'secret456'"; |
| 333 | + let masked = mask_connection_info(sql); |
| 334 | + assert!(masked.contains("ACCESS_KEY_ID = 'AK***23'")); |
| 335 | + assert!(masked.contains("SECRET_ACCESS_KEY = 'se***56'")); |
| 336 | + assert!(masked.contains("STORAGE_TYPE = 's3'")); |
| 337 | + assert!(!masked.contains("AKID123")); |
| 338 | + assert!(!masked.contains("secret456")); |
| 339 | + } |
| 340 | + |
| 341 | + #[test] |
| 342 | + fn test_mask_password() { |
| 343 | + let sql = "PASSWORD = 'my_password'"; |
| 344 | + let masked = mask_connection_info(sql); |
| 345 | + assert_eq!(masked, "PASSWORD = 'my***rd'"); |
| 346 | + } |
| 347 | + |
| 348 | + #[test] |
| 349 | + fn test_mask_session_token() { |
| 350 | + let sql = "SESSION_TOKEN = 'tok123'"; |
| 351 | + let masked = mask_connection_info(sql); |
| 352 | + assert_eq!(masked, "SESSION_TOKEN = 'to***23'"); |
| 353 | + } |
| 354 | + |
| 355 | + #[test] |
| 356 | + fn test_mask_security_token() { |
| 357 | + let sql = "SECURITY_TOKEN = 'sectok'"; |
| 358 | + let masked = mask_connection_info(sql); |
| 359 | + assert_eq!(masked, "SECURITY_TOKEN = 'se***ok'"); |
| 360 | + } |
| 361 | + |
| 362 | + #[test] |
| 363 | + fn test_mask_secret_id_and_key() { |
| 364 | + let sql = "SECRET_ID = 'sid' SECRET_KEY = 'skey'"; |
| 365 | + let masked = mask_connection_info(sql); |
| 366 | + assert_eq!(masked, "SECRET_ID = '***' SECRET_KEY = '***'"); |
| 367 | + } |
| 368 | + |
| 369 | + #[test] |
| 370 | + fn test_mask_account_key() { |
| 371 | + let sql = "ACCOUNT_KEY = 'azure_key_123'"; |
| 372 | + let masked = mask_connection_info(sql); |
| 373 | + assert_eq!(masked, "ACCOUNT_KEY = 'az***23'"); |
| 374 | + assert!(!masked.contains("azure_key_123")); |
| 375 | + } |
| 376 | + |
| 377 | + #[test] |
| 378 | + fn test_no_false_positive_on_token() { |
| 379 | + // Plain 'token' column should NOT be masked (we only match specific prefixed tokens) |
| 380 | + let sql = "WHERE token = 'abc'"; |
| 381 | + let masked = mask_connection_info(sql); |
| 382 | + assert_eq!(masked, sql); |
| 383 | + } |
| 384 | + |
| 385 | + #[test] |
| 386 | + fn test_no_false_positive_on_normal_settings() { |
| 387 | + let sql = "SET max_threads = '8'"; |
| 388 | + let masked = mask_connection_info(sql); |
| 389 | + assert_eq!(masked, sql); |
| 390 | + } |
| 391 | + |
| 392 | + #[test] |
| 393 | + fn test_no_false_positive_on_select() { |
| 394 | + let sql = "SELECT name FROM users WHERE id = '123'"; |
| 395 | + let masked = mask_connection_info(sql); |
| 396 | + assert_eq!(masked, sql); |
| 397 | + } |
| 398 | + |
| 399 | + #[test] |
| 400 | + fn test_mask_case_insensitive() { |
| 401 | + let sql = "connection = (access_key_id = 'akid123' secret_access_key = 'secret456')"; |
| 402 | + let masked = mask_connection_info(sql); |
| 403 | + assert!(masked.contains("access_key_id = 'ak***23'")); |
| 404 | + assert!(masked.contains("secret_access_key = 'se***56'")); |
| 405 | + } |
| 406 | + |
| 407 | + #[test] |
| 408 | + fn test_mask_mixed_connection_and_bare_keys() { |
| 409 | + // Both CONNECTION block keys and bare keys get masked |
| 410 | + let sql = |
| 411 | + "COPY INTO t FROM 's3://b' CONNECTION = (ACCESS_KEY_ID = 'akid123') PASSWORD = 'pw123'"; |
| 412 | + let masked = mask_connection_info(sql); |
| 413 | + assert!(masked.contains("ACCESS_KEY_ID = 'ak***23'")); |
| 414 | + assert!(masked.contains("PASSWORD = 'pw***23'")); |
| 415 | + assert!(!masked.contains("akid123")); |
| 416 | + assert!(!masked.contains("pw123")); |
| 417 | + } |
| 418 | + |
| 419 | + #[test] |
| 420 | + fn test_mask_empty_value() { |
| 421 | + let sql = "PASSWORD = ''"; |
| 422 | + let masked = mask_connection_info(sql); |
| 423 | + assert_eq!(masked, "PASSWORD = '***'"); |
| 424 | + } |
| 425 | + |
| 426 | + #[test] |
| 427 | + fn test_mask_no_space_around_eq() { |
| 428 | + let sql = "CONNECTION=(ACCESS_KEY_ID='akid123')"; |
| 429 | + let masked = mask_connection_info(sql); |
| 430 | + assert!(masked.contains("ACCESS_KEY_ID = 'ak***23'")); |
| 431 | + } |
| 432 | + |
| 433 | + #[test] |
| 434 | + fn test_mask_aws_secret_key() { |
| 435 | + let sql = "CREATE CONNECTION c STORAGE_TYPE='s3' AWS_SECRET_KEY='my_secret'"; |
| 436 | + let masked = mask_connection_info(sql); |
| 437 | + assert!(masked.contains("AWS_SECRET_KEY = 'my***et'")); |
| 438 | + assert!(masked.contains("STORAGE_TYPE='s3'")); |
| 439 | + assert!(!masked.contains("my_secret")); |
| 440 | + } |
| 441 | + |
| 442 | + #[test] |
| 443 | + fn test_mask_aws_token() { |
| 444 | + let sql = "CREATE CONNECTION c STORAGE_TYPE='s3' AWS_TOKEN='tok_val99'"; |
| 445 | + let masked = mask_connection_info(sql); |
| 446 | + assert!(masked.contains("AWS_TOKEN = 'to***99'")); |
| 447 | + assert!(!masked.contains("tok_val99")); |
| 448 | + } |
| 449 | + |
| 450 | + #[test] |
| 451 | + fn test_mask_aws_access_key_id() { |
| 452 | + let sql = "AWS_ACCESS_KEY_ID='AKIA1234' AWS_SECRET_ACCESS_KEY='secret_val'"; |
| 453 | + let masked = mask_connection_info(sql); |
| 454 | + assert!(masked.contains("AWS_ACCESS_KEY_ID = 'AK***34'")); |
| 455 | + assert!(masked.contains("AWS_SECRET_ACCESS_KEY = 'se***al'")); |
| 456 | + assert!(!masked.contains("AKIA1234")); |
| 457 | + assert!(!masked.contains("secret_val")); |
| 458 | + } |
| 459 | + |
| 460 | + #[test] |
| 461 | + fn test_mask_aws_session_token() { |
| 462 | + let sql = "AWS_SESSION_TOKEN='session_tok_123'"; |
| 463 | + let masked = mask_connection_info(sql); |
| 464 | + assert_eq!(masked, "AWS_SESSION_TOKEN = 'se***23'"); |
| 465 | + assert!(!masked.contains("session_tok_123")); |
| 466 | + } |
| 467 | + |
| 468 | + #[test] |
| 469 | + fn test_mask_gcs_credential() { |
| 470 | + let sql = |
| 471 | + "CREATE CONNECTION c STORAGE_TYPE='gcs' CREDENTIAL='service-account-json-content'"; |
| 472 | + let masked = mask_connection_info(sql); |
| 473 | + assert!(masked.contains("CREDENTIAL = 'se***nt'")); |
| 474 | + assert!(masked.contains("STORAGE_TYPE='gcs'")); |
| 475 | + assert!(!masked.contains("service-account-json-content")); |
| 476 | + } |
| 477 | + |
| 478 | + #[test] |
| 479 | + fn test_mask_double_quoted_password() { |
| 480 | + // MySQL/Hive dialect uses double-quoted string literals |
| 481 | + let sql = r#"CREATE CONNECTION c STORAGE_TYPE="s3" PASSWORD="secret_val""#; |
| 482 | + let masked = mask_connection_info(sql); |
| 483 | + assert!(masked.contains(r#"PASSWORD = "se***al""#)); |
| 484 | + assert!(!masked.contains("secret_val")); |
| 485 | + } |
| 486 | + |
| 487 | + #[test] |
| 488 | + fn test_mask_double_quoted_connection_block() { |
| 489 | + let sql = r#"CONNECTION = (ACCESS_KEY_ID = "akid123" SECRET_ACCESS_KEY = "secret456")"#; |
| 490 | + let masked = mask_connection_info(sql); |
| 491 | + assert!(masked.contains(r#"ACCESS_KEY_ID = "ak***23""#)); |
| 492 | + assert!(masked.contains(r#"SECRET_ACCESS_KEY = "se***56""#)); |
| 493 | + assert!(!masked.contains("akid123")); |
| 494 | + assert!(!masked.contains("secret456")); |
| 495 | + } |
| 496 | + |
| 497 | + #[test] |
| 498 | + fn test_mask_double_quoted_with_backslash_escape() { |
| 499 | + let sql = r#"PASSWORD = "pass\"word""#; |
| 500 | + let masked = mask_connection_info(sql); |
| 501 | + assert!(!masked.contains("pass")); |
| 502 | + } |
| 503 | +} |
0 commit comments