|
1 | 1 | import json |
| 2 | +import re |
2 | 3 | import threading |
3 | 4 | import subprocess |
4 | 5 | from datetime import datetime |
|
12 | 13 |
|
13 | 14 | logger = get_logger(__name__) |
14 | 15 |
|
# Sensitive fields that need to be masked in logs
SENSITIVE_FIELDS = [
    "secretKey",
    "accessKey",
    "password",
    "passwd",
    "pwd",
    "secret",
    "token",
    "apiKey",
    "api_key",
]

MASK_PATTERN = "**********"

# Regex fragments for a quoted value. A backslash-escaped character (e.g. \")
# is consumed as part of the value, so an embedded quote cannot end the match
# early and leave the tail of the secret unmasked.
_DQ_VALUE = r'"(?:[^"\\]|\\.)*"'
_SQ_VALUE = r"'(?:[^'\\]|\\.)*'"
_QUOTED_VALUE = rf"(?:{_DQ_VALUE}|{_SQ_VALUE})"
# One character of an unquoted key=value payload: anything except whitespace,
# a comma or a closing square bracket.
_BARE_CHAR = r"[^\s,\]]"


def _build_mask_rules(fields, mask):
    """Compile the ordered (regex, replacement) rules used for masking.

    For every field three rules are produced, in this order:

    1. ``"field": <quoted value>``  ->  ``"field": "<mask>"``
    2. ``field=<value>``            ->  ``field=<mask>``
    3. ``'field': <quoted value>``  ->  ``'field': '<mask>'``

    Args:
        fields: Iterable of field names whose values must be hidden.
        mask: Text that replaces every matched value.

    Returns:
        Tuple of ``(compiled_pattern, replacement_text)`` pairs.
    """
    rules = []
    for field in fields:
        # Escape the name so a field containing regex metacharacters is
        # matched literally.
        name = re.escape(field)
        rules.append((
            re.compile(rf'"{name}"\s*:\s*{_QUOTED_VALUE}'),
            f'"{field}": "{mask}"',
        ))
        # No left word boundary on purpose: keys such as ``client_secret=``
        # or ``db_password=`` are masked as well. A quoted value is taken
        # whole (it may contain spaces or commas); otherwise the value runs
        # up to whitespace, a comma or ``]``.
        rules.append((
            re.compile(
                rf"{name}\s*=\s*(?:{_QUOTED_VALUE}{_BARE_CHAR}*|{_BARE_CHAR}+)"
            ),
            f"{field}={mask}",
        ))
        rules.append((
            re.compile(rf"'{name}'\s*:\s*{_QUOTED_VALUE}"),
            f"'{field}': '{mask}'",
        ))
    return tuple(rules)


# Compiled once at import time from SENSITIVE_FIELDS / MASK_PATTERN so the
# per-log-line hot path does not rebuild patterns.
_MASK_RULES = _build_mask_rules(SENSITIVE_FIELDS, MASK_PATTERN)


def mask_sensitive_info(text: str) -> str:
    """
    Mask sensitive information in text by replacing values with **********

    Recognised shapes, for every name in SENSITIVE_FIELDS (case-sensitive):
    ``"field": "value"``, ``'field': 'value'`` (the value may use either
    quote style and may contain backslash-escaped quotes) and
    ``field=value`` (value bare or quoted). The separator is normalised in
    the output, e.g. ``password = x`` becomes ``password=**********``.

    Args:
        text: Original text that may contain sensitive information

    Returns:
        Text with sensitive values masked; empty input is returned unchanged
    """
    if not text:
        return text

    masked_text = text
    for regex, replacement in _MASK_RULES:
        # A callable replacement inserts the text literally; a plain string
        # would be parsed as a template (backslash / group references).
        masked_text = regex.sub(
            lambda _match, _literal=replacement: _literal, masked_text
        )
    return masked_text
| 65 | + |
15 | 66 | class DataxClient: |
16 | 67 | def __init__(self, task: CollectionTask, execution: TaskExecution, template: CollectionTemplate): |
17 | 68 | self.execution = execution |
@@ -229,10 +280,12 @@ def write_header_log(self, cmd: str, log_f): |
229 | 280 |
|
230 | 281 | @staticmethod |
231 | 282 | def read_stream(stream, log_f): |
232 | | - """Read the output stream""" |
| 283 | + """Read the output stream and mask sensitive information""" |
233 | 284 | for line in stream: |
234 | 285 | line = line.rstrip('\n') |
235 | 286 | if line: |
| 287 | + # Mask sensitive information before writing to log |
| 288 | + masked_line = mask_sensitive_info(line) |
236 | 289 | # Write to the log file |
237 | | - log_f.write(f"{line}\n") |
| 290 | + log_f.write(f"{masked_line}\n") |
238 | 291 | log_f.flush() |
0 commit comments