|
13 | 13 | r'\b(TRACE|DEBUG|INFO|NOTICE|WARN|WARNING|ERROR|ERR|CRITICAL|ALERT|FATAL|EMERGENCY)\b', |
14 | 14 | re.IGNORECASE |
15 | 15 | ) |
16 | | -_TIMESTAMP_PATTERN = re.compile( |
17 | | - r'(\d{4}-\d{2}-\d{2}[T\s]\d{2}:\d{2}:\d{2}(?:\.\d+)?Z?)' |
18 | | -) |
| 16 | + |
# Month abbreviation -> month number, used when parsing textual dates.
# Keys are title-cased English abbreviations.
_MONTH_MAP = {
    'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6,
    'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12,
}

# Regex alternation of exactly the abbreviations above ("Jan|Feb|...").
# Using it instead of a generic three-letter class keeps the textual-date
# patterns from matching arbitrary words such as "the 21 10:00:00".
_MONTH_ALT = '|'.join(_MONTH_MAP)

# Timestamp patterns for different log formats, tried in order.
# Group 1 of every pattern is the timestamp text to be parsed.
_TIMESTAMP_PATTERNS = [
    # ISO 8601: 2026-03-21T10:00:00Z, 2026-03-21T10:00:00.123Z,
    # 2026-03-21T10:00:00+00:00
    re.compile(r'(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:?\d{2})?)'),
    # ISO-like with whitespace: 2026-03-21 10:00:00 or 2026-03-21 10:00:00.123
    re.compile(r'(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}(?:\.\d+)?)'),
    # Common Log Format / Apache: 21/Mar/2026:10:00:00 +0000, optionally
    # wrapped in brackets. The lookbehind stops the two-digit day from
    # matching the tail of a longer number.
    re.compile(
        r'(?<!\d)(\d{2}/(?:' + _MONTH_ALT + r')/\d{4}:\d{2}:\d{2}:\d{2}(?:\s+[+-]\d{4})?)'
    ),
    # Syslog-style: Mar 21 10:00:00 (no year in the text). The leading \b
    # prevents a match that starts in the middle of a longer word.
    re.compile(r'\b((?:' + _MONTH_ALT + r')\s+\d{1,2}\s+\d{2}:\d{2}:\d{2})'),
    # Unix timestamp in seconds: 1711054800 (exactly 10 digits)
    re.compile(r'\b(\d{10})\b'),
]
19 | 36 |
|
20 | 37 | @dataclass |
21 | 38 | class LogEntry: |
@@ -135,11 +152,37 @@ def parse_line(line: str) -> LogEntry: |
135 | 152 | return LogEntry(level="UNKNOWN", message=line, raw=line, timestamp=extract_timestamp(line)) |
136 | 153 |
|
def _month_number(name: str) -> Optional[int]:
    """Return the month number for a three-letter abbreviation, or None."""
    # _MONTH_MAP keys are title-cased, while the matching regexes may accept
    # other letter cases; normalise instead of silently defaulting to January.
    return _MONTH_MAP.get(name.title())


def _parse_iso_timestamp(ts_str: str) -> Optional[datetime]:
    """Parse 'YYYY-MM-DD' + ('T' or whitespace) + 'HH:MM:SS[.frac][Z|+HH[:]MM]'.

    Returns None when the text does not have that shape. Raises ValueError
    when the shape matches but the field values are out of range.
    """
    parts = re.fullmatch(
        r'(\d{4}-\d{2}-\d{2})[T\s]+(\d{2}:\d{2}:\d{2})'
        r'(?:\.(\d+))?(Z|[+-]\d{2}:?\d{2})?',
        ts_str,
    )
    if not parts:
        return None
    date_part, clock, fraction, offset = parts.groups()

    # Rebuild a canonical string so that every supported Python version of
    # datetime.fromisoformat accepts it: single 'T' separator, exactly six
    # fractional digits, and a colon inside the UTC offset.
    iso = f'{date_part}T{clock}'
    if fraction:
        iso += '.' + fraction[:6].ljust(6, '0')
    if offset == 'Z':
        iso += '+00:00'
    elif offset:
        iso += offset if ':' in offset else f'{offset[:3]}:{offset[3:]}'
    return datetime.fromisoformat(iso)


def _parse_clf_timestamp(ts_str: str) -> Optional[datetime]:
    """Parse Common Log Format text: 'DD/Mon/YYYY:HH:MM:SS[ +ZZZZ]'.

    The result is timezone-aware when an offset is present and naive
    otherwise. Returns None for an unrecognised shape or month name.
    """
    # Local import: only `datetime` is known to be imported at file level.
    from datetime import timedelta, timezone

    parts = re.match(
        r'(\d{2})/([A-Za-z]{3})/(\d{4}):(\d{2}):(\d{2}):(\d{2})'
        r'(?:\s+([+-])(\d{2})(\d{2}))?',
        ts_str,
    )
    if not parts:
        return None
    day, month_str, year, hour, minute, second, sign, off_h, off_m = parts.groups()
    month = _month_number(month_str)
    if month is None:
        return None

    tzinfo = None
    if sign:
        # Keep the offset instead of discarding it, mirroring how ISO
        # timestamps with an offset come back timezone-aware.
        delta = timedelta(hours=int(off_h), minutes=int(off_m))
        tzinfo = timezone(-delta if sign == '-' else delta)
    return datetime(
        int(year), month, int(day), int(hour), int(minute), int(second),
        tzinfo=tzinfo,
    )


def _parse_syslog_timestamp(ts_str: str) -> Optional[datetime]:
    """Parse syslog-style text: 'Mon D HH:MM:SS' (no year in the text).

    The current local year is assumed. Returns None for an unrecognised
    shape or month name.
    """
    parts = re.match(r'([A-Za-z]{3})\s+(\d{1,2})\s+(\d{2}):(\d{2}):(\d{2})', ts_str)
    if not parts:
        return None
    month_str, day, hour, minute, second = parts.groups()
    month = _month_number(month_str)
    if month is None:
        return None
    # NOTE(review): entries written late in December and read in January
    # will be dated almost a year in the future; confirm this is acceptable.
    year = datetime.now().year
    return datetime(year, month, int(day), int(hour), int(minute), int(second))


def extract_timestamp(text: str) -> Optional[datetime]:
    """Extract the first parseable timestamp from a raw log line.

    Each regex in ``_TIMESTAMP_PATTERNS`` is tried in order. The first one
    whose captured text parses successfully determines the result.

    Args:
        text: Raw text (typically one log line) to search.

    Returns:
        A ``datetime`` for the first recognised timestamp, or ``None`` when
        nothing matches or no match can be parsed. The value is
        timezone-aware when the text carried a UTC offset (ISO ``Z`` /
        ``+HH:MM`` or CLF ``+ZZZZ``) and naive otherwise. Ten-digit Unix
        timestamps are converted with ``datetime.fromtimestamp`` and so
        yield naive local time.
    """
    for pattern in _TIMESTAMP_PATTERNS:
        match = pattern.search(text)
        if not match:
            continue
        ts_str = match.group(1)
        try:
            # Dispatch on the shape of the captured text.
            if re.match(r'\d{4}-\d{2}-\d{2}', ts_str):
                parsed = _parse_iso_timestamp(ts_str)
            elif '/' in ts_str:
                parsed = _parse_clf_timestamp(ts_str)
            elif ts_str[:1].isalpha():
                parsed = _parse_syslog_timestamp(ts_str)
            elif ts_str.isdigit():
                parsed = datetime.fromtimestamp(int(ts_str))
            else:
                parsed = None
        except (ValueError, OverflowError, OSError):
            # Looked like a timestamp but held impossible values (e.g. month
            # 13 or Feb 30); fall through to the next pattern.
            continue
        if parsed is not None:
            return parsed
    return None
0 commit comments