Skip to content

Commit 8f43040

Browse files
vinnytherobot and claude committed
feat(parser): improve timestamp parsing for multiple formats
Add support for:
- ISO 8601 with various timezone formats
- Common Log Format (Apache)
- Syslog-style timestamps
- Unix timestamps

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 4363e7b commit 8f43040

2 files changed

Lines changed: 132 additions & 11 deletions

File tree

logscope/parser.py

Lines changed: 53 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,26 @@
1313
r'\b(TRACE|DEBUG|INFO|NOTICE|WARN|WARNING|ERROR|ERR|CRITICAL|ALERT|FATAL|EMERGENCY)\b',
1414
re.IGNORECASE
1515
)
16-
_TIMESTAMP_PATTERN = re.compile(
17-
r'(\d{4}-\d{2}-\d{2}[T\s]\d{2}:\d{2}:\d{2}(?:\.\d+)?Z?)'
18-
)
16+
17+
# Month name mapping for parsing (English three-letter abbreviation -> month number)
_MONTH_MAP = {
    'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6,
    'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12,
}

# Regex alternation of the known month abbreviations. Built from _MONTH_MAP so
# the patterns below can only match month names that the map can resolve;
# matching any three letters would let text such as "INFO 21 10:00:00" pass
# for a syslog timestamp.
_MONTH_ALTERNATION = '|'.join(_MONTH_MAP)

# Multiple timestamp patterns for different log formats.
# extract_timestamp() tries them in list order, so the more specific formats
# come first and the bare 10-digit Unix timestamp comes last.
_TIMESTAMP_PATTERNS = [
    # ISO 8601: 2026-03-21T10:00:00Z or 2026-03-21T10:00:00.123Z or 2026-03-21T10:00:00+00:00
    re.compile(r'(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:?\d{2})?)'),
    # ISO-like with space: 2026-03-21 10:00:00 or 2026-03-21 10:00:00.123
    re.compile(r'(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}(?:\.\d+)?)'),
    # Common Log Format / Apache: 21/Mar/2026:10:00:00 +0000 or [21/Mar/2026:10:00:00 +0000]
    re.compile(
        r'(\d{2}/(?:' + _MONTH_ALTERNATION + r')/\d{4}:\d{2}:\d{2}:\d{2}(?:\s+[+-]\d{4})?)'
    ),
    # Syslog-style: Mar 21 10:00:00 (no year in the text)
    re.compile(r'\b((?:' + _MONTH_ALTERNATION + r')\s+\d{1,2}\s+\d{2}:\d{2}:\d{2})'),
    # Unix timestamp: 1711054800 (10 digits for seconds)
    re.compile(r'\b(\d{10})\b'),
]
1936

2037
@dataclass
2138
class LogEntry:
@@ -135,11 +152,37 @@ def parse_line(line: str) -> LogEntry:
135152
return LogEntry(level="UNKNOWN", message=line, raw=line, timestamp=extract_timestamp(line))
136153

137154
def _parse_iso_timestamp(ts_str: str) -> datetime:
    """Parse ``YYYY-MM-DD(T|space)HH:MM:SS[.frac][Z|+HH[:]MM]`` with ``fromisoformat``.

    The string is normalised first so it also parses on interpreters whose
    ``datetime.fromisoformat`` predates the relaxed parser added in Python 3.11.

    Raises:
        ValueError: if the normalised string is not a valid ISO timestamp.
    """
    # Collapse a whitespace date/time separator (one or more chars) to a single "T".
    normalized = re.sub(r'\s+', 'T', ts_str, count=1)
    # A trailing "Z" means UTC; spell it as an explicit offset.
    normalized = normalized.replace('Z', '+00:00')
    # Offsets written without a colon ("+0000") become "+00:00".
    normalized = re.sub(r'([+-]\d{2})(\d{2})$', r'\1:\2', normalized)
    # Pad/truncate fractional seconds to exactly six digits (microseconds).
    normalized = re.sub(
        r'\.(\d+)',
        lambda frac: '.' + frac.group(1)[:6].ljust(6, '0'),
        normalized,
    )
    return datetime.fromisoformat(normalized)


def _parse_common_log_timestamp(ts_str: str) -> Optional[datetime]:
    """Parse ``DD/Mon/YYYY:HH:MM:SS [+-HHMM]``; return None if it does not fit."""
    parts = re.match(
        r'(\d{2})/([A-Za-z]{3})/(\d{4}):(\d{2}):(\d{2}):(\d{2})(?:\s+([+-]\d{4}))?',
        ts_str,
    )
    if parts is None:
        return None
    day, month_name, year, hour, minute, second, offset = parts.groups()
    # Unknown month names are rejected instead of silently becoming January.
    month = _MONTH_MAP.get(month_name.capitalize())
    if month is None:
        return None
    # Keep the logged UTC offset rather than discarding it. strptime("%z") turns
    # "+0530" into a fixed-offset tzinfo without needing extra imports.
    tzinfo = datetime.strptime(offset, '%z').tzinfo if offset else None
    return datetime(
        int(year), month, int(day), int(hour), int(minute), int(second), tzinfo=tzinfo
    )


def _parse_syslog_timestamp(ts_str: str) -> Optional[datetime]:
    """Parse ``Mon D HH:MM:SS`` using the current year; return None if it does not fit."""
    parts = re.match(r'([A-Za-z]{3})\s+(\d{1,2})\s+(\d{2}):(\d{2}):(\d{2})', ts_str)
    if parts is None:
        return None
    month_name, day, hour, minute, second = parts.groups()
    month = _MONTH_MAP.get(month_name.capitalize())
    if month is None:
        return None
    # Syslog lines carry no year, so the current year is assumed.
    year = datetime.now().year
    return datetime(year, month, int(day), int(hour), int(minute), int(second))


def extract_timestamp(text: str) -> Optional[datetime]:
    """Extract a timestamp from a raw string using multiple format patterns.

    Each pattern in ``_TIMESTAMP_PATTERNS`` is tried in order. The first match
    that parses into a valid ``datetime`` wins; a match that fails to parse is
    skipped and the next pattern is tried.

    Args:
        text: Raw log line (or any string) that may contain a timestamp.

    Returns:
        The parsed ``datetime``, or ``None`` when no pattern yields a valid one.
        ISO timestamps with ``Z`` or an offset, and Common Log Format
        timestamps with an offset, are timezone-aware. ISO timestamps without
        an offset and syslog timestamps are naive. Unix timestamps are
        converted with ``datetime.fromtimestamp`` and are naive local time.
    """
    for pattern in _TIMESTAMP_PATTERNS:
        match = pattern.search(text)
        if match is None:
            continue
        ts_str = match.group(1)
        try:
            if ts_str.isdigit():
                # Unix timestamp (seconds since the epoch)
                parsed = datetime.fromtimestamp(int(ts_str))
            elif '/' in ts_str:
                # Common Log Format: 21/Mar/2026:10:00:00 +0000
                parsed = _parse_common_log_timestamp(ts_str)
            elif ts_str[0].isalpha():
                # Syslog-style: Mar 21 10:00:00
                parsed = _parse_syslog_timestamp(ts_str)
            else:
                # ISO 8601, with either "T" or whitespace between date and time
                parsed = _parse_iso_timestamp(ts_str)
        except (ValueError, OverflowError, OSError):
            # Looked like a timestamp but holds invalid values (e.g. month 13).
            continue
        if parsed is not None:
            return parsed
    return None

tests/test_time.py

Lines changed: 79 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from datetime import datetime
2-
from logscope.parser import parse_line
2+
from logscope.parser import parse_line, extract_timestamp
33
from logscope.cli import parse_relative_time
44

55
def test_parse_iso_timestamp():
@@ -31,3 +31,81 @@ def test_relative_time_days():
3131
assert parsed is not None
3232
diff = now - parsed
3333
assert 1.9 < diff.total_seconds() / 86400 < 2.1
34+
35+
36+
def test_extract_iso_with_space():
    """A space-separated ISO-like timestamp yields the expected date and hour."""
    parsed = extract_timestamp("2026-03-21 10:00:00 INFO message")
    assert parsed is not None
    assert (parsed.year, parsed.month, parsed.day, parsed.hour) == (2026, 3, 21, 10)
44+
45+
46+
def test_extract_iso_with_milliseconds():
    """Fractional seconds in an ISO timestamp are kept as microseconds."""
    parsed = extract_timestamp("2026-03-21T10:00:00.123Z INFO message")
    assert parsed is not None
    assert (parsed.year, parsed.microsecond) == (2026, 123000)
52+
53+
54+
def test_extract_iso_with_timezone():
    """An ISO timestamp carrying an explicit UTC offset is still parsed."""
    line = "2026-03-21T10:00:00+00:00 INFO message"
    parsed = extract_timestamp(line)
    assert parsed is not None
    assert parsed.year == 2026
59+
60+
61+
def test_extract_common_log_format():
    """An Apache/nginx Common Log Format timestamp yields the expected date and hour."""
    parsed = extract_timestamp("21/Mar/2026:10:00:00 +0000 INFO message")
    assert parsed is not None
    assert (parsed.year, parsed.month, parsed.day, parsed.hour) == (2026, 3, 21, 10)
69+
70+
71+
def test_extract_common_log_format_bracketed():
    """A Common Log Format timestamp wrapped in square brackets is still found."""
    parsed = extract_timestamp("[21/Mar/2026:10:00:00 +0000] INFO message")
    assert parsed is not None
    assert (parsed.year, parsed.month, parsed.day) == (2026, 3, 21)
78+
79+
80+
def test_extract_syslog_format():
    """A syslog-style timestamp (no year in the text) yields month, day and time."""
    parsed = extract_timestamp("Mar 21 10:00:00 hostname process[123]: message")
    assert parsed is not None
    # The year is not asserted because the log line does not carry one.
    assert (parsed.month, parsed.day) == (3, 21)
    assert (parsed.hour, parsed.minute, parsed.second) == (10, 0, 0)
89+
90+
91+
def test_extract_unix_timestamp():
    """Test Unix timestamp (seconds since epoch)."""
    # 1774087200 is 2026-03-21 10:00:00 UTC. (The previous value, 1711018800,
    # is 2024-03-21 11:00:00 UTC, so the year assertion could never pass.)
    ts = extract_timestamp("1774087200 INFO message")
    assert ts is not None
    # Day and hour depend on the local timezone of the machine running the
    # test, but every UTC offset keeps this instant inside March 2026.
    assert ts.year == 2026
    assert ts.month == 3
98+
99+
100+
def test_extract_timestamp_none():
    """Text that contains no recognisable timestamp produces None."""
    plain_text = "This is just a plain message with no timestamp"
    assert extract_timestamp(plain_text) is None
104+
105+
106+
def test_parse_line_with_common_log_format():
    """parse_line fills in the timestamp for a Common Log Format line."""
    entry = parse_line('[INFO] 21/Mar/2026:10:00:00 +0000 Test message')
    stamp = entry.timestamp
    assert stamp is not None
    assert stamp.month == 3

0 commit comments

Comments
 (0)