-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlog-analyzer.py
More file actions
123 lines (109 loc) · 4.49 KB
/
log-analyzer.py
File metadata and controls
123 lines (109 loc) · 4.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import re
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import logging
LOG_FILE = "logs.txt"
PDF_REPORT = "log_report.pdf"
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
insert_pattern = re.compile(r'Successfully inserted (\d+) documents')
duration_pattern = re.compile(r'duration:(\d+\.\d+)s')
timestamp_pattern = re.compile(r'"asctime": "(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d+)"')
def parse_logs(file_path):
records = []
try:
with open(file_path, 'r') as f:
lines = f.readlines()
except FileNotFoundError:
logger.error(f"File {file_path} not found")
return pd.DataFrame()
except Exception as e:
logger.error(f"Error when reading a file {file_path}: {e}")
return pd.DataFrame()
for i, line in enumerate(lines):
try:
insert_match = insert_pattern.search(line)
if insert_match:
num_docs = int(insert_match.group(1))
duration = 0.0
for j in range(i, min(i+3, len(lines))):
d_match = duration_pattern.search(lines[j])
if d_match:
duration = float(d_match.group(1))
break
ts_match = timestamp_pattern.search(line)
timestamp = datetime.strptime(ts_match.group(1), "%Y-%m-%d %H:%M:%S,%f") if ts_match else None
records.append({
'timestamp': timestamp,
'num_docs': num_docs,
'duration_sec': duration
})
except Exception as e:
logger.error(f"String parsing error {i}: {e}")
return pd.DataFrame(records)
# --- Analysis ---
def enrich_dataframe(df):
try:
df = df[df["duration_sec"] > 0].copy()
df.sort_values("timestamp", inplace=True)
df["speed_per_min"] = df["num_docs"] / df["duration_sec"] * 60
df["rolling_avg"] = df["speed_per_min"].rolling(window=30, min_periods=1).mean()
df["is_peak"] = df["speed_per_min"] > (df["rolling_avg"] * 1.2)
df["is_drop"] = df["speed_per_min"] < (df["rolling_avg"] * 0.8)
except Exception as e:
logger.error(f"Data enrichment error: {e}")
return df
# --- Graphing ---
def plot_speed_graph(df):
try:
plt.figure(figsize=(10, 5))
plt.plot(df["timestamp"], df["speed_per_min"], label="Speed", alpha=0.5)
plt.plot(df["timestamp"], df["rolling_avg"], label="Moving average", linewidth=2)
plt.scatter(df.loc[df["is_peak"], "timestamp"], df.loc[df["is_peak"], "speed_per_min"], color='green', label='Peak')
plt.scatter(df.loc[df["is_drop"], "timestamp"], df.loc[df["is_drop"], "speed_per_min"], color='red', label='Falling')
plt.title("Loading speed (documents/min)")
plt.xlabel("Time")
plt.ylabel("Speed")
plt.xticks(rotation=45)
plt.legend()
plt.grid(True)
except Exception as e:
logger.error(f"Error in plotting the graph: {e}")
return plt
# --- PDF generation ---
def generate_pdf_report(df):
try:
with PdfPages(PDF_REPORT) as pdf:
# Text information
fig, ax = plt.subplots(figsize=(8.5, 5))
ax.axis('off')
total_docs = int(df["num_docs"].sum())
total_batches = len(df)
total_sec = df["duration_sec"].sum()
total_min = total_sec / 60
min_speed = df["speed_per_min"].min()
ax.set_title("Document upload report", fontsize=16, fontweight='bold', loc='center')
text = f"""
• Total documents uploaded: {total_docs}
• Number of batches (inserts): {total_batches}
• Total insertion time: {total_sec:.2f} seconds (≈ {total_min:.2f} minutes)
• Minimum speed: {min_speed:.2f} documents/minute
"""
ax.text(0.05, 0.95, text, fontsize=12, verticalalignment='top')
pdf.savefig(fig)
plt.close()
plot_speed_graph(df)
pdf.savefig()
plt.close()
except Exception as e:
logger.error(f"Error during PDF report generation: {e}")
if __name__ == "__main__":
df = parse_logs(LOG_FILE)
if not df.empty:
enriched_df = enrich_dataframe(df)
generate_pdf_report(enriched_df)
logger.info(f"✅ The report is saved in {PDF_REPORT}")
else:
logger.warning("No data to process")