Skip to content

Commit f878dd5

Browse files
committed
Log Caddy access and add daily visitors report script
1 parent 3e43ce3 commit f878dd5

4 files changed

Lines changed: 233 additions & 0 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ venv/
1616

1717
# Logs
1818
*.log
19+
caddy_logs/
1920

2021
# MacOS
2122
.DS_Store

Caddyfile

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
11
davidbingmann.de {
22
encode zstd gzip
3+
log {
4+
output file /var/log/caddy/access.jsonl {
5+
roll_size 25MB
6+
roll_keep 14
7+
roll_keep_for 336h
8+
}
9+
format json
10+
}
311
header {
412
Strict-Transport-Security "max-age=31536000"
513
X-Content-Type-Options "nosniff"

docker-compose.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ services:
2121
- "443:443"
2222
volumes:
2323
- ./Caddyfile:/etc/caddy/Caddyfile
24+
- ./caddy_logs:/var/log/caddy
2425
- caddy_data:/data
2526
- caddy_config:/config
2627
depends_on:

scripts/daily_visitors.py

Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,223 @@
1+
# /// script
# requires-python = ">=3.11"
# dependencies = []
# ///
4+
from __future__ import annotations
5+
6+
import argparse
7+
import glob
8+
import json
9+
import os
10+
import re
11+
import smtplib
12+
import sys
13+
from dataclasses import dataclass
14+
from datetime import UTC, date, datetime, timedelta
15+
from email.message import EmailMessage
16+
17+
# Default glob for locating access logs; override with CADDY_ACCESS_LOG_GLOB.
# NOTE(review): this pattern only matches file names that begin with
# "access.jsonl" -- confirm that Caddy's rolled log files follow that naming,
# otherwise rotated logs are silently left out of the report.
DEFAULT_LOG_GLOB = os.getenv("CADDY_ACCESS_LOG_GLOB", "caddy_logs/access.jsonl*")
18+
19+
20+
@dataclass(frozen=True)
class Report:
    """Immutable summary of one day's traffic as computed by ``compute_report``."""

    # Calendar day the figures refer to.
    target_date: date
    # Number of distinct client IP addresses seen among counted pageviews.
    unique_visitors: int
    # Number of requests that passed every pageview filter.
    pageviews: int
25+
26+
27+
def parse_args() -> argparse.Namespace:
    """Parse the command line.

    Returns:
        A namespace with ``date``, ``log_glob``, ``site_host`` and
        ``send_email`` attributes.
    """
    cli = argparse.ArgumentParser(
        description="Count daily visitors from Caddy JSON access logs, and optionally email a report.",
    )

    # (flag, keyword arguments) pairs, registered in this order.
    options = (
        (
            "--date",
            {
                "default": "yesterday",
                "help": 'Target date in YYYY-MM-DD, or "yesterday" (default).',
            },
        ),
        (
            "--log-glob",
            {
                "default": DEFAULT_LOG_GLOB,
                "help": f"Glob for Caddy access logs (default: {DEFAULT_LOG_GLOB!r}).",
            },
        ),
        (
            "--site-host",
            {
                # The SITE_HOST environment variable overrides the built-in default.
                "default": os.environ.get("SITE_HOST", "davidbingmann.de"),
                "help": 'Only count requests whose Host equals this (default: "davidbingmann.de").',
            },
        ),
        (
            "--send-email",
            {
                "action": "store_true",
                "help": "Send the report via SMTP using env vars (SMTP_*).",
            },
        ),
    )
    for flag, settings in options:
        cli.add_argument(flag, **settings)

    return cli.parse_args()
54+
55+
56+
def resolve_target_date(value: str) -> date:
    """Turn the ``--date`` argument into a calendar date.

    Args:
        value: Either the literal ``"yesterday"`` or an ISO ``YYYY-MM-DD`` string.

    Returns:
        The requested date; ``"yesterday"`` is relative to the local clock.

    Raises:
        ValueError: If *value* is neither ``"yesterday"`` nor a valid ISO date.
    """
    if value != "yesterday":
        return date.fromisoformat(value)
    one_day = timedelta(days=1)
    return date.today() - one_day
60+
61+
62+
# File extensions that mark a request as a static asset rather than a page.
_ASSET_EXT_RE = re.compile(
    r"\.(?:css|js|mjs|map|png|jpe?g|gif|webp|svg|ico|woff2?|ttf|eot|txt|xml|json)$",
    flags=re.IGNORECASE,
)


def is_pageview(uri: str) -> bool:
    """Decide whether a request URI looks like a page rather than a file.

    Args:
        uri: Request URI, optionally carrying a ``?query`` suffix.

    Returns:
        ``False`` for anything under ``/assets/``, for the favicon, robots and
        sitemap endpoints, for known asset extensions, and for any path whose
        final segment contains a dot; ``True`` otherwise.
    """
    # Only the path part matters; drop the query string.
    path, _, _query = uri.partition("?")

    is_known_non_page = path.startswith("/assets/") or path in {
        "/favicon.ico",
        "/robots.txt",
        "/sitemap.xml",
    }
    if is_known_non_page:
        return False

    if _ASSET_EXT_RE.search(path) is not None:
        return False

    # A dot in the final segment means a file name, not a page route.
    _head, _sep, tail = path.rpartition("/")
    return "." not in tail
86+
87+
88+
def iter_log_paths(log_glob: str) -> list[str]:
    """Expand *log_glob* into a sorted list of regular files.

    Args:
        log_glob: Glob pattern for the access log files.

    Returns:
        Matching paths in lexicographic order; directories and other
        non-file matches are dropped.
    """
    matches = glob.glob(log_glob)
    regular_files = filter(os.path.isfile, matches)
    return sorted(regular_files)
91+
92+
93+
def _open_log(path: str):
    """Open one access log for text reading; ``.gz`` files are decompressed."""
    if path.endswith(".gz"):
        import gzip  # local import: only needed when compressed logs are matched

        return gzip.open(path, "rt", encoding="utf-8", errors="replace")
    # Undecodable bytes are replaced rather than aborting the whole report.
    return open(path, "r", encoding="utf-8", errors="replace")


def _countable_ip(event: object, *, target_date: date, site_host: str) -> str | None:
    """Return the client IP when *event* is a countable pageview, else ``None``."""
    # A line can be valid JSON without being an object (e.g. a list or number).
    if not isinstance(event, dict):
        return None

    logger = event.get("logger")
    if not isinstance(logger, str) or not logger.startswith("http.log.access"):
        return None

    ts = event.get("ts")
    if not isinstance(ts, (int, float)):
        return None
    try:
        local_day = datetime.fromtimestamp(ts).date()
    except (OverflowError, OSError, ValueError):
        # Out-of-range or NaN timestamps cannot belong to any day.
        return None
    # Compare local calendar days instead of a fixed 24-hour window, so days
    # that are 23 or 25 hours long (DST changes) are still bounded correctly.
    if local_day != target_date:
        return None

    request = event.get("request")
    if not isinstance(request, dict):
        return None
    if request.get("host") != site_host or request.get("method") != "GET":
        return None

    # Only successful or redirected responses (2xx/3xx) count.
    status = event.get("status")
    if not isinstance(status, int) or not 200 <= status < 400:
        return None

    uri = request.get("uri")
    if not isinstance(uri, str) or not is_pageview(uri):
        return None

    ip = request.get("remote_ip") or request.get("client_ip")
    if not isinstance(ip, str) or not ip:
        return None
    return ip


def compute_report(
    *,
    target_date: date,
    log_paths: list[str],
    site_host: str,
) -> Report:
    """Count pageviews and distinct client IPs for one local calendar day.

    Each log file is read as JSON Lines. Blank lines and lines that are not
    valid JSON are skipped. A record is counted when all of the following hold:
    its ``logger`` starts with ``http.log.access``; its ``ts`` falls on
    *target_date* in the local time zone; the request host equals *site_host*;
    the method is ``GET``; the status is in ``[200, 400)``; the URI passes
    ``is_pageview``; and a non-empty ``remote_ip`` or ``client_ip`` is present.

    Args:
        target_date: Local calendar day to report on.
        log_paths: Files to scan; paths ending in ``.gz`` are read as gzip.
        site_host: Exact ``Host`` value a request must carry.

    Returns:
        A ``Report`` holding the pageview count and the number of distinct IPs.

    Raises:
        OSError: If a log file cannot be opened or read.
    """
    unique_ips: set[str] = set()
    pageviews = 0

    for path in log_paths:
        with _open_log(path) as handle:
            for line in handle:
                line = line.strip()
                if not line:
                    continue

                try:
                    event = json.loads(line)
                except json.JSONDecodeError:
                    continue

                ip = _countable_ip(event, target_date=target_date, site_host=site_host)
                if ip is None:
                    continue

                unique_ips.add(ip)
                pageviews += 1

    return Report(target_date=target_date, unique_visitors=len(unique_ips), pageviews=pageviews)
153+
154+
155+
def format_report_text(report: Report) -> str:
    """Render *report* as the plain-text body of the daily summary.

    Args:
        report: The figures to present.

    Returns:
        Four newline-joined lines: a heading with the ISO date, a blank line,
        the unique-visitor count and the pageview count.
    """
    heading = f"Daily visitors report for {report.target_date.isoformat()}"
    visitors_line = f"Unique visitors (by IP): {report.unique_visitors}"
    pageviews_line = f"Pageviews (HTML-ish GETs): {report.pageviews}"
    return "\n".join((heading, "", visitors_line, pageviews_line))
164+
165+
166+
def send_email(*, subject: str, body: str) -> None:
    """Send a plain-text message through the SMTP server described by env vars.

    Environment variables read:
        SMTP_HOST (required), SMTP_TO (required), SMTP_PORT (default ``587``),
        SMTP_USER and SMTP_PASSWORD (login happens only when both are set),
        SMTP_FROM (falls back to SMTP_USER, then ``no-reply@localhost``),
        SMTP_STARTTLS (``0`` disables STARTTLS), and
        SMTP_TLS_VERIFY (``0`` disables certificate and hostname checks, for
        relays with self-signed certificates).

    Args:
        subject: Value of the ``Subject`` header.
        body: Plain-text message content.

    Raises:
        ValueError: If SMTP_PORT is not an integer.
        RuntimeError: If SMTP_HOST or SMTP_TO is missing or empty.
        smtplib.SMTPException: On protocol-level delivery failures.
        OSError: On connection or TLS failures.
    """
    import ssl  # local import: only needed when a report is actually mailed

    host = os.environ.get("SMTP_HOST")
    raw_port = os.environ.get("SMTP_PORT", "587")
    try:
        port = int(raw_port)
    except ValueError:
        # Same exception type as before, but name the offending variable.
        raise ValueError(f"SMTP_PORT must be an integer, got {raw_port!r}") from None
    user = os.environ.get("SMTP_USER")
    password = os.environ.get("SMTP_PASSWORD")
    to_addr = os.environ.get("SMTP_TO")
    from_addr = os.environ.get("SMTP_FROM") or user

    missing = [name for name, value in [("SMTP_HOST", host), ("SMTP_TO", to_addr)] if not value]
    if missing:
        raise RuntimeError(f"Missing required env vars: {', '.join(missing)}")

    message = EmailMessage()
    message["Subject"] = subject
    message["From"] = from_addr or "no-reply@localhost"
    message["To"] = to_addr
    message.set_content(body)

    with smtplib.SMTP(host, port, timeout=30) as smtp:
        smtp.ehlo()
        if os.environ.get("SMTP_STARTTLS", "1") != "0":
            # Pass an explicit default context so the server certificate and
            # hostname are verified before credentials are sent; smtplib's
            # implicit context performs no such verification.
            context = ssl.create_default_context()
            if os.environ.get("SMTP_TLS_VERIFY", "1") == "0":
                context.check_hostname = False
                context.verify_mode = ssl.CERT_NONE
            smtp.starttls(context=context)
            # The session state is reset by STARTTLS, so greet again.
            smtp.ehlo()
        if user and password:
            smtp.login(user, password)
        smtp.send_message(message)
192+
193+
194+
def main() -> int:
    """Run the command-line tool: build the report, then print or email it.

    Returns:
        ``0`` on success; ``2`` when ``--date`` cannot be parsed or when no
        log file matches the glob.
    """
    args = parse_args()

    try:
        target_date = resolve_target_date(args.date)
    except ValueError:
        # Report bad input as a usage error instead of an unhandled traceback.
        print(
            f'Invalid --date {args.date!r}: expected YYYY-MM-DD or "yesterday".',
            file=sys.stderr,
        )
        return 2

    log_paths = iter_log_paths(args.log_glob)
    if not log_paths:
        print(
            f"No access logs found (glob: {args.log_glob!r}). "
            "Enable Caddy access logging first.",
            file=sys.stderr,
        )
        return 2

    report = compute_report(target_date=target_date, log_paths=log_paths, site_host=args.site_host)
    body = format_report_text(report)

    subject = f"{args.site_host} visitors: {report.unique_visitors} ({report.target_date.isoformat()})"

    if args.send_email:
        send_email(subject=subject, body=body)
    else:
        print(subject)
        print(body)

    return 0


if __name__ == "__main__":
    raise SystemExit(main())
223+

0 commit comments

Comments
 (0)