Skip to content

Commit f57968b

Browse files
amosttAygentic
andauthored
feat(core): add structured logging and request pipeline middleware [AYG-66] (#2)
* feat(core): add structured logging and request pipeline middleware [AYG-66] Add structlog-based structured logging (JSON/console modes) and request pipeline middleware providing UUID v4 request IDs, correlation ID propagation, security headers on all responses (including CORS preflight), and status-based log levels (info/warning/error for 2xx/4xx/5xx). - Create backend/app/core/logging.py: structlog configuration with JSON renderer (production) and ConsoleRenderer (local dev), base fields (timestamp, level, event, service, version, environment), contextvars integration for request-scoped fields - Create backend/app/core/middleware.py: RequestPipelineMiddleware with request_id generation, X-Correlation-ID propagation, 6 security headers (X-Content-Type-Options, X-Frame-Options, X-XSS-Protection, Referrer-Policy, Permissions-Policy, HSTS production-only), X-Request-ID on every response including error and exception paths - Wire into main.py with documented middleware ordering (outermost wraps CORSMiddleware so preflight OPTIONS get headers too) - 32 new unit tests (6 logging + 26 middleware) covering CORS preflight header behavior, X-Request-ID on 4xx/5xx/exception paths, and negative tests proving Authorization/Cookie values never logged Fixes AYG-66 🤖 Generated by Aygentic Co-Authored-By: Aygentic <noreply@aygentic.com> * docs(core): update architecture, test registry, and deployment docs for AYG-66 [skip ci] Updated documentation based on structured logging and request pipeline middleware implementation. Related to AYG-66 🤖 Generated by Aygentic Co-Authored-By: Aygentic <noreply@aygentic.com> --------- Co-authored-by: Aygentic <noreply@aygentic.com>
1 parent c10f1bf commit f57968b

File tree

12 files changed

+1096
-2
lines changed

12 files changed

+1096
-2
lines changed

backend/app/core/logging.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
"""Structured logging configuration using structlog.
2+
3+
Provides JSON output for production/CI and human-readable console output
4+
for local development, controlled by the LOG_FORMAT setting.
5+
6+
Every log entry includes base fields: timestamp, level, event, service,
7+
version, environment. Request-scoped fields (request_id, correlation_id)
8+
are bound via structlog.contextvars by the request pipeline middleware.
9+
"""
10+
11+
from __future__ import annotations
12+
13+
import logging
14+
from collections.abc import MutableMapping
15+
from typing import Any
16+
17+
import structlog
18+
19+
20+
def _add_service_info(
21+
service: str,
22+
version: str,
23+
environment: str,
24+
) -> structlog.types.Processor:
25+
"""Return a processor that injects service metadata into every log entry."""
26+
27+
def processor(
28+
_logger: Any, _method_name: str, event_dict: MutableMapping[str, Any]
29+
) -> MutableMapping[str, Any]:
30+
event_dict.setdefault("service", service)
31+
event_dict.setdefault("version", version)
32+
event_dict.setdefault("environment", environment)
33+
return event_dict
34+
35+
return processor
36+
37+
38+
def setup_logging(settings: Any) -> None:
39+
"""Configure structlog with JSON or console rendering.
40+
41+
Args:
42+
settings: A settings object with LOG_LEVEL, LOG_FORMAT,
43+
SERVICE_NAME, SERVICE_VERSION, and ENVIRONMENT attributes.
44+
"""
45+
log_level = getattr(logging, settings.LOG_LEVEL.upper(), logging.INFO)
46+
47+
shared_processors: list[structlog.types.Processor] = [
48+
structlog.contextvars.merge_contextvars,
49+
structlog.processors.add_log_level,
50+
structlog.processors.TimeStamper(fmt="iso"),
51+
_add_service_info(
52+
service=settings.SERVICE_NAME,
53+
version=settings.SERVICE_VERSION,
54+
environment=settings.ENVIRONMENT,
55+
),
56+
structlog.processors.StackInfoRenderer(),
57+
structlog.processors.format_exc_info,
58+
structlog.processors.UnicodeDecoder(),
59+
]
60+
61+
if settings.LOG_FORMAT == "console":
62+
renderer: structlog.types.Processor = structlog.dev.ConsoleRenderer()
63+
else:
64+
renderer = structlog.processors.JSONRenderer()
65+
66+
structlog.configure(
67+
processors=[*shared_processors, renderer],
68+
wrapper_class=structlog.make_filtering_bound_logger(log_level),
69+
logger_factory=structlog.PrintLoggerFactory(),
70+
cache_logger_on_first_use=True,
71+
context_class=dict,
72+
)
73+
74+
75+
def get_logger(**initial_values: Any) -> Any:
76+
"""Return a structlog bound logger, optionally with initial bound values."""
77+
return structlog.get_logger(**initial_values)

backend/app/core/middleware.py

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
"""Request pipeline middleware: request ID, correlation, security headers, logging.
2+
3+
Generates a UUID v4 request_id for every request, propagates correlation IDs,
4+
applies security headers, and logs each request with status-appropriate level.
5+
6+
This middleware MUST be the outermost middleware (added last in code) so that
7+
security headers and X-Request-ID are set on ALL responses, including CORS
8+
preflight OPTIONS responses handled by CORSMiddleware.
9+
"""
10+
11+
from __future__ import annotations
12+
13+
import re
14+
import time
15+
from typing import Any
16+
from uuid import uuid4
17+
18+
import structlog
19+
from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
20+
from starlette.requests import Request
21+
from starlette.responses import Response
22+
from starlette.types import ASGIApp
23+
24+
# Correlation ID validation: alphanumeric, hyphens, underscores, dots; max 128 chars.
25+
# Rejects injection payloads (newlines, control chars) that could forge log entries.
26+
_CORRELATION_ID_PATTERN = re.compile(r"^[a-zA-Z0-9\-_.]{1,128}$")
27+
28+
# Security headers applied to every response (PRD Section 4.1.12)
29+
_SECURITY_HEADERS: dict[str, str] = {
30+
"X-Content-Type-Options": "nosniff",
31+
"X-Frame-Options": "DENY",
32+
"X-XSS-Protection": "0",
33+
"Referrer-Policy": "strict-origin-when-cross-origin",
34+
"Permissions-Policy": "camera=(), microphone=(), geolocation=()",
35+
}
36+
37+
# HSTS header only applied in production
38+
_HSTS_VALUE = "max-age=31536000; includeSubDomains"
39+
40+
41+
class RequestPipelineMiddleware(BaseHTTPMiddleware):
42+
"""Middleware that provides request tracing, security headers, and request logging.
43+
44+
Args:
45+
app: The ASGI application.
46+
environment: Deployment environment (e.g. "local", "staging", "production").
47+
Controls whether HSTS header is applied.
48+
"""
49+
50+
def __init__(self, app: ASGIApp, environment: str = "local") -> None:
51+
super().__init__(app)
52+
self.environment = environment
53+
54+
async def dispatch(
55+
self, request: Request, call_next: RequestResponseEndpoint
56+
) -> Response:
57+
# 1. Generate request_id (UUID v4)
58+
request_id = str(uuid4())
59+
60+
# 2. Read X-Correlation-ID or fall back to request_id.
61+
# Validate format to prevent log injection (SEC-001).
62+
raw_correlation = request.headers.get("x-correlation-id")
63+
if raw_correlation and _CORRELATION_ID_PATTERN.match(raw_correlation):
64+
correlation_id = raw_correlation
65+
else:
66+
correlation_id = request_id
67+
68+
# 3. Store in request.state for downstream handlers and error handlers
69+
request.state.request_id = request_id
70+
request.state.correlation_id = correlation_id
71+
72+
# 4. Bind to structlog contextvars for automatic inclusion in all logs
73+
structlog.contextvars.clear_contextvars()
74+
structlog.contextvars.bind_contextvars(
75+
request_id=request_id,
76+
correlation_id=correlation_id,
77+
)
78+
79+
# 5. Record start time
80+
start = time.perf_counter()
81+
82+
# 6. Process request — wrap in try/except so headers are set even
83+
# when an exception propagates past all exception handlers.
84+
try:
85+
response = await call_next(request)
86+
except Exception:
87+
# Log the exception so it's not silently swallowed (BUG-001).
88+
# In practice, global exception handlers (errors.py) catch most
89+
# exceptions before they reach here.
90+
structlog.get_logger().exception(
91+
"unhandled_exception_in_middleware",
92+
method=request.method,
93+
path=request.url.path,
94+
)
95+
response = Response(
96+
content='{"error":"INTERNAL_ERROR","message":"An unexpected error occurred."}',
97+
status_code=500,
98+
media_type="application/json",
99+
)
100+
101+
# 7. Calculate duration
102+
duration_ms = round((time.perf_counter() - start) * 1000, 2)
103+
104+
# 8. Apply security headers
105+
_apply_security_headers(response, self.environment)
106+
107+
# 9. Set X-Request-ID response header
108+
response.headers["X-Request-ID"] = request_id
109+
110+
# 10. Log request with appropriate level based on status code
111+
_log_request(request, response, duration_ms)
112+
113+
# 11. Clear contextvars after logging to prevent leakage (FUNC-001)
114+
structlog.contextvars.clear_contextvars()
115+
116+
return response
117+
118+
119+
def _apply_security_headers(response: Response, environment: str) -> None:
120+
"""Apply security headers to the response."""
121+
for header, value in _SECURITY_HEADERS.items():
122+
response.headers[header] = value
123+
124+
if environment == "production":
125+
response.headers["Strict-Transport-Security"] = _HSTS_VALUE
126+
127+
128+
def _log_request(request: Request, response: Response, duration_ms: float) -> None:
129+
"""Log the completed request at the appropriate level.
130+
131+
- 2xx → info
132+
- 4xx → warning
133+
- 5xx → error
134+
"""
135+
logger: Any = structlog.get_logger()
136+
137+
log_kwargs: dict[str, Any] = {
138+
"method": request.method,
139+
"path": request.url.path,
140+
"status_code": response.status_code,
141+
"duration_ms": duration_ms,
142+
}
143+
144+
# Include user_id if set by auth middleware/handler
145+
user_id = getattr(request.state, "user_id", None)
146+
if user_id is not None:
147+
log_kwargs["user_id"] = user_id
148+
149+
status = response.status_code
150+
if status >= 500:
151+
logger.error("request_completed", **log_kwargs)
152+
elif status >= 400:
153+
logger.warning("request_completed", **log_kwargs)
154+
else:
155+
logger.info("request_completed", **log_kwargs)

backend/app/main.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@
66
from app.api.main import api_router
77
from app.core.config import settings
88
from app.core.errors import register_exception_handlers
9+
from app.core.logging import setup_logging
10+
from app.core.middleware import RequestPipelineMiddleware
11+
12+
# Configure structured logging (JSON in production, console in local dev)
13+
setup_logging(settings)
914

1015

1116
def custom_generate_unique_id(route: APIRoute) -> str:
@@ -34,4 +39,10 @@ def custom_generate_unique_id(route: APIRoute) -> str:
3439
allow_headers=["*"],
3540
)
3641

42+
# Request pipeline middleware: request ID, correlation, security headers, logging.
43+
# Added AFTER CORSMiddleware in code — Starlette adds middleware as a stack
44+
# (last-added = outermost), so this wraps CORS. This ensures security headers
45+
# and X-Request-ID are set on ALL responses, including CORS preflight OPTIONS.
46+
app.add_middleware(RequestPipelineMiddleware, environment=settings.ENVIRONMENT)
47+
3748
app.include_router(api_router, prefix=settings.API_V1_STR)

backend/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ dependencies = [
1919
"sentry-sdk[fastapi]>=2.0.0,<3.0.0",
2020
"pyjwt<3.0.0,>=2.8.0",
2121
"pwdlib[argon2,bcrypt]>=0.3.0",
22+
"structlog>=24.1.0,<26.0.0",
2223
]
2324

2425
[dependency-groups]

0 commit comments

Comments
 (0)