Skip to content

Commit 8fe0223

Browse files
committed
chore: code review.
1 parent d85b8a1 commit 8fe0223

41 files changed

Lines changed: 1114 additions & 290 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

CLAUDE.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,4 +128,4 @@ After completing any development task, always run the following before reporting
128128

129129
1. **Tests** — run `make test` (or the relevant subset: `make test-backend` / `make test-frontend`) and make sure everything passes.
130130
2. **Lint** — run `make lint` for frontend changes.
131-
3. **Code review** — perform a self-review of the diff, then run a second-opinion review with Gemini CLI for non-trivial changes.
131+
3. **Code review** — perform a self-review of the diff for non-trivial changes.

backend/app/auth.py

Lines changed: 32 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def create_token(user_id: int, username: str, role: str) -> str:
4444
"role": role,
4545
"exp": expire,
4646
}
47-
return jwt.encode(payload, settings.SECRET_KEY, algorithm=ALGORITHM)
47+
return jwt.encode(payload, settings.SECRET_KEY.get_secret_value(), algorithm=ALGORITHM)
4848

4949

5050
async def _resolve_api_token(token: str) -> dict | None:
@@ -98,55 +98,59 @@ async def _resolve_api_token(token: str) -> dict | None:
9898
}
9999

100100

101-
async def get_current_user(request: Request):
102-
token = None
101+
async def _resolve_request_credentials(request: Request) -> dict | None:
102+
"""Decode whichever credential the request carries, or return None.
103103
104-
# Check Authorization header
104+
Recognises both personal API tokens (prefixed with `jwk_`) and JWT
105+
session tokens, read from `Authorization: Bearer` first and falling
106+
back to the `token` cookie. Returns the resolved user dict on success
107+
and None on any failure (no token, invalid token, missing user). The
108+
caller decides whether to raise 401 or treat it as anonymous.
109+
"""
110+
token = None
105111
auth_header = request.headers.get("Authorization")
106112
if auth_header and auth_header.startswith("Bearer "):
107113
token = auth_header[7:]
108-
109-
# Check cookie
110114
if not token:
111115
token = request.cookies.get("token")
112-
113116
if not token:
114-
raise HTTPException(
115-
status_code=status.HTTP_401_UNAUTHORIZED,
116-
detail="Not authenticated",
117-
)
117+
return None
118118

119-
# Personal API tokens are distinguished by a fixed prefix so we don't
120-
# have to guess-and-fall-back. Anything else must parse as a JWT.
121119
if token.startswith(API_TOKEN_PREFIX):
122-
user = await _resolve_api_token(token)
123-
if user is None:
124-
raise HTTPException(
125-
status_code=status.HTTP_401_UNAUTHORIZED,
126-
detail="Invalid token",
127-
)
128-
return user
120+
return await _resolve_api_token(token)
129121

130122
try:
131-
payload = jwt.decode(token, settings.SECRET_KEY, algorithms=[ALGORITHM])
123+
payload = jwt.decode(token, settings.SECRET_KEY.get_secret_value(), algorithms=[ALGORITHM])
132124
user_id = int(payload["sub"])
133125
except (JWTError, KeyError, ValueError):
134-
raise HTTPException(
135-
status_code=status.HTTP_401_UNAUTHORIZED,
136-
detail="Invalid token",
137-
)
126+
return None
138127

139128
db = await get_db()
140129
row = await db.execute_fetchall(
141130
"SELECT id, username, role, display_name, email FROM users WHERE id = ? AND deleted_at IS NULL",
142131
(user_id,),
143132
)
144133
if not row:
134+
return None
135+
return dict(row[0])
136+
137+
138+
async def get_current_user(request: Request):
139+
user = await _resolve_request_credentials(request)
140+
if user is None:
145141
raise HTTPException(
146142
status_code=status.HTTP_401_UNAUTHORIZED,
147-
detail="User not found",
143+
detail="Not authenticated",
148144
)
149-
return dict(row[0])
145+
return user
146+
147+
148+
async def get_optional_user(request: Request) -> dict | None:
149+
"""Same credential resolution as `get_current_user` but returns None
150+
instead of raising on missing/invalid credentials. Use for endpoints
151+
that serve both authenticated and anonymous traffic (e.g. media
152+
files referenced by public pages)."""
153+
return await _resolve_request_credentials(request)
150154

151155

152156
async def require_admin(user=Depends(get_current_user)):
@@ -159,7 +163,7 @@ async def ensure_admin_exists():
159163
db = await get_db()
160164
rows = await db.execute_fetchall("SELECT id FROM users WHERE role = 'admin'")
161165
if not rows:
162-
pw_hash = hash_password(settings.ADMIN_PASS)
166+
pw_hash = hash_password(settings.ADMIN_PASS.get_secret_value())
163167
await db.execute(
164168
"INSERT INTO users (username, password_hash, role) VALUES (?, ?, 'admin')",
165169
(settings.ADMIN_USER, pw_hash),

backend/app/config.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
1+
from pydantic import SecretStr
12
from pydantic_settings import BaseSettings
23
from pathlib import Path
34

45

56
class Settings(BaseSettings):
6-
SECRET_KEY: str = "change-me-to-random-string"
7+
# Secrets use SecretStr so `repr(settings)` / a debug dump / a stray
8+
# log line can't leak them. Read the plaintext at the point of use
9+
# via `.get_secret_value()`.
10+
SECRET_KEY: SecretStr = SecretStr("change-me-to-random-string")
711
ADMIN_USER: str = "admin"
8-
ADMIN_PASS: str = "admin"
12+
ADMIN_PASS: SecretStr = SecretStr("admin")
913

1014
DATA_DIR: str = "./data"
1115
DB_PATH: str = "./data/just-wiki.db"
@@ -19,6 +23,14 @@ class Settings(BaseSettings):
1923
# in production, e.g. "https://wiki.example.com".
2024
ALLOWED_ORIGINS: str = ""
2125

26+
# When deployed behind a reverse proxy (nginx/Caddy/ALB/Traefik), the
27+
# direct TCP peer is the proxy — rate limiters keyed on `request.client.host`
28+
# then see every client as the same IP and either deny everyone or no-one.
29+
# Setting this to True makes the rate limiters trust the left-most
30+
# `X-Forwarded-For` entry. Only enable this when a trusted proxy is
31+
# actually in front of the app; otherwise a client can spoof the header.
32+
TRUST_PROXY: bool = False
33+
2234
# Public base URL the browser can reach. Used to build OIDC redirect_uri
2335
# and SSO-error redirects. In dev leave as localhost:8000; in prod set to
2436
# e.g. "https://wiki.example.com".
@@ -37,27 +49,27 @@ class Settings(BaseSettings):
3749

3850
# Google
3951
OIDC_GOOGLE_CLIENT_ID: str = ""
40-
OIDC_GOOGLE_CLIENT_SECRET: str = ""
52+
OIDC_GOOGLE_CLIENT_SECRET: SecretStr = SecretStr("")
4153
OIDC_GOOGLE_DISCOVERY: str = (
4254
"https://accounts.google.com/.well-known/openid-configuration"
4355
)
4456

4557
# GitHub (non-OIDC OAuth2; no discovery URL)
4658
OIDC_GITHUB_CLIENT_ID: str = ""
47-
OIDC_GITHUB_CLIENT_SECRET: str = ""
59+
OIDC_GITHUB_CLIENT_SECRET: SecretStr = SecretStr("")
4860

4961
# Generic OIDC (Keycloak / Authentik / Okta / self-hosted IdP)
5062
OIDC_GENERIC_NAME: str = "Company SSO"
5163
OIDC_GENERIC_CLIENT_ID: str = ""
52-
OIDC_GENERIC_CLIENT_SECRET: str = ""
64+
OIDC_GENERIC_CLIENT_SECRET: SecretStr = SecretStr("")
5365
OIDC_GENERIC_DISCOVERY: str = ""
5466

5567
# ── LDAP / Active Directory (optional) ──
5668
LDAP_ENABLED: bool = False
5769
LDAP_SERVER: str = "" # must be ldaps:// unless TLS disabled
5870
LDAP_TLS_VERIFY: bool = True
5971
LDAP_BIND_DN: str = ""
60-
LDAP_BIND_PASSWORD: str = ""
72+
LDAP_BIND_PASSWORD: SecretStr = SecretStr("")
6173
LDAP_USER_BASE: str = ""
6274
LDAP_USER_FILTER: str = "(&(objectClass=person)(uid={username}))"
6375
LDAP_ATTR_EMAIL: str = "mail"
@@ -74,14 +86,14 @@ class Settings(BaseSettings):
7486
# that speaks the same wire format works (OpenAI, Ollama, Groq, DeepSeek…).
7587
AI_ENABLED: bool = False
7688
AI_BASE_URL: str = "https://generativelanguage.googleapis.com/v1beta/openai"
77-
AI_API_KEY: str = ""
89+
AI_API_KEY: SecretStr = SecretStr("")
7890
AI_MODEL: str = "gemini-2.0-flash"
7991
AI_MAX_CONTEXT_PAGES: int = 5 # top-K pages stuffed into the prompt
8092
AI_EXCERPT_CHARS: int = 1500 # max chars per page content excerpt
8193
AI_RATE_LIMIT_PER_HOUR: int = 20 # per-user request cap
8294
# Legacy; kept as a read-only alias so existing deployments don't break.
8395
# Use AI_API_KEY instead.
84-
GEMINI_API_KEY: str = ""
96+
GEMINI_API_KEY: SecretStr = SecretStr("")
8597

8698
# Dashboard / updates
8799
# Off by default so air-gapped installs never make an outbound call.

backend/app/database.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import asyncio
12
import logging
23
import re
34
import sqlite3
@@ -13,6 +14,10 @@
1314
_TOKENIZE_RE = re.compile(r"""tokenize\s*=\s*['"](\w+)['"]""", re.IGNORECASE)
1415

1516
_db: aiosqlite.Connection | None = None
17+
# Serialise the first-touch connection setup so two concurrent `get_db()`
18+
# calls (e.g. racing lifespan/boot-time tasks) can't both open a connection
19+
# and leak the loser.
20+
_db_lock = asyncio.Lock()
1621

1722
SCHEMA_SQL = """
1823
CREATE TABLE IF NOT EXISTS users (
@@ -973,11 +978,17 @@ async def _ensure_fts5_index(db) -> bool:
973978

974979
async def get_db() -> aiosqlite.Connection:
975980
global _db
976-
if _db is None:
977-
_db = await aiosqlite.connect(settings.DB_PATH)
978-
_db.row_factory = aiosqlite.Row
979-
await _db.execute("PRAGMA journal_mode=WAL")
980-
await _db.execute("PRAGMA foreign_keys=ON")
981+
if _db is not None:
982+
return _db
983+
async with _db_lock:
984+
# Re-check after acquiring: another coroutine may have finished
985+
# opening the connection while we were blocked on the lock.
986+
if _db is None:
987+
conn = await aiosqlite.connect(settings.DB_PATH)
988+
conn.row_factory = aiosqlite.Row
989+
await conn.execute("PRAGMA journal_mode=WAL")
990+
await conn.execute("PRAGMA foreign_keys=ON")
991+
_db = conn
981992
return _db
982993

983994

@@ -1065,9 +1076,10 @@ async def seed_welcome_page(db):
10651076
shutil.copy2(logo_src, logo_dst)
10661077
# Make sure it's readable
10671078
os.chmod(logo_dst, 0o644)
1068-
print(f"Successfully seeded logo from {logo_src} to {logo_dst}")
1069-
except Exception as e:
1070-
print(f"Error seeding logo: {e}")
1079+
logger.info("Seeded logo from %s to %s", logo_src, logo_dst)
1080+
except OSError:
1081+
# Best-effort — missing/broken source file shouldn't block boot.
1082+
logger.warning("Logo seeding failed", exc_info=True)
10711083

10721084
rows = await db.execute_fetchall("SELECT COUNT(*) as cnt FROM pages")
10731085
if rows[0]["cnt"] > 0:

backend/app/main.py

Lines changed: 46 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525

2626
def _check_security():
2727
"""Warn loudly about insecure defaults on startup."""
28-
if settings.SECRET_KEY in _INSECURE_SECRETS:
28+
if settings.SECRET_KEY.get_secret_value() in _INSECURE_SECRETS:
2929
safe_key = secrets.token_urlsafe(32)
3030
logger.critical(
3131
"\n"
@@ -37,11 +37,11 @@ def _check_security():
3737
"╚══════════════════════════════════════════════════════╝",
3838
safe_key[:44],
3939
)
40-
if settings.ADMIN_PASS in {"admin", "password", "123456", ""}:
40+
admin_pass = settings.ADMIN_PASS.get_secret_value()
41+
if admin_pass in {"admin", "password", "123456", ""}:
4142
logger.warning(
42-
"ADMIN_PASS is set to a weak default ('%s'). "
43+
"ADMIN_PASS is set to a weak default. "
4344
"Change it in .env before deploying to production.",
44-
settings.ADMIN_PASS,
4545
)
4646
# Either SSO path issues the session cookie (OIDC state/nonce/PKCE) over
4747
# the wire. Without TLS marking, a passive attacker on the path can
@@ -56,6 +56,38 @@ def _check_security():
5656
"COOKIE_SECURE=true before exposing it."
5757
)
5858

59+
# Fail loudly if the SSO toggles are on but no usable provider is
60+
# configured. Previously this surfaced only on the first login attempt
61+
# as an opaque 500; catching it here saves operators a debugging loop.
62+
if settings.OIDC_ENABLED:
63+
providers = [p.strip() for p in settings.OIDC_PROVIDERS.split(",") if p.strip()]
64+
if not providers:
65+
logger.error(
66+
"OIDC_ENABLED=true but OIDC_PROVIDERS is empty. "
67+
"Login page will show no SSO buttons."
68+
)
69+
else:
70+
any_configured = False
71+
if "google" in providers and settings.OIDC_GOOGLE_CLIENT_ID and settings.OIDC_GOOGLE_CLIENT_SECRET.get_secret_value():
72+
any_configured = True
73+
if "github" in providers and settings.OIDC_GITHUB_CLIENT_ID and settings.OIDC_GITHUB_CLIENT_SECRET.get_secret_value():
74+
any_configured = True
75+
if "generic" in providers and settings.OIDC_GENERIC_CLIENT_ID and settings.OIDC_GENERIC_CLIENT_SECRET.get_secret_value() and settings.OIDC_GENERIC_DISCOVERY:
76+
any_configured = True
77+
if not any_configured:
78+
logger.error(
79+
"OIDC_ENABLED=true but no provider is fully configured. "
80+
"Set the matching OIDC_{provider}_CLIENT_ID and _SECRET "
81+
"(and _DISCOVERY for 'generic')."
82+
)
83+
84+
if settings.LDAP_ENABLED:
85+
if not settings.LDAP_SERVER or not settings.LDAP_BIND_DN or not settings.LDAP_USER_BASE:
86+
logger.error(
87+
"LDAP_ENABLED=true but LDAP_SERVER/LDAP_BIND_DN/LDAP_USER_BASE "
88+
"is missing. LDAP login will fail."
89+
)
90+
5991

6092
@asynccontextmanager
6193
async def lifespan(app: FastAPI):
@@ -94,15 +126,17 @@ async def csrf_guard(request: Request, call_next):
94126
if request.url.path.rstrip("/") in _CSRF_EXEMPT_PATHS:
95127
return await call_next(request)
96128

97-
# Bearer-token requests don't ride on the browser-ambient cookie, so CSRF
98-
# doesn't reach them. Skip the check.
99-
auth_header = request.headers.get("Authorization", "")
100-
if auth_header.startswith("Bearer "):
101-
return await call_next(request)
102-
103129
# If there's no session cookie, there's nothing to protect: any request
104130
# without credentials will be rejected by the route's auth dependency,
105-
# and CSRF only matters for ambient-credential flows.
131+
# and CSRF only matters for ambient-credential flows. This branch also
132+
# covers legitimate Bearer-token clients (API tokens, tests), which never
133+
# attach the session cookie.
134+
#
135+
# We deliberately do NOT exempt based on the Authorization header alone:
136+
# `get_current_user` falls back to the cookie when a Bearer token is
137+
# invalid, so a cross-origin attacker could otherwise pair a bogus
138+
# `Authorization: Bearer xyz` with the victim's session cookie to skip
139+
# CSRF protection on every mutating endpoint.
106140
if not request.cookies.get("token"):
107141
return await call_next(request)
108142

@@ -149,7 +183,7 @@ def _origin_of(url: str | None) -> str | None:
149183
_session_https_only = settings.COOKIE_SECURE or settings.PUBLIC_BASE_URL.startswith("https://")
150184
app.add_middleware(
151185
SessionMiddleware,
152-
secret_key=settings.SECRET_KEY,
186+
secret_key=settings.SECRET_KEY.get_secret_value(),
153187
session_cookie="justwiki_oauth",
154188
same_site="lax",
155189
https_only=_session_https_only,

backend/app/migrations.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,24 @@ async def _m006_auth_identities(db: aiosqlite.Connection) -> None:
9898
)
9999

100100

101+
async def _m007_groups_ldap_dn(db: aiosqlite.Connection) -> None:
102+
"""Add `ldap_dn` to `groups` so LDAP-mirrored groups can be reconciled.
103+
104+
A group with ldap_dn IS NOT NULL is considered fully managed by the LDAP
105+
sync loop — user additions/removals during sync only touch those rows;
106+
manually-created groups (ldap_dn IS NULL) are never pruned.
107+
"""
108+
if not await _column_exists(db, "groups", "ldap_dn"):
109+
await db.execute("ALTER TABLE groups ADD COLUMN ldap_dn TEXT")
110+
# SQLite can't add UNIQUE via ALTER. A partial unique index is the
111+
# idiomatic workaround and matches the semantics we want: only non-NULL
112+
# DNs must be unique (multiple manual groups with NULL dn are fine).
113+
await db.execute(
114+
"CREATE UNIQUE INDEX IF NOT EXISTS idx_groups_ldap_dn "
115+
"ON groups(ldap_dn) WHERE ldap_dn IS NOT NULL"
116+
)
117+
118+
101119
async def _m008_api_tokens_extend(db: aiosqlite.Connection) -> None:
102120
"""Extend api_tokens with prefix / expires_at / revoked_at.
103121
@@ -128,24 +146,6 @@ async def _m009_page_type(db: aiosqlite.Connection) -> None:
128146
)
129147

130148

131-
async def _m007_groups_ldap_dn(db: aiosqlite.Connection) -> None:
132-
"""Add `ldap_dn` to `groups` so LDAP-mirrored groups can be reconciled.
133-
134-
A group with ldap_dn IS NOT NULL is considered fully managed by the LDAP
135-
sync loop — user additions/removals during sync only touch those rows;
136-
manually-created groups (ldap_dn IS NULL) are never pruned.
137-
"""
138-
if not await _column_exists(db, "groups", "ldap_dn"):
139-
await db.execute("ALTER TABLE groups ADD COLUMN ldap_dn TEXT")
140-
# SQLite can't add UNIQUE via ALTER. A partial unique index is the
141-
# idiomatic workaround and matches the semantics we want: only non-NULL
142-
# DNs must be unique (multiple manual groups with NULL dn are fine).
143-
await db.execute(
144-
"CREATE UNIQUE INDEX IF NOT EXISTS idx_groups_ldap_dn "
145-
"ON groups(ldap_dn) WHERE ldap_dn IS NOT NULL"
146-
)
147-
148-
149149
MIGRATIONS: list[Migration] = [
150150
(1, "user_profile_columns", _m001_user_profile_columns),
151151
(2, "user_soft_delete", _m002_user_soft_delete),

0 commit comments

Comments
 (0)