Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 14 additions & 24 deletions app/core/i18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,18 @@

1. URL path-prefix (``/de/...`` or ``/en/...``)
2. Query-param (``?lang=de|en``)
3. Cookie (``fm_lang``)
4. ``Accept-Language`` header (``de*`` | ``en*``)
5. Default ``de`` (Hamburg-based operator, see plan)
3. ``Accept-Language`` header (``de*`` | ``en*``)
4. Default ``de`` (Hamburg-based operator, see plan)

The app stores **no client-side locale preference** (no cookie, no
localStorage). The URL is the single source of truth: a user who clicks
the EN switcher lands on ``/en/...`` and stays there as long as
in-app links propagate ``current_prefix``. A return visit to an
unprefixed URL falls through to ``Accept-Language`` and the operator
default. This keeps the published privacy guarantee ("no cookies")
intact and makes shared URLs deterministic. Logged-in users get a
sticky preference via ``User.preferred_lang`` (PR-i18n-3) — server-side,
device-portable, strictly better than a cookie.

The resolved locale lands on ``request.state.locale`` via
:class:`LocaleMiddleware`. Routes that render templates pull it through
Expand Down Expand Up @@ -36,8 +45,6 @@

SUPPORTED_LOCALES: tuple[str, ...] = ("de", "en")
DEFAULT_LOCALE: str = "de"
COOKIE_NAME: str = "fm_lang"
COOKIE_MAX_AGE: int = 30 * 24 * 60 * 60 # 30 days
LOCALE_DIR: Path = Path(__file__).resolve().parent.parent.parent / "locale"

# Module-level translation cache. Populated lazily on first lookup.
Expand Down Expand Up @@ -134,15 +141,11 @@ def resolve_locale(request: Request) -> str:
q = request.query_params.get("lang")
if q in SUPPORTED_LOCALES:
return q
# 3. Sticky cookie
c = request.cookies.get(COOKIE_NAME)
if c in SUPPORTED_LOCALES:
return c
# 4. Best-effort Accept-Language
# 3. Best-effort Accept-Language
al = _accept_language_locale(request.headers.get("accept-language"))
if al:
return al
# 5. Operator default (DE unless overridden)
# 4. Operator default (DE unless overridden)
return _effective_default()


Expand All @@ -155,19 +158,6 @@ async def get_locale(request: Request) -> str:
return getattr(request.state, "locale", None) or resolve_locale(request)


def is_explicit_locale_signal(request: Request) -> bool:
    """Report whether the request itself names a locale explicitly.

    A request counts as explicit when its URL path yields a locale via
    ``path_prefix_locale`` or when its ``lang`` query parameter is one of
    ``SUPPORTED_LOCALES``.

    The locale cookie is only meant to be written for such explicit
    signals; writing it otherwise would let the cookie race the URL on
    every page load and surprise users who arrive on a different prefix
    from a bookmark.
    """
    # A locale-bearing path is sufficient on its own; no need to look at
    # the query string in that case.
    if path_prefix_locale(request.url.path) is not None:
        return True
    # Otherwise only a supported ``?lang=`` value counts.
    requested = request.query_params.get("lang")
    return requested in SUPPORTED_LOCALES


def localized_context(request: Request, **extra) -> dict:
"""Build a context dict carrying the per-request translator + locale.

Expand Down
28 changes: 7 additions & 21 deletions app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,8 @@
normalize_classification as _normalize_data_classification,
)
from app.core.i18n import (
COOKIE_MAX_AGE as _LOCALE_COOKIE_MAX_AGE,
COOKIE_NAME as _LOCALE_COOKIE_NAME,
SUPPORTED_LOCALES,
base_path,
is_explicit_locale_signal,
localized_context,
localized_url,
resolve_locale,
Expand Down Expand Up @@ -279,26 +276,15 @@ def _build_csp_header(api_base_url: str) -> str:

@app.middleware("http")
async def locale_resolver(request: Request, call_next):
"""Resolve the active locale, stash on request.state, persist via cookie.
"""Resolve the active locale and stash it on ``request.state``.

The cookie is only written when the locale was *explicit* (URL prefix
or ``?lang=`` param) — otherwise we'd race the cookie against the URL
on every page load and lock visitors out of Accept-Language detection.
No cookie is written. The URL is the single source of truth — see
``app/core/i18n.py`` module docstring for the rationale and the
privacy-policy commitment ("FileMorph sets no cookies on its own
domain", ``app/templates/privacy.html`` §6).
"""
locale = resolve_locale(request)
request.state.locale = locale
response = await call_next(request)
if is_explicit_locale_signal(request) and request.cookies.get(_LOCALE_COOKIE_NAME) != locale:
response.set_cookie(
_LOCALE_COOKIE_NAME,
value=locale,
max_age=_LOCALE_COOKIE_MAX_AGE,
samesite="lax",
secure=request.url.scheme == "https",
httponly=False,
path="/",
)
return response
request.state.locale = resolve_locale(request)
return await call_next(request)


@app.middleware("http")
Expand Down
102 changes: 46 additions & 56 deletions tests/test_i18n.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,21 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""i18n infrastructure tests — pin the locale-resolution contract.

Covers the five-step resolution chain in `app/core/i18n.py::resolve_locale`,
the unknown-locale fallback, the cookie persistence rule (only-on-explicit-
signal), the hreflang presence on every page, and the `<html lang>` /
`og:locale` propagation.
Covers the four-step resolution chain in
``app/core/i18n.py::resolve_locale`` (URL-prefix → query-param →
Accept-Language → operator default), the unknown-locale fallback, the
hreflang presence on every page, the ``<html lang>`` / ``og:locale``
propagation, and the **no-cookie** regression-guard.

The app intentionally writes no client-side locale cookie — the
published privacy policy (``app/templates/privacy.html`` §6) commits to
"no cookies on its own domain", and the URL is the single source of
truth for locale. ``test_no_locale_cookie_set_on_any_route`` is the
programmatic guard against accidental reintroduction.

These tests do not depend on any DE translations existing — PR-i18n-1
ships infrastructure with empty .po files, so all rendered text stays
EN. The tests assert *behaviour* (URL-prefix wins, cookie persists, etc.),
not translated copy.
EN. The tests assert *behaviour*, not translated copy.
"""

from __future__ import annotations
Expand All @@ -29,14 +35,13 @@

@pytest.fixture(autouse=True)
def _clear_cookies(client):
"""Reset the session-scoped TestClient's cookie jar before each i18n test.

The shared ``client`` fixture in ``conftest.py`` is session-scoped, so
cookies from earlier i18n tests (e.g. a `/de/` visit setting
``fm_lang=de``) would leak into later tests and steal priority over
the resolution chain we're trying to assert. Clearing per-test gives
each case a clean cookie jar without sacrificing the speed of a
shared TestClient.
"""Defensive: reset the session-scoped TestClient's cookie jar per test.

The app no longer sets any locale cookie, but the session-scoped
``client`` from ``conftest.py`` accumulates whatever any other test
in the suite happens to set. Clearing here pins each i18n test to a
clean jar so unrelated suite ordering can't leak state into the
resolution-chain assertions.
"""
client.cookies.clear()
yield
Expand Down Expand Up @@ -121,16 +126,8 @@ def test_query_param_lang_overrides_default(client):
assert m and m.group(1) == "en"


def test_cookie_overrides_default(client):
    """A request to `/` carrying an `fm_lang=en` cookie must render EN.

    Checks the ``<html lang>`` attribute of the response body rather than
    any translated copy.
    """
    response = client.get("/", cookies={"fm_lang": "en"})
    assert response.status_code == 200
    # Pull the language out of the opening <html> tag.
    html_lang = re.search(r'<html lang="([^"]+)"', response.text)
    assert html_lang is not None
    assert html_lang.group(1) == "en"


def test_accept_language_en_falls_through(client):
"""When no URL prefix / query / cookie, Accept-Language: en* wins over default DE."""
"""When no URL prefix / query is present, Accept-Language: en* wins over default DE."""
r = client.get("/", headers={"accept-language": "en-US,en;q=0.9"})
assert r.status_code == 200
m = re.search(r'<html lang="([^"]+)"', r.text)
Expand All @@ -139,9 +136,9 @@ def test_accept_language_en_falls_through(client):

def test_unknown_locale_query_falls_back_to_default(client):
"""`?lang=fr` is not supported, and with no Accept-Language signal
the server falls back to step 5 (operator default = DE upstream).
the server falls back to the operator default (DE upstream).

We send an empty ``accept-language`` so the test pins step 5 in
We send an empty ``accept-language`` so the test pins the default in
isolation; without it httpx's TestClient sometimes inherits a
locale-shaped default that masks the fallback path.
"""
Expand All @@ -151,14 +148,6 @@ def test_unknown_locale_query_falls_back_to_default(client):
assert m and m.group(1) == DEFAULT_LOCALE


def test_url_prefix_beats_cookie(client):
    """The `/en/` path prefix must win over a conflicting `fm_lang=de` cookie.

    Guards against a stale cookie overriding the locale the URL asks for.
    """
    response = client.get("/en/", cookies={"fm_lang": "de"})
    assert response.status_code == 200
    # The rendered <html lang> must follow the URL prefix, not the cookie.
    html_lang = re.search(r'<html lang="([^"]+)"', response.text)
    assert html_lang is not None
    assert html_lang.group(1) == "en"


def test_url_prefix_beats_query_param(client):
"""Path-prefix wins over `?lang=` so deep-linked URLs stay deterministic."""
r = client.get("/de/?lang=en")
Expand All @@ -167,35 +156,36 @@ def test_url_prefix_beats_query_param(client):
assert m and m.group(1) == "de"


# ── Cookie persistence ────────────────────────────────────────────────────────


def test_explicit_url_prefix_sets_cookie(client):
    """A visit to `/en/` must come back with an `fm_lang=en` Set-Cookie."""
    response = client.get("/en/")
    assert response.status_code == 200
    # A missing header is treated as an empty string so the membership
    # check fails cleanly instead of raising on None.
    cookie_header = response.headers.get("set-cookie", "")
    assert "fm_lang=en" in cookie_header

# ── No-cookie regression-guard ────────────────────────────────────────────────

def test_explicit_query_lang_sets_cookie(client):
    """A `?lang=en` request must come back with an `fm_lang=en` Set-Cookie.

    The query parameter is an explicit locale signal, just like a path
    prefix, so the choice is expected to be persisted.
    """
    response = client.get("/?lang=en")
    assert response.status_code == 200
    # Default to "" so an absent header fails the assertion, not the lookup.
    cookie_header = response.headers.get("set-cookie", "")
    assert "fm_lang=en" in cookie_header

@pytest.mark.parametrize(
"url",
[
"/",
"/de/",
"/en/",
"/?lang=de",
"/?lang=en",
"/login",
"/de/login",
"/en/login",
],
)
def test_no_locale_cookie_set_on_any_route(client, url):
"""The app commits to "no cookies on its own domain" (privacy.html §6).

def test_unprefixed_path_does_not_set_cookie(client):
"""`/` with no explicit locale signal must NOT set fm_lang.

Otherwise a default-DE visitor's first hit would write `fm_lang=de`,
locking them out of any future Accept-Language detection.
This is the programmatic gate against accidental reintroduction of a
locale cookie. Hits the URL space that PR-i18n-1 originally wired the
cookie to (root, prefixed, query-param, and a non-trivial page) and
asserts the response carries no ``fm_lang`` Set-Cookie.
"""
r = client.get("/")
r = client.get(url, headers={"accept-language": ""})
assert r.status_code == 200
set_cookie = r.headers.get("set-cookie", "")
assert "fm_lang" not in set_cookie
assert "fm_lang" not in set_cookie, (
f"locale cookie regression on {url!r} — Set-Cookie was: {set_cookie!r}"
)


# ── hreflang + canonical ──────────────────────────────────────────────────────
Expand Down
Loading