Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions docs/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@ Weblate 2026.7

.. rubric:: New features

* Added :ref:`check-safe-mdx` check to ensure that the target string contains the same JSX expressions as the source string for :ref:`mdx` files.

Weblate 2026.6
--------------

*Not yet released.*

.. rubric:: New features

.. rubric:: Improvements

* Management interface access control is now more fine-grained with dedicated site-wide permissions.
Expand Down
4 changes: 4 additions & 0 deletions docs/snippets/check-flags-autogenerated.rst
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,8 @@
Enables the :ref:`check-placeholders` quality check.
``regex``
Enables the :ref:`check-regex` quality check.
``safe-mdx``
Enables the :ref:`check-safe-mdx` quality check.
``safe-html``
Enables the :ref:`check-safe-html` quality check.
``auto-safe-html``
Expand Down Expand Up @@ -272,6 +274,8 @@
Skip the :ref:`check-rst-syntax` quality check.
``ignore-reused``
Skip the :ref:`check-reused` quality check.
``ignore-safe-mdx``
Skip the :ref:`check-safe-mdx` quality check.
``ignore-same-plurals``
Skip the :ref:`check-same-plurals` quality check.
``ignore-begin-newline``
Expand Down
22 changes: 22 additions & 0 deletions docs/snippets/checks-autogenerated.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1916,6 +1916,28 @@ Check that fails if the same translation is used on different source strings.
Such translations can be intentional, but can also confuse users.


.. AUTOGENERATED START: check-safe-mdx
.. This section is automatically generated by `./manage.py list_checks`. Do not edit manually.

.. _check-safe-mdx:

Safe MDX
~~~~~~~~

.. versionadded:: 2026.7

:Summary: JSX expressions in the translation do not match the source.
:Scope: translated strings
:Check class: ``weblate.checks.mdx.SafeMDXCheck``
:Check identifier: ``safe-mdx``
:Trigger: This check needs to be enabled using a flag.
:File formats automatically enabling this check: :ref:`mdx`
:Flag to enable: ``safe-mdx``
:Flag to ignore: ``ignore-safe-mdx``

.. AUTOGENERATED END: check-safe-mdx


.. AUTOGENERATED START: check-same-plurals
.. This section is automatically generated by `./manage.py list_checks`. Do not edit manually.

Expand Down
2 changes: 1 addition & 1 deletion docs/snippets/format-features/mdx-features.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,6 @@
* - Supports read-only strings :ref:`ⓘ <read-only-strings>`
- ``No``
* - Check flags added by this format :ref:`ⓘ <custom-checks>`
- ``auto-safe-html``, ``strict-same``, ``md-text``
- ``auto-safe-html``, ``strict-same``, ``md-text``, :ref:`check-safe-mdx`

.. AUTOGENERATED END: format-features mdx
1 change: 1 addition & 0 deletions weblate/checks/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
"weblate.checks.markup.SafeHTMLCheck",
"weblate.checks.markup.RSTReferencesCheck",
"weblate.checks.markup.RSTSyntaxCheck",
"weblate.checks.mdx.SafeMDXCheck",
"weblate.checks.placeholders.PlaceholderCheck",
"weblate.checks.placeholders.RegexCheck",
"weblate.checks.duplicate.DuplicateCheck",
Expand Down
199 changes: 199 additions & 0 deletions weblate/checks/mdx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
# Copyright © Michal Čihař <michal@weblate.org>
#
# SPDX-License-Identifier: GPL-3.0-or-later

from __future__ import annotations

from typing import TYPE_CHECKING

from django.utils.translation import gettext_lazy

from weblate.checks.base import TargetCheck

if TYPE_CHECKING:
from collections.abc import Iterator

from weblate.trans.models import Unit

# Various lexical contexts tracked while scanning a JSX expression.
_CODE = 0
_SINGLE = 1 # '...'
_DOUBLE = 2 # "..."
_TEMPLATE = 3 # `...`
_LINE_COMMENT = 4 # // ...
_BLOCK_COMMENT = 5 # /* ... */
_REGEX = 6 # /.../

# Characters after which a ``/`` starts a regex literal
_REGEX_PRECEDERS = frozenset("(,=:[{;!&|?+-*%~^")


def _regex_allowed(prev: str) -> bool:
"""Decide whether a ``/`` begins a regex based on the previous code char."""
return not prev or prev in _REGEX_PRECEDERS


def _scan_expression(text: str, start: int) -> int | None: # noqa: C901
"""
Find the closing brace of the JSX expression opening at ``start``.

Returns the index of closing brace or None if the expression is unterminated.
"""
length = len(text)
depth = 1
mode = _CODE
# Mode to restore when a ``}`` closes; lets ``${ ... }`` interpolations
# return to template context. Length is always ``depth - 1``.
restore: list[int] = []
in_char_class = False # whether the current regex is inside ``[...]``
prev = "" # last significant character
i = start + 1

while i < length:
char = text[i]

if mode == _CODE:
if char == "{":
depth += 1
restore.append(_CODE)
elif char == "}":
depth -= 1
if depth == 0:
return i
mode = restore.pop()
elif char == '"':
mode = _DOUBLE
elif char == "'":
mode = _SINGLE
elif char == "`":
mode = _TEMPLATE
elif char == "/" and i + 1 < length and text[i + 1] == "/":
mode = _LINE_COMMENT
i += 1
elif char == "/" and i + 1 < length and text[i + 1] == "*":
mode = _BLOCK_COMMENT
i += 1
elif char == "/" and _regex_allowed(prev):
mode = _REGEX
in_char_class = False
if not char.isspace():
prev = char

elif mode == _DOUBLE:
if char == "\\":
i += 1
elif char == '"':
mode = _CODE
prev = char

elif mode == _SINGLE:
if char == "\\":
i += 1
elif char == "'":
mode = _CODE
prev = char

elif mode == _TEMPLATE:
if char == "\\":
i += 1
elif char == "`":
mode = _CODE
prev = char
elif char == "$" and i + 1 < length and text[i + 1] == "{":
depth += 1
restore.append(_TEMPLATE)
mode = _CODE
i += 1

elif mode == _LINE_COMMENT:
if char == "\n":
mode = _CODE

elif mode == _BLOCK_COMMENT:
if char == "*" and i + 1 < length and text[i + 1] == "/":
mode = _CODE
i += 1

elif mode == _REGEX:
if char == "\\":
i += 1
elif char == "[":
in_char_class = True
elif char == "]":
in_char_class = False
elif char == "/" and not in_char_class:
mode = _CODE
prev = char

i += 1

return None


def _skip_code_span(text: str, start: int) -> int | None:
"""
Find the end of the Markdown inline code span opening at ``start``.

Returns the index of closing backtick or None if there is no matching closing run.
"""
length = len(text)
run_end = start
while run_end < length and text[run_end] == "`":
run_end += 1
run = run_end - start

i = run_end
while i < length:
if text[i] == "`":
close_end = i
while close_end < length and text[close_end] == "`":
close_end += 1
if close_end - i == run:
return close_end - 1
i = close_end
else:
i += 1

return None


class SafeMDXCheck(TargetCheck):
    """Check for unsafe MDX content."""

    check_id = "safe-mdx"
    name = gettext_lazy("Safe MDX")
    description = gettext_lazy(
        "JSX expressions in the translation do not match the source."
    )
    default_disabled = True
    version_added = "2026.7"

    def check_single(self, source: str, target: str, unit: Unit) -> bool:
        """
        Check the target has the same JSX expressions as the source.

        Expressions are compared as exact text, ignoring their order.
        Returns True when the check fails, that is when the two collections
        of expressions differ.
        """
        expected = sorted(self.get_jsx_expression_matches(source))
        found = sorted(self.get_jsx_expression_matches(target))
        return found != expected

    def get_jsx_expression_matches(self, text: str) -> Iterator[str]:
        """
        Yield every top-level JSX expression found in ``text``.

        Each yielded item is the expression including its surrounding braces.
        Backslash-escaped characters and Markdown inline code spans are
        skipped; an unterminated ``{`` is treated as literal text.
        """
        i = 0
        length = len(text)
        while i < length:
            char = text[i]
            if char == "\\":
                # escaped character can be skipped (e.g. ``\{``)
                i += 2
                continue
            if char == "`":
                # Markdown inline code span
                close = _skip_code_span(text, i)
                if close is not None:
                    i = close + 1
                    continue
                # No closing run of the same length: the whole backtick run
                # is literal text. Skip it entirely so that its tail is not
                # misread as a shorter opening run that could swallow a
                # following expression.
                while i < length and text[i] == "`":
                    i += 1
                continue
            if char == "{":
                # JSX expression
                close = _scan_expression(text, i)
                if close is not None:
                    yield text[i : close + 1]
                    i = close + 1
                    continue
            i += 1
108 changes: 108 additions & 0 deletions weblate/checks/test_mdx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# Copyright © Michal Čihař <michal@weblate.org>
#
# SPDX-License-Identifier: GPL-3.0-or-later

from __future__ import annotations

from weblate.checks.mdx import SafeMDXCheck
from weblate.checks.tests.test_checks import CheckTestCase


class SafeMDXCheckTest(CheckTestCase):
    """Tests for the Safe MDX check and its JSX expression extraction."""

    check = SafeMDXCheck()

    def setUp(self) -> None:
        """Define the source/target/flags triples used by the shared check tests."""
        super().setUp()
        flags = "safe-mdx"
        greeting = "Hello, {props.name.toUpperCase()}"

        self.test_good_matching = (greeting, "Ahoj, {props.name.toUpperCase()}", flags)
        self.test_failure_1 = (greeting, "Ahoj, {props.unauthorized.access()}", flags)
        self.test_failure_2 = ("Test {Math.PI * 100}", "Test {Math.PI*100}", flags)
        self.test_failure_3 = (greeting, "Ahoj", flags)
        self.test_ignore_check = (
            "Hello, {test}",
            "Ahoj, {ignore}",
            "safe-mdx,ignore-safe-mdx",
        )

    def test_complex_expressions(self) -> None:
        """Extract expressions from inputs exercising each lexical context."""
        mapped = "{[1, 2, 3].map(({ id }) => (<p key={id}>{id}</p>))}"
        cases: list[tuple[str, list[str]]] = [
            (
                "Expression one {[1, 2, 3].map(({ id }) => (<p key={id}>{id}</p>))} and two {[1, 2, 3].map(({ id }) => (<p key={id}>{id}</p>))}",
                [mapped, mapped],
            ),
            (
                "Test {[1, 2, 3].map(({ id }) => (<p key={id}>{id}</p>))}",
                [mapped],
            ),
            ("Test { `}}` + 'count' }", ["{ `}}` + 'count' }"]),
            ("Test { `{{` + 'count' + `}}` }", ["{ `{{` + 'count' + `}}` }"]),
            # Braces inside a string literal must not break expression boundaries.
            ('Show {label("}")} here', ['{label("}")}']),
            # Braces inside a block comment are ignored.
            ("Value {x /* } */ + y}", ["{x /* } */ + y}"]),
            # Braces inside a regex literal (including a character class) are ignored.
            ('Clean {s.replace(/[{}]/g, "")}', ['{s.replace(/[{}]/g, "")}']),
            # Template literal interpolation is part of the expression.
            ("Total {`sum: ${a + b}`}", ["{`sum: ${a + b}`}"]),
            # Escaped braces are literal text, not expressions; only {real} counts.
            ("Price \\{literal\\} and {real}", ["{real}"]),
            ("Price {literal} and {real}", ["{literal}", "{real}"]),
            # Braces inside a Markdown inline code span are literal text.
            ("Use `{notExpr}` but {expr}", ["{expr}"]),
            (
                "Test \\{{count1} `{inside_thiings}` and then {count2}",
                ["{count1}", "{count2}"],
            ),
            # Nested destructuring with a default object literal.
            ("{({ a = { x: 1 } }) => a}", ["{({ a = { x: 1 } }) => a}"]),
            # JSX element with nested expression attributes.
            (
                "{<Icon name={`star`} count={3} />}",
                ["{<Icon name={`star`} count={3} />}"],
            ),
        ]
        for text, expected in cases:
            self.check_jsx_expression_matches(text, expected)

    def check_jsx_expression_matches(self, text: str, expected: list[str]) -> None:
        """Assert that the expressions extracted from ``text`` equal ``expected``."""
        extracted = list(self.check.get_jsx_expression_matches(text))
        self.assertEqual(extracted, expected)
Comment thread
gersona marked this conversation as resolved.
2 changes: 1 addition & 1 deletion weblate/checks/tests/test_flags.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,4 +332,4 @@ def check_location_flags(content: str, expected_flags: set[str]) -> None:

# test md-text flag for MDX
content = f'{PO_HEADER}#: ../../path/file.mdx:24 ../../path/file.mdx:52msgid "Hello, world!"msgstr "Nazdar svete!"'
check_location_flags(content, {"md-text"})
check_location_flags(content, {"md-text", "safe-mdx"})
Loading
Loading