diff --git a/docs/changes.rst b/docs/changes.rst index cb0ab09105b8..587c3ff571e8 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -50,6 +50,7 @@ Weblate 2026.5 * :ref:`code-hosting-gerrit` branch validation now suggests short branch names when full refs are supplied. * Malformed ALTCHA CAPTCHA submissions no longer cause server errors. * Malformed repository URLs in webhook payloads no longer trigger server errors during fallback matching. +* :ref:`check-placeholders` now merges overlapping non-nested spans from multiple flags. * :ref:`backup` logs no longer include OpenSSH post-quantum key exchange warnings from remote Borg connections. * Locked component pages now show an unsubscribe action after subscribing to unlock notifications. diff --git a/weblate/checks/placeholders.py b/weblate/checks/placeholders.py index 95f0db83b09b..4fb00022f3a6 100644 --- a/weblate/checks/placeholders.py +++ b/weblate/checks/placeholders.py @@ -14,6 +14,7 @@ from weblate.checks.base import TargetCheckParametrized from weblate.checks.parser import multi_value_flag, single_value_flag +from weblate.checks.utils import merge_highlight_spans if TYPE_CHECKING: from weblate.trans.models import Unit @@ -131,10 +132,30 @@ def check_highlight(self, source: str, unit: Unit): if not self.has_value(unit): return - regexp = self.get_value(unit) + regex_flags = regex.IGNORECASE if "case-insensitive" in unit.all_flags else 0 + spans: list[tuple[int, int, str]] = [] + + # get raw list of patterns from unit to run each independently continue + for param in unit.all_flags.get_value_raw(self.enable_string): + if isinstance(param, str): + if not param: + continue + pattern = regex.compile(regex.escape(param), regex_flags) + else: + if not param.pattern: + continue + pattern = regex.compile(param.pattern, regex_flags) + + spans.extend( + (match.start(), match.end(), match.group()) + for match in pattern.finditer(source) + ) + + if not spans: + return - for match in regexp.finditer(source): - yield 
(match.start(), match.end(), match.group()) + spans.sort(key=lambda x: (x[0], -x[1])) + yield from merge_highlight_spans(source, spans) def get_description(self, check_obj): unit = check_obj.unit diff --git a/weblate/checks/tests/test_placeholders.py b/weblate/checks/tests/test_placeholders.py index 3a3bdf92ab92..e79f7718f495 100644 --- a/weblate/checks/tests/test_placeholders.py +++ b/weblate/checks/tests/test_placeholders.py @@ -134,6 +134,21 @@ def test_escaped_markup(self) -> None: ], ) + def test_overlapping_non_nested(self) -> None: + # The 2 placeholder patterns match partially overlapping spans: + # r"\$\{\w+" matches ${user + # r"\w+\.\w+\}" matches user.name} + unit = make_unit( + None, + r'placeholders:r"\$\{\w+":r"\w+\.\w+\}"', + self.default_lang, + "nested ${user.name} non-overlapping", + ) + self.assertEqual( + list(self.check.check_highlight(unit.source, unit)), + [(7, 19, "${user.name}")], + ) + def test_empty_placeholder_flags_do_not_match(self) -> None: for flags in ("placeholders:", 'placeholders:""', 'placeholders:r""'): with self.subTest(flags=flags): diff --git a/weblate/checks/tests/test_utils.py b/weblate/checks/tests/test_utils.py index 8d3830de5374..81e00569912e 100644 --- a/weblate/checks/tests/test_utils.py +++ b/weblate/checks/tests/test_utils.py @@ -39,6 +39,19 @@ def test_overlap(self) -> None: [(7, 26, ''), (32, 36, "")], ) + def test_overlap_non_nested(self) -> None: + # The 2 flags match partially overlapping spans + # 'python-brace-format' matches {user.name}. + # "placeholders:r"\$\{\w+" matches ${user. 
def merge_highlight_spans(
    source: str, highlights: list[tuple[int, int, str]]
) -> list[tuple[int, int, str]]:
    """
    Merge overlapping highlight spans into their union intervals.

    Each highlight is a ``(start, end, text)`` tuple where ``start`` and
    ``end`` are offsets into ``source``.

    - A span fully contained in the previously kept span (nested, an exact
      duplicate, or a zero-width span sitting on its end boundary) is dropped
      and the outer span is kept untouched.
    - A span that partially overlaps the previously kept span extends it; the
      text of the merged span is re-read from ``source``.
    - Spans that merely touch (``start == previous end``) stay separate.

    The input does not have to be sorted and is not modified; the result is
    ordered by start offset.
    """
    merged: list[tuple[int, int, str]] = []
    # Order by start offset, longest span first. The merge below is only
    # correct on ordered input, so do not rely on every caller to pre-sort.
    ordered = sorted(highlights, key=lambda item: (item[0], -item[1]))
    for start, end, text in ordered:
        if merged:
            prev_start, prev_end, _ = merged[-1]
            if end <= prev_end:
                # Ordering guarantees start >= prev_start, so this span lies
                # entirely inside the previous one.
                continue
            if start < prev_end:
                # Partial overlap: grow the previous span to the union.
                merged[-1] = (prev_start, end, source[prev_start:end])
                continue
        merged.append((start, end, text))
    return merged
return merge_highlight_spans(source, highlights) def replace_highlighted( @@ -106,6 +101,7 @@ def replace_highlighted( last_end = 0 for start, end, _text in highlights: if start < last_end: + last_end = max(last_end, end) continue result.append(source[last_end:start]) if callable(replacement):