Restore _pyrepl.utils.str_width and .wlen; respond to code review

ambv · ambv · commit 37a4fd971333 · 2026-04-07T13:32:57.000+02:00
diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 import builtins
+import functools
 import keyword
 import re
 import token as T
@@ -10,19 +11,46 @@
 from collections import deque
 from io import StringIO
 from tokenize import TokenInfo as TI
-from traceback import _str_width as str_width, _wlen as wlen
 from typing import Iterable, Iterator, Match, NamedTuple, Self
 
 from .types import CharBuffer, CharWidths
 from .trace import trace
 
+
+ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
 ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02")
 ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""})
 IDENTIFIERS_AFTER = frozenset({"def", "class"})
 KEYWORD_CONSTANTS = frozenset({"True", "False", "None"})
 BUILTINS = frozenset({str(name) for name in dir(builtins) if not name.startswith('_')})
 
 
+@functools.cache
+def str_width(c: str) -> int:
+    if ord(c) < 128:
+        return 1
+    # gh-139246 for zero-width joiner and combining characters
+    if unicodedata.combining(c):
+        return 0
+    category = unicodedata.category(c)
+    if category == "Cf" and c != "\u00ad":
+        return 0
+    w = unicodedata.east_asian_width(c)
+    if w in ("N", "Na", "H", "A"):
+        return 1
+    return 2
+
+
+def wlen(s: str) -> int:
+    if len(s) == 1 and s != "\x1a":
+        return str_width(s)
+    length = sum(str_width(i) for i in s)
+    # remove lengths of any escape sequences
+    sequence = ANSI_ESCAPE_SEQUENCE.findall(s)
+    ctrl_z_cnt = s.count("\x1a")
+    return length - sum(len(i) for i in sequence) + ctrl_z_cnt
+
+
 def THEME(**kwargs):
     # Not cached: the user can modify the theme inside the interactive session.
     return _colorize.get_theme(**kwargs).syntax
diff --git a/Lib/test/test_pyrepl/support.py b/Lib/test/test_pyrepl/support.py
@@ -1,13 +1,12 @@
 from code import InteractiveConsole
 from functools import partial
-from traceback import ANSI_ESCAPE_SEQUENCE
 from typing import Iterable
 from unittest.mock import MagicMock
 
 from _pyrepl.console import Console, Event
 from _pyrepl.readline import ReadlineAlikeReader, ReadlineConfig
 from _pyrepl.simple_interact import _strip_final_indent
-from _pyrepl.utils import unbracket
+from _pyrepl.utils import unbracket, ANSI_ESCAPE_SEQUENCE
 
 
 class ScreenEqualMixin:
diff --git a/Lib/test/test_pyrepl/test_utils.py b/Lib/test/test_pyrepl/test_utils.py
@@ -1,9 +1,52 @@
 from unittest import TestCase
 
-from _pyrepl.utils import prev_next_window, gen_colors
+from _pyrepl.utils import str_width, wlen, prev_next_window, gen_colors
 
 
 class TestUtils(TestCase):
+    def test_str_width(self):
+        characters = [
+            'a',
+            '1',
+            '_',
+            '!',
+            '\x1a',
+            '\u263A',
+            '\uffb9',
+            '\N{LATIN SMALL LETTER E WITH ACUTE}',  # é
+            '\N{LATIN SMALL LETTER E WITH CEDILLA}', # ȩ
+            '\u00ad',
+        ]
+        for c in characters:
+            self.assertEqual(str_width(c), 1)
+
+        zero_width_characters = [
+            '\N{COMBINING ACUTE ACCENT}',
+            '\N{ZERO WIDTH JOINER}',
+        ]
+        for c in zero_width_characters:
+            with self.subTest(character=c):
+                self.assertEqual(str_width(c), 0)
+
+        characters = [chr(99989), chr(99999)]
+        for c in characters:
+            self.assertEqual(str_width(c), 2)
+
+    def test_wlen(self):
+        for c in ['a', 'b', '1', '!', '_']:
+            self.assertEqual(wlen(c), 1)
+        self.assertEqual(wlen('\x1a'), 2)
+
+        char_east_asian_width_N = chr(3800)
+        self.assertEqual(wlen(char_east_asian_width_N), 1)
+        char_east_asian_width_W = chr(4352)
+        self.assertEqual(wlen(char_east_asian_width_W), 2)
+
+        self.assertEqual(wlen('hello'), 5)
+        self.assertEqual(wlen('hello' + '\x1a'), 7)
+        self.assertEqual(wlen('e\N{COMBINING ACUTE ACCENT}'), 1)
+        self.assertEqual(wlen('a\N{ZERO WIDTH JOINER}b'), 2)
+
     def test_prev_next_window(self):
         def gen_normal():
             yield 1
diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
@@ -31,7 +31,6 @@
 import traceback
 from functools import partial
 from pathlib import Path
-from traceback import _str_width, _wlen
 import _colorize
 
 MODULE_PREFIX = f'{__name__}.' if __name__ == '__main__' else ''
@@ -1791,49 +1790,6 @@ def f():
         ]
         self.assertEqual(result_lines, expected)
 
-    def test_str_width(self):
-        characters = [
-            'a',
-            '1',
-            '_',
-            '!',
-            '\x1a',
-            '\u263A',
-            '\uffb9',
-            '\N{LATIN SMALL LETTER E WITH ACUTE}',  # é
-            '\N{LATIN SMALL LETTER E WITH CEDILLA}', # ȩ
-            '\u00ad',
-        ]
-        for c in characters:
-            self.assertEqual(_str_width(c), 1)
-
-        zero_width_characters = [
-            '\N{COMBINING ACUTE ACCENT}',
-            '\N{ZERO WIDTH JOINER}',
-        ]
-        for c in zero_width_characters:
-            with self.subTest(character=c):
-                self.assertEqual(_str_width(c), 0)
-
-        characters = [chr(99989), chr(99999)]
-        for c in characters:
-            self.assertEqual(_str_width(c), 2)
-
-    def test_wlen(self):
-        for c in ['a', 'b', '1', '!', '_']:
-            self.assertEqual(_wlen(c), 1)
-        self.assertEqual(_wlen('\x1a'), 2)
-
-        char_east_asian_width_N = chr(3800)
-        self.assertEqual(_wlen(char_east_asian_width_N), 1)
-        char_east_asian_width_W = chr(4352)
-        self.assertEqual(_wlen(char_east_asian_width_W), 2)
-
-        self.assertEqual(_wlen('hello'), 5)
-        self.assertEqual(_wlen('hello' + '\x1a'), 7)
-        self.assertEqual(_wlen('e\N{COMBINING ACUTE ACCENT}'), 1)
-        self.assertEqual(_wlen('a\N{ZERO WIDTH JOINER}b'), 2)
-
 
 class TestKeywordTypoSuggestions(unittest.TestCase):
     TYPO_CASES = [
diff --git a/Lib/traceback.py b/Lib/traceback.py
@@ -985,9 +985,11 @@ def _zip_display_width(line, carets):
 
 @functools.cache
 def _str_width(c: str) -> int:
-    import unicodedata
+    # copied from _pyrepl.utils to fix gh-130273
+
     if ord(c) < 128:
         return 1
+    import unicodedata
     # gh-139246 for zero-width joiner and combining characters
     if unicodedata.combining(c):
         return 0
@@ -1000,20 +1002,21 @@ def _str_width(c: str) -> int:
     return 2
 
 
-ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
+_ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
 
 
 def _wlen(s: str) -> int:
+    # copied from _pyrepl.utils to fix gh-130273
+
     if len(s) == 1 and s != "\x1a":
         return _str_width(s)
     length = sum(_str_width(i) for i in s)
     # remove lengths of any escape sequences
-    sequence = ANSI_ESCAPE_SEQUENCE.findall(s)
+    sequence = _ANSI_ESCAPE_SEQUENCE.findall(s)
     ctrl_z_cnt = s.count("\x1a")
     return length - sum(len(i) for i in sequence) + ctrl_z_cnt
 
 
-
 def _display_width(line, offset=None):
     """Calculate the extra amount of width space the given source
     code segment might take if it were to be displayed on a fixed
@@ -1028,6 +1031,7 @@ def _display_width(line, offset=None):
 
     return _wlen(line[:offset])
 
+
 def _format_note(note, indent, theme):
     for l in note.split("\n"):
         yield f"{indent}{theme.note}{l}{theme.reset}\n"
diff --git a/Misc/NEWS.d/next/Library/2025-12-10-15-15-09.gh-issue-130273.iCfiY5.rst b/Misc/NEWS.d/next/Library/2025-12-10-15-15-09.gh-issue-130273.iCfiY5.rst
@@ -1 +1 @@
-Fix traceback color output with unicode characters
+Fix traceback color output with Unicode characters.

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -1 +1 @@` |
| `1` | | `-Fix traceback color output with unicode characters` |
| | `1` | `+Fix traceback color output with Unicode characters.` |