Skip to content

Commit 37a4fd9

Browse files
committed
Restore _pyrepl.utils.str_width and .wlen; respond to code review
1 parent 05ad91e commit 37a4fd9

File tree

6 files changed

+83
-53
lines changed

6 files changed

+83
-53
lines changed

Lib/_pyrepl/utils.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22
import builtins
3+
import functools
34
import keyword
45
import re
56
import token as T
@@ -10,19 +11,46 @@
1011
from collections import deque
1112
from io import StringIO
1213
from tokenize import TokenInfo as TI
13-
from traceback import _str_width as str_width, _wlen as wlen
1414
from typing import Iterable, Iterator, Match, NamedTuple, Self
1515

1616
from .types import CharBuffer, CharWidths
1717
from .trace import trace
1818

19+
20+
ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
1921
ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02")
2022
ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""})
2123
IDENTIFIERS_AFTER = frozenset({"def", "class"})
2224
KEYWORD_CONSTANTS = frozenset({"True", "False", "None"})
2325
BUILTINS = frozenset({str(name) for name in dir(builtins) if not name.startswith('_')})
2426

2527

28+
@functools.cache
29+
def str_width(c: str) -> int:
30+
if ord(c) < 128:
31+
return 1
32+
# gh-139246 for zero-width joiner and combining characters
33+
if unicodedata.combining(c):
34+
return 0
35+
category = unicodedata.category(c)
36+
if category == "Cf" and c != "\u00ad":
37+
return 0
38+
w = unicodedata.east_asian_width(c)
39+
if w in ("N", "Na", "H", "A"):
40+
return 1
41+
return 2
42+
43+
44+
def wlen(s: str) -> int:
45+
if len(s) == 1 and s != "\x1a":
46+
return str_width(s)
47+
length = sum(str_width(i) for i in s)
48+
# remove lengths of any escape sequences
49+
sequence = ANSI_ESCAPE_SEQUENCE.findall(s)
50+
ctrl_z_cnt = s.count("\x1a")
51+
return length - sum(len(i) for i in sequence) + ctrl_z_cnt
52+
53+
2654
def THEME(**kwargs):
2755
# Not cached: the user can modify the theme inside the interactive session.
2856
return _colorize.get_theme(**kwargs).syntax

Lib/test/test_pyrepl/support.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
11
from code import InteractiveConsole
22
from functools import partial
3-
from traceback import ANSI_ESCAPE_SEQUENCE
43
from typing import Iterable
54
from unittest.mock import MagicMock
65

76
from _pyrepl.console import Console, Event
87
from _pyrepl.readline import ReadlineAlikeReader, ReadlineConfig
98
from _pyrepl.simple_interact import _strip_final_indent
10-
from _pyrepl.utils import unbracket
9+
from _pyrepl.utils import unbracket, ANSI_ESCAPE_SEQUENCE
1110

1211

1312
class ScreenEqualMixin:

Lib/test/test_pyrepl/test_utils.py

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,52 @@
11
from unittest import TestCase
22

3-
from _pyrepl.utils import prev_next_window, gen_colors
3+
from _pyrepl.utils import str_width, wlen, prev_next_window, gen_colors
44

55

66
class TestUtils(TestCase):
7+
def test_str_width(self):
8+
characters = [
9+
'a',
10+
'1',
11+
'_',
12+
'!',
13+
'\x1a',
14+
'\u263A',
15+
'\uffb9',
16+
'\N{LATIN SMALL LETTER E WITH ACUTE}', # é
17+
'\N{LATIN SMALL LETTER E WITH CEDILLA}', # ȩ
18+
'\u00ad',
19+
]
20+
for c in characters:
21+
self.assertEqual(str_width(c), 1)
22+
23+
zero_width_characters = [
24+
'\N{COMBINING ACUTE ACCENT}',
25+
'\N{ZERO WIDTH JOINER}',
26+
]
27+
for c in zero_width_characters:
28+
with self.subTest(character=c):
29+
self.assertEqual(str_width(c), 0)
30+
31+
characters = [chr(99989), chr(99999)]
32+
for c in characters:
33+
self.assertEqual(str_width(c), 2)
34+
35+
def test_wlen(self):
36+
for c in ['a', 'b', '1', '!', '_']:
37+
self.assertEqual(wlen(c), 1)
38+
self.assertEqual(wlen('\x1a'), 2)
39+
40+
char_east_asian_width_N = chr(3800)
41+
self.assertEqual(wlen(char_east_asian_width_N), 1)
42+
char_east_asian_width_W = chr(4352)
43+
self.assertEqual(wlen(char_east_asian_width_W), 2)
44+
45+
self.assertEqual(wlen('hello'), 5)
46+
self.assertEqual(wlen('hello' + '\x1a'), 7)
47+
self.assertEqual(wlen('e\N{COMBINING ACUTE ACCENT}'), 1)
48+
self.assertEqual(wlen('a\N{ZERO WIDTH JOINER}b'), 2)
49+
750
def test_prev_next_window(self):
851
def gen_normal():
952
yield 1

Lib/test/test_traceback.py

Lines changed: 0 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131
import traceback
3232
from functools import partial
3333
from pathlib import Path
34-
from traceback import _str_width, _wlen
3534
import _colorize
3635

3736
MODULE_PREFIX = f'{__name__}.' if __name__ == '__main__' else ''
@@ -1791,49 +1790,6 @@ def f():
17911790
]
17921791
self.assertEqual(result_lines, expected)
17931792

1794-
def test_str_width(self):
1795-
characters = [
1796-
'a',
1797-
'1',
1798-
'_',
1799-
'!',
1800-
'\x1a',
1801-
'\u263A',
1802-
'\uffb9',
1803-
'\N{LATIN SMALL LETTER E WITH ACUTE}', # é
1804-
'\N{LATIN SMALL LETTER E WITH CEDILLA}', # ȩ
1805-
'\u00ad',
1806-
]
1807-
for c in characters:
1808-
self.assertEqual(_str_width(c), 1)
1809-
1810-
zero_width_characters = [
1811-
'\N{COMBINING ACUTE ACCENT}',
1812-
'\N{ZERO WIDTH JOINER}',
1813-
]
1814-
for c in zero_width_characters:
1815-
with self.subTest(character=c):
1816-
self.assertEqual(_str_width(c), 0)
1817-
1818-
characters = [chr(99989), chr(99999)]
1819-
for c in characters:
1820-
self.assertEqual(_str_width(c), 2)
1821-
1822-
def test_wlen(self):
1823-
for c in ['a', 'b', '1', '!', '_']:
1824-
self.assertEqual(_wlen(c), 1)
1825-
self.assertEqual(_wlen('\x1a'), 2)
1826-
1827-
char_east_asian_width_N = chr(3800)
1828-
self.assertEqual(_wlen(char_east_asian_width_N), 1)
1829-
char_east_asian_width_W = chr(4352)
1830-
self.assertEqual(_wlen(char_east_asian_width_W), 2)
1831-
1832-
self.assertEqual(_wlen('hello'), 5)
1833-
self.assertEqual(_wlen('hello' + '\x1a'), 7)
1834-
self.assertEqual(_wlen('e\N{COMBINING ACUTE ACCENT}'), 1)
1835-
self.assertEqual(_wlen('a\N{ZERO WIDTH JOINER}b'), 2)
1836-
18371793

18381794
class TestKeywordTypoSuggestions(unittest.TestCase):
18391795
TYPO_CASES = [

Lib/traceback.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -985,9 +985,11 @@ def _zip_display_width(line, carets):
985985

986986
@functools.cache
987987
def _str_width(c: str) -> int:
988-
import unicodedata
988+
# copied from _pyrepl.utils to fix gh-130273
989+
989990
if ord(c) < 128:
990991
return 1
992+
import unicodedata
991993
# gh-139246 for zero-width joiner and combining characters
992994
if unicodedata.combining(c):
993995
return 0
@@ -1000,20 +1002,21 @@ def _str_width(c: str) -> int:
10001002
return 2
10011003

10021004

1003-
ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
1005+
_ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
10041006

10051007

10061008
def _wlen(s: str) -> int:
1009+
# copied from _pyrepl.utils to fix gh-130273
1010+
10071011
if len(s) == 1 and s != "\x1a":
10081012
return _str_width(s)
10091013
length = sum(_str_width(i) for i in s)
10101014
# remove lengths of any escape sequences
1011-
sequence = ANSI_ESCAPE_SEQUENCE.findall(s)
1015+
sequence = _ANSI_ESCAPE_SEQUENCE.findall(s)
10121016
ctrl_z_cnt = s.count("\x1a")
10131017
return length - sum(len(i) for i in sequence) + ctrl_z_cnt
10141018

10151019

1016-
10171020
def _display_width(line, offset=None):
10181021
"""Calculate the extra amount of width space the given source
10191022
code segment might take if it were to be displayed on a fixed
@@ -1028,6 +1031,7 @@ def _display_width(line, offset=None):
10281031

10291032
return _wlen(line[:offset])
10301033

1034+
10311035
def _format_note(note, indent, theme):
10321036
for l in note.split("\n"):
10331037
yield f"{indent}{theme.note}{l}{theme.reset}\n"
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
Fix traceback color output with unicode characters
1+
Fix traceback color output with Unicode characters.

0 commit comments

Comments
 (0)