Skip to content

Commit 8978756

Browse files
committed
add test_grapheme_clusters.py from pull request #391
1 parent 032dc0d commit 8978756

1 file changed

Lines changed: 239 additions & 0 deletions

File tree

test/test_grapheme_clusters.py

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
"""Tests for Unicode grapheme cluster handling in tabulate."""
2+
3+
import pytest
4+
5+
from tabulate import tabulate
6+
7+
# wcwidth is an optional dependency: record whether it can be imported and
# whether it exposes the newer APIs that some tests in this module rely on.
try:
    import wcwidth
except ImportError:
    wcwidth = None
    HAS_WCWIDTH = False
    HAS_WCWIDTH_030 = False
else:
    HAS_WCWIDTH = True
    # Tests gated on this flag call wcwidth.iter_graphemes() directly, so
    # probe for it as well as wrap() instead of inferring one from the other.
    HAS_WCWIDTH_030 = all(
        hasattr(wcwidth, name) for name in ("wrap", "iter_graphemes")
    )
16+
17+
# Skip marker for tests that need wcwidth to be importable at all.
requires_wcwidth = pytest.mark.skipif(
    not HAS_WCWIDTH,
    reason="requires wcwidth",
)

# Skip marker for tests that need the newer wcwidth APIs (HAS_WCWIDTH_030).
requires_wcwidth_030 = pytest.mark.skipif(
    not HAS_WCWIDTH_030,
    reason="requires wcwidth >= 0.3.0",
)
20+
21+
22+
class TestGraphemeClusterWidth:
    """Display-width checks for multi-codepoint grapheme clusters.

    Every test measures a single cluster with ``wcwidth.wcswidth`` and
    expects it to occupy two cells.
    """

    @requires_wcwidth
    def test_zwj_family_emoji_width(self):
        """A ZWJ-joined family emoji measures two cells wide."""
        zwj_sequence = "\U0001f468\u200d\U0001f469\u200d\U0001f467"
        measured = wcwidth.wcswidth(zwj_sequence)
        assert measured == 2

    @requires_wcwidth
    def test_regional_indicator_flag_width(self):
        """A pair of regional indicators (a flag) measures two cells wide."""
        flag_pair = "\U0001f1fa\U0001f1f8"
        measured = wcwidth.wcswidth(flag_pair)
        assert measured == 2

    @requires_wcwidth
    def test_vs16_emoji_width(self):
        """A base character followed by VS16 measures two cells wide."""
        heart_with_vs16 = "\u2764\ufe0f"
        measured = wcwidth.wcswidth(heart_with_vs16)
        assert measured == 2
42+
43+
44+
class TestGraphemeClusterAlignment:
    """Tests for correct alignment of cells containing grapheme clusters.

    Each test renders a small ``grid`` table and checks that every output
    line has the same visible width as the first (border) line.
    """

    @requires_wcwidth
    def test_zwj_alignment_in_grid(self):
        """ZWJ emoji aligns correctly in grid format."""
        # Private tabulate helper, imported locally and only once instead of
        # on every loop iteration.
        from tabulate import _visible_width

        family = "\U0001f468\u200d\U0001f469\u200d\U0001f467"
        data = [
            ["ABC", "text"],
            [family, "emoji"],
        ]
        result = tabulate(data, headers=["col", "desc"], tablefmt="grid")
        lines = result.split("\n")

        # Measure the border with the same function as the rows so both
        # sides of the comparison share one definition of "width".
        border_width = _visible_width(lines[0])
        for line in lines:
            assert _visible_width(line) == border_width

    @requires_wcwidth
    def test_flag_alignment_in_grid(self):
        """Regional indicator flags align correctly in grid format."""
        # Private tabulate helper, imported locally and only once instead of
        # on every loop iteration.
        from tabulate import _visible_width

        us_flag = "\U0001f1fa\U0001f1f8"
        data = [
            ["AB", "text"],
            [us_flag, "flag"],
        ]
        result = tabulate(data, headers=["col", "desc"], tablefmt="grid")
        lines = result.split("\n")

        # Measure the border with the same function as the rows so both
        # sides of the comparison share one definition of "width".
        border_width = _visible_width(lines[0])
        for line in lines:
            assert _visible_width(line) == border_width
80+
81+
82+
class TestGraphemeClusterWrapping:
    """Tests for grapheme cluster preservation during text wrapping.

    These tests require wcwidth >= 0.3.0 for iter_graphemes and wrap() APIs.
    """

    @staticmethod
    def _collect_graphemes(rendered):
        """Return the grapheme clusters of *rendered* as one flat list.

        The text is split on newlines and each line is stripped of leading
        and trailing whitespace before being segmented with
        ``wcwidth.iter_graphemes``.
        """
        clusters = []
        for line in rendered.split("\n"):
            # extend() consumes the iterator directly; no intermediate list.
            clusters.extend(wcwidth.iter_graphemes(line.strip()))
        return clusters

    @requires_wcwidth_030
    def test_zwj_not_broken_during_wrap(self):
        """ZWJ sequence preserved as single unit during wrap."""
        family = "\U0001f468\u200d\U0001f469\u200d\U0001f467"
        data = [[f"A{family}B"]]
        result = tabulate(data, tablefmt="plain", maxcolwidths=3)

        assert family in self._collect_graphemes(result)

    @requires_wcwidth_030
    def test_flag_not_broken_during_wrap(self):
        """Regional indicator flag preserved as single unit during wrap."""
        us_flag = "\U0001f1fa\U0001f1f8"
        gb_flag = "\U0001f1ec\U0001f1e7"
        fr_flag = "\U0001f1eb\U0001f1f7"
        flags = us_flag + gb_flag + fr_flag

        data = [[flags]]
        result = tabulate(data, tablefmt="plain", maxcolwidths=5)

        graphemes_in_result = self._collect_graphemes(result)
        assert us_flag in graphemes_in_result
        assert gb_flag in graphemes_in_result
        assert fr_flag in graphemes_in_result

    @requires_wcwidth_030
    def test_vs16_not_broken_during_wrap(self):
        """VS16 variation selector kept with base character during wrap."""
        heart = "\u2764\ufe0f"
        data = [[heart * 3]]
        result = tabulate(data, tablefmt="plain", maxcolwidths=4)

        # All three hearts must come back as intact base+VS16 clusters.
        assert self._collect_graphemes(result).count(heart) == 3

    @requires_wcwidth_030
    def test_skin_tone_modifier_not_broken(self):
        """Skin tone modifier preserved with emoji during wrap."""
        wave_light = "\U0001f44b\U0001f3fb"
        data = [[f"Hi{wave_light}there"]]
        result = tabulate(data, tablefmt="plain", maxcolwidths=5)

        assert wave_light in self._collect_graphemes(result)
146+
147+
148+
class TestComplexGraphemeClusters:
    """Tests for complex grapheme cluster scenarios.

    These tests require wcwidth >= 0.3.0 for iter_graphemes API.
    """

    @staticmethod
    def _collect_graphemes(rendered):
        """Return the grapheme clusters of *rendered* as one flat list.

        The text is split on newlines and each line is stripped of leading
        and trailing whitespace before being segmented with
        ``wcwidth.iter_graphemes``.
        """
        clusters = []
        for line in rendered.split("\n"):
            # extend() consumes the iterator directly; no intermediate list.
            clusters.extend(wcwidth.iter_graphemes(line.strip()))
        return clusters

    @requires_wcwidth_030
    def test_multiple_zwj_sequences_in_cell(self):
        """Multiple ZWJ sequences in single cell handled correctly."""
        family = "\U0001f468\u200d\U0001f469\u200d\U0001f467"
        technologist = "\U0001f468\U0001f3fb\u200d\U0001f4bb"
        data = [[f"{family} and {technologist}"]]
        result = tabulate(data, tablefmt="plain", maxcolwidths=15)

        graphemes_in_result = self._collect_graphemes(result)
        assert family in graphemes_in_result
        assert technologist in graphemes_in_result

    @requires_wcwidth_030
    def test_flags_with_text_wrap(self):
        """Flags interspersed with text wrap correctly."""
        us_flag = "\U0001f1fa\U0001f1f8"
        data = [[f"Visit {us_flag} USA today!"]]
        result = tabulate(data, tablefmt="plain", maxcolwidths=10)

        assert us_flag in self._collect_graphemes(result)

    @requires_wcwidth_030
    def test_combining_marks_preserved(self):
        """Combining diacritical marks stay with base character."""
        e_acute = "e\u0301"
        data = [[f"caf{e_acute} au lait"]]
        result = tabulate(data, tablefmt="plain", maxcolwidths=5)

        assert e_acute in self._collect_graphemes(result)
194+
195+
196+
class TestAnsiWithGraphemeClusters:
    """Grapheme clusters wrapped in ANSI escape sequences."""

    @requires_wcwidth
    def test_ansi_colored_zwj_width(self):
        """An ANSI-coloured ZWJ family emoji has a visible width of two."""
        from tabulate import _visible_width

        zwj_family = "\U0001f468\u200d\U0001f469\u200d\U0001f467"
        painted = f"\x1b[31m{zwj_family}\x1b[0m"

        assert _visible_width(painted) == 2

    @requires_wcwidth
    def test_ansi_colored_zwj_alignment(self):
        """Every grid line is as wide as the border with a coloured ZWJ cell."""
        from tabulate import _visible_width

        zwj_family = "\U0001f468\u200d\U0001f469\u200d\U0001f467"
        painted = f"\x1b[31m{zwj_family}\x1b[0m"
        rows = [
            ["AB", "text"],
            [painted, "emoji"],
        ]
        rendered = tabulate(rows, headers=["col", "desc"], tablefmt="grid")
        output_lines = rendered.split("\n")

        expected_width = _visible_width(output_lines[0])
        for text in output_lines:
            assert _visible_width(text) == expected_width

    @requires_wcwidth_030
    def test_ansi_colored_flag_wrap(self):
        """Both halves of a coloured flag end up on one wrapped line."""
        indicator_u = "\U0001f1fa"
        indicator_s = "\U0001f1f8"
        us_flag = "\U0001f1fa\U0001f1f8"
        painted = f"\x1b[34m{us_flag}\x1b[0m"
        rows = [[f"A{painted}B"]]
        rendered = tabulate(rows, tablefmt="plain", maxcolwidths=4)

        # Neither indicator may be dropped from the output...
        assert indicator_u in rendered
        assert indicator_s in rendered

        # ...and at least one non-blank line must carry both of them.
        stripped_lines = (raw.strip() for raw in rendered.split("\n"))
        non_blank = [text for text in stripped_lines if text]
        found_together = False
        for text in non_blank:
            if indicator_u in text and indicator_s in text:
                found_together = True
                break
        assert found_together

0 commit comments

Comments
 (0)