@@ -1300,16 +1300,103 @@ class MyStr(str):
13001300 self .assertIs (type (normalize (form , MyStr (input_str ))), str )
13011301
13021302
1303- class GraphemeBreakTest (unittest .TestCase ):
1303+ class BaseGraphemeBreakTest :
1304+ iter_graphemes = staticmethod (unicodedata .iter_graphemes )
1305+
def test_grapheme_break_types(self):
    """iter_graphemes() rejects a missing argument and a bytes argument."""
    # Called without any argument.
    with self.assertRaises(TypeError):
        self.iter_graphemes()
    # Called with bytes instead of str.
    with self.assertRaises(TypeError):
        self.iter_graphemes(b'x')
1309+
def test_grapheme_break_empty(self):
    """An empty string produces no segments."""
    self.assertEqual(self._graphemes(''), [])
1313+
def test_grapheme_break_simple(self):
    """Check the optional start/end arguments on a plain ASCII string."""
    text = 'abcd'
    # Without bounds, every character comes back as its own segment.
    self.assertEqual(self._graphemes(text), ['a', 'b', 'c', 'd'])
    # Explicit start, then explicit start and end.
    self.assertEqual(self._graphemes(text, 1), ['b', 'c', 'd'])
    self.assertEqual(self._graphemes(text, 1, 3), ['b', 'c'])
    # Negative bounds count from the end of the string.
    self.assertEqual(self._graphemes(text, -3), ['b', 'c', 'd'])
    self.assertEqual(self._graphemes(text, 1, -1), ['b', 'c'])
    # A start past the end bound gives an empty result.
    self.assertEqual(self._graphemes(text, 3, 1), [])
    # Bounds outside the string are accepted rather than raising.
    self.assertEqual(self._graphemes(text, 5), [])
    self.assertEqual(self._graphemes(text, 0, 5), ['a', 'b', 'c', 'd'])
    self.assertEqual(self._graphemes(text, -5), ['a', 'b', 'c', 'd'])
    self.assertEqual(self._graphemes(text, 0, -5), [])
1326+
def test_grapheme_break_rules(self):
    """Exercise the grapheme cluster boundary rules one by one.

    Each assertion passes a short string through self._graphemes() and
    checks the resulting clusters.  The GBn labels are the rule numbers
    of UAX #29 (Unicode Text Segmentation).
    """
    graphemes = self._graphemes
    # GB3: CR LF is kept together.
    self.assertEqual(graphemes('\r\n'), ['\r\n'])
    # GB4: always break after CR, LF and other controls, even before
    # a combining mark (U+0308).
    self.assertEqual(graphemes('\r\u0308'), ['\r', '\u0308'])
    self.assertEqual(graphemes('\n\u0308'), ['\n', '\u0308'])
    self.assertEqual(graphemes('\0\u0308'), ['\0', '\u0308'])
    # GB5: always break before CR, LF and other controls, even after
    # a prepending character (U+06DD).
    self.assertEqual(graphemes('\u06dd\r'), ['\u06dd', '\r'])
    self.assertEqual(graphemes('\u06dd\n'), ['\u06dd', '\n'])
    self.assertEqual(graphemes('\u06dd\0'), ['\u06dd', '\0'])
    # GB6: Hangul L is not separated from a following V, LV or LVT.
    self.assertEqual(graphemes('\u1100\u1160'), ['\u1100\u1160'])
    self.assertEqual(graphemes('\u1100\uAC00'), ['\u1100\uAC00'])
    self.assertEqual(graphemes('\u1100\uAC01'), ['\u1100\uAC01'])
    # GB7: Hangul LV or V is not separated from a following V or T.
    self.assertEqual(graphemes('\uAC00\u1160'), ['\uAC00\u1160'])
    self.assertEqual(graphemes('\uAC00\u11A8'), ['\uAC00\u11A8'])
    self.assertEqual(graphemes('\u1160\u1160'), ['\u1160\u1160'])
    self.assertEqual(graphemes('\u1160\u11A8'), ['\u1160\u11A8'])
    # GB8: Hangul LVT or T is not separated from a following T.
    self.assertEqual(graphemes('\uAC01\u11A8'), ['\uAC01\u11A8'])
    self.assertEqual(graphemes('\u11A8\u11A8'), ['\u11A8\u11A8'])
    # GB9: no break before extending characters or ZWJ.
    self.assertEqual(graphemes('a\u0300'), ['a\u0300'])
    self.assertEqual(graphemes('a\u200D'), ['a\u200D'])
    # GB9a: no break before a spacing mark.
    self.assertEqual(graphemes('\u0905\u0903'), ['\u0905\u0903'])
    # GB9b: no break after a prepending character.
    self.assertEqual(graphemes('\u06dd\u0661'), ['\u06dd\u0661'])
    # GB9c: no break inside an Indic conjunct (consonant, virama(s),
    # consonant), including chained conjuncts.
    self.assertEqual(graphemes('\u0915\u094d\u0924'),
                     ['\u0915\u094d\u0924'])
    self.assertEqual(graphemes('\u0915\u094D\u094D\u0924'),
                     ['\u0915\u094D\u094D\u0924'])
    self.assertEqual(graphemes('\u0915\u094D\u0924\u094D\u092F'),
                     ['\u0915\u094D\u0924\u094D\u092F'])
    # GB11: an emoji ZWJ sequence (with modifiers and a variation
    # selector) forms a single cluster.
    self.assertEqual(graphemes(
        '\U0001F9D1\U0001F3FE\u200D\u2764\uFE0F'
        '\u200D\U0001F48B\u200D\U0001F9D1\U0001F3FC'),
        ['\U0001F9D1\U0001F3FE\u200D\u2764\uFE0F'
         '\u200D\U0001F48B\u200D\U0001F9D1\U0001F3FC'])
    # GB12: at the start of text, regional indicators are grouped in
    # pairs.
    self.assertEqual(graphemes(
        '\U0001F1FA\U0001F1E6\U0001F1FA\U0001F1F3'),
        ['\U0001F1FA\U0001F1E6', '\U0001F1FA\U0001F1F3'])
    # GB13: the same pairing applies after a non-regional-indicator
    # character.
    self.assertEqual(graphemes(
        'a\U0001F1FA\U0001F1E6\U0001F1FA\U0001F1F3'),
        ['a', '\U0001F1FA\U0001F1E6', '\U0001F1FA\U0001F1F3'])
1379+
def test_segment_object(self):
    """Check start, end and str() of a segment covering two code points."""
    segments = list(self.iter_graphemes('spa\u0300m'))
    # Expected clusters: 's', 'p', 'a' + U+0300, 'm'.  The segment list is
    # passed as the failure message to make a wrong count easy to read.
    self.assertEqual(len(segments), 4, segments)
    # The third segment is the 'a' together with its combining accent.
    segment = segments[2]
    self.assertEqual(segment.start, 2)
    self.assertEqual(segment.end, 4)
    self.assertEqual(str(segment), 'a\u0300')
1387+
def _graphemes(self, *args):
    """Return a list with the str() of every segment of iter_graphemes(*args).

    All positional arguments are forwarded unchanged to
    self.iter_graphemes().
    """
    return [str(segment) for segment in self.iter_graphemes(*args)]
1390+
@requires_resource('network')
def test_tr29_conformance(self):
    """Run _run_grapheme_break_tests() on the GraphemeBreakTest.txt data."""
    testdata = download_test_data_file("GraphemeBreakTest.txt")
    # The data object is used as a context manager around the whole run.
    with testdata:
        self._run_grapheme_break_tests(testdata)
13111398
1312- def run_grapheme_break_tests (self , testdata ):
1399+ def _run_grapheme_break_tests (self , testdata ):
13131400 for line in testdata :
13141401 line , _ , comment = line .partition ('#' )
13151402 line = line .strip ()
@@ -1330,19 +1417,32 @@ def run_grapheme_break_tests(self, testdata):
13301417 self .assertEqual (chunks .pop (), '' , line )
13311418 input = '' .join (chunks )
13321419 with self .subTest (line ):
1333- result = list (unicodedata .iter_graphemes (input ))
1420+ result = list (self .iter_graphemes (input ))
13341421 self .assertEqual (list (map (str , result )), chunks , comment )
1335- self .assertEqual ([x .start for x in result ], breaks [:- 1 ], comment )
1336- self .assertEqual ([x .end for x in result ], breaks [1 :], comment )
1422+ self .assertEqual ([x .start for x in result ],
1423+ breaks [:- 1 ], comment )
1424+ self .assertEqual ([x .end for x in result ],
1425+ breaks [1 :], comment )
13371426 for i in range (1 , len (breaks ) - 1 ):
1338- result = list (unicodedata .iter_graphemes (input , breaks [i ]))
1339- self .assertEqual (list (map (str , result )), chunks [i :], comment )
1340- self .assertEqual ([x .start for x in result ], breaks [i :- 1 ], comment )
1341- self .assertEqual ([x .end for x in result ], breaks [i + 1 :], comment )
1427+ result = list (self .iter_graphemes (input , breaks [i ]))
1428+ self .assertEqual (list (map (str , result )),
1429+ chunks [i :], comment )
1430+ self .assertEqual ([x .start for x in result ],
1431+ breaks [i :- 1 ], comment )
1432+ self .assertEqual ([x .end for x in result ],
1433+ breaks [i + 1 :], comment )
1434+
1435+
1436+ class GraphemeBreakTest (unittest .TestCase , BaseGraphemeBreakTest ):
1437+ iter_graphemes = staticmethod (unicodedata .iter_graphemes )
1438+
def test_segment_repr(self):
    """Check repr() of a segment and that iter() and len() reject it."""
    # Third cluster of 's', 'p', 'a' + U+0300, 'm': it spans indices 2..4.
    segment = list(unicodedata.iter_graphemes('spa\u0300m'))[2]
    self.assertEqual(repr(segment), '<Segment 2:4>')
    self.assertRaises(TypeError, iter, segment)
    self.assertRaises(TypeError, len, segment)
13421444
13431445 def test_reference_loops (self ):
1344- # Test that reference loops involving GraphemeBreakIterator or
1345- # Segment can be broken by the garbage collector.
13461446 class S (str ):
13471447 pass
13481448
@@ -1363,5 +1463,12 @@ class S(str):
13631463 self .assertIsNone (wr ())
13641464
13651465
class PyGraphemeBreakTest(unittest.TestCase, BaseGraphemeBreakTest):
    """Run the BaseGraphemeBreakTest tests against _py_grapheme."""

    @classmethod
    def setUpClass(cls):
        # The import happens here, when the class is set up, not when the
        # test module itself is imported.
        from _py_grapheme import iter_graphemes as py_iter_graphemes
        # staticmethod() keeps the function from being bound to the test
        # instance when accessed as self.iter_graphemes.
        cls.iter_graphemes = staticmethod(py_iter_graphemes)
1471+
1472+
13661473if __name__ == "__main__" :
13671474 unittest .main ()
0 commit comments