@@ -1751,13 +1751,15 @@ def test_basic(self):
17511751 writer .write_utf8 (b'var' , - 1 )
17521752
17531753 # test PyUnicodeWriter_WriteChar()
1754- writer .write_char ('=' )
1754+ writer .write_char (ord ( '=' ) )
17551755
17561756 # test PyUnicodeWriter_WriteSubstring()
17571757 writer .write_substring ("[long]" , 1 , 5 )
1758+ # CRASHES writer.write_substring(NULL, 0, 0)
17581759
17591760 # test PyUnicodeWriter_WriteStr()
17601761 writer .write_str (" value " )
1762+ # CRASHES writer.write_str(NULL)
17611763
17621764 # test PyUnicodeWriter_WriteRepr()
17631765 writer .write_repr ("repr" )
@@ -1772,22 +1774,38 @@ def test_repr_null(self):
17721774 writer .write_repr_true (NULL )
17731775 self .assertEqual (writer .finish (),
17741776 "var=<NULL>" )
def test_write_char(self):
    # test write_char() with integer code points, including the limits
    writer = self.create_writer(0)

    # NUL, an ASCII character, a BMP character and U+10FFFF are accepted
    for code_point in (0, ord('$'), 0x20ac, 0x10_ffff):
        writer.write_char(code_point)

    # values above U+10FFFF must raise ValueError; the final comparison
    # shows that nothing was appended for them
    for too_large in (0x11_0000, 0xFFFF_FFFF):
        with self.assertRaises(ValueError):
            writer.write_char(too_large)

    expected = "\0$\u20AC\U0010FFFF"
    self.assertEqual(writer.finish(), expected)
17751787
def test_utf8(self):
    # test write_utf8() with 1-, 2- and 3-byte UTF-8 sequences, each
    # followed by a single separator character
    writer = self.create_writer(0)

    chunks = (
        (b"ascii", '-'),
        (b"latin1=\xC3\xA9", '-'),
        (b"euro=\xE2\x82\xAC", '.'),
    )
    for data, separator in chunks:
        writer.write_utf8(data, -1)
        writer.write_char(ord(separator))

    # a NULL buffer is accepted when the size is 0 and writes nothing
    writer.write_utf8(NULL, 0)
    # CRASHES writer.write_utf8(NULL, 1)
    # CRASHES writer.write_utf8(NULL, -1)

    expected = "ascii-latin1=\xE9-euro=\u20AC."
    self.assertEqual(writer.finish(), expected)
17861801
def test_ascii(self):
    # test write_ascii() with explicit sizes, empty input and NULL input
    writer = self.create_writer(0)

    calls = (
        (b"Hello ", -1),
        # empty writes: neither contributes to the result
        (b"", 0),
        (NULL, 0),
        # CRASHES writer.write_ascii(NULL, 1)
        # CRASHES writer.write_ascii(NULL, -1)
        # only the first 6 bytes ("Python") are written
        (b"Python! <truncated>", 6),
    )
    for data, size in calls:
        writer.write_ascii(data, size)

    self.assertEqual(writer.finish(), "Hello Python")
17931811
@@ -1804,6 +1822,9 @@ def test_recover_utf8_error(self):
18041822 # write fails with an invalid string
18051823 with self .assertRaises (UnicodeDecodeError ):
18061824 writer .write_utf8 (b"invalid\xFF " , - 1 )
1825+ with self .assertRaises (UnicodeDecodeError ):
1826+ s = "truncated\u20AC " .encode ()
1827+ writer .write_utf8 (s , len (s ) - 1 )
18071828
18081829 # retry write with a valid string
18091830 writer .write_utf8 (b"valid" , - 1 )
@@ -1815,13 +1836,19 @@ def test_decode_utf8(self):
18151836 # test PyUnicodeWriter_DecodeUTF8Stateful()
18161837 writer = self .create_writer (0 )
18171838 writer .decodeutf8stateful (b"ign\xFF ore" , - 1 , b"ignore" )
1818- writer .write_char ('-' )
1839+ writer .write_char (ord ( '-' ) )
18191840 writer .decodeutf8stateful (b"replace\xFF " , - 1 , b"replace" )
1820- writer .write_char ('-' )
1841+ writer .write_char (ord ( '-' ) )
18211842
18221843 # incomplete trailing UTF-8 sequence
18231844 writer .decodeutf8stateful (b"incomplete\xC3 " , - 1 , b"replace" )
18241845
1846+ writer .decodeutf8stateful (NULL , 0 , b"replace" )
1847+ # CRASHES writer.decodeutf8stateful(NULL, 1, b"replace")
1848+ # CRASHES writer.decodeutf8stateful(NULL, -1, b"replace")
1849+ with self .assertRaises (UnicodeDecodeError ):
1850+ writer .decodeutf8stateful (b"default\xFF " , - 1 , NULL )
1851+
18251852 self .assertEqual (writer .finish (),
18261853 "ignore-replace\uFFFD -incomplete\uFFFD " )
18271854
@@ -1832,12 +1859,12 @@ def test_decode_utf8_consumed(self):
18321859 # valid string
18331860 consumed = writer .decodeutf8stateful (b"text" , - 1 , b"strict" , True )
18341861 self .assertEqual (consumed , 4 )
1835- writer .write_char ('-' )
1862+ writer .write_char (ord ( '-' ) )
18361863
18371864 # non-ASCII
18381865 consumed = writer .decodeutf8stateful (b"\xC3 \xA9 -\xE2 \x82 \xAC " , 6 , b"strict" , True )
18391866 self .assertEqual (consumed , 6 )
1840- writer .write_char ('-' )
1867+ writer .write_char (ord ( '-' ) )
18411868
18421869 # invalid UTF-8 (consumed is 0 on error)
18431870 with self .assertRaises (UnicodeDecodeError ):
@@ -1846,54 +1873,92 @@ def test_decode_utf8_consumed(self):
18461873 # ignore error handler
18471874 consumed = writer .decodeutf8stateful (b"more\xFF " , - 1 , b"ignore" , True )
18481875 self .assertEqual (consumed , 5 )
1849- writer .write_char ('-' )
1876+ writer .write_char (ord ( '-' ) )
18501877
18511878 # incomplete trailing UTF-8 sequence
18521879 consumed = writer .decodeutf8stateful (b"incomplete\xC3 " , - 1 , b"ignore" , True )
18531880 self .assertEqual (consumed , 10 )
1881+ writer .write_char (ord ('-' ))
1882+
1883+ consumed = writer .decodeutf8stateful (NULL , 0 , b"replace" , True )
1884+ self .assertEqual (consumed , 0 )
1885+ # CRASHES writer.decodeutf8stateful(NULL, 1, b"replace", True)
1886+ # CRASHES writer.decodeutf8stateful(NULL, -1, b"replace", True)
1887+ consumed = writer .decodeutf8stateful (b"default\xC3 " , - 1 , NULL , True )
1888+ self .assertEqual (consumed , 7 )
18541889
1855- self .assertEqual (writer .finish (), "text-\xE9 -\u20AC -more-incomplete" )
1890+ self .assertEqual (writer .finish (), "text-\xE9 -\u20AC -more-incomplete-default " )
18561891
def test_widechar(self):
    # test write_widechar(): the wrapper takes raw bytes, so every string
    # is encoded with the UTF flavour whose code unit size and byte order
    # match the platform's wchar_t
    from _testcapi import SIZEOF_WCHAR_T

    if SIZEOF_WCHAR_T == 2:
        encoding = 'utf-16le' if sys.byteorder == 'little' else 'utf-16be'
    elif SIZEOF_WCHAR_T == 4:
        encoding = 'utf-32le' if sys.byteorder == 'little' else 'utf-32be'
    else:
        # without this branch 'encoding' would be unbound and the test
        # would stop with an unrelated NameError a few lines below
        self.fail(f"unsupported wchar_t size: {SIZEOF_WCHAR_T}")

    writer = self.create_writer(0)
    writer.write_widechar("latin1=\xE9".encode(encoding))
    writer.write_char(ord("-"))
    writer.write_widechar("euro=\u20AC".encode(encoding))
    writer.write_char(ord("-"))
    writer.write_widechar("max=\U0010ffff".encode(encoding))
    writer.write_char(ord("-"))
    # pad "zeroes=" (7 characters) with NUL code units up to 10 wchar_t
    # and pass the size explicitly: the 3 trailing NULs must be written
    writer.write_widechar("zeroes=".encode(encoding).ljust(SIZEOF_WCHAR_T * 10, b'\0'),
                          10)
    writer.write_char(ord('.'))

    if SIZEOF_WCHAR_T == 4:
        # 0x110000 in native byte order: one past the last valid code point
        invalid = (b'\x00\x00\x11\x00' if sys.byteorder == 'little' else
                   b'\x00\x11\x00\x00')
        with self.assertRaises(ValueError):
            writer.write_widechar("invalid=".encode(encoding) + invalid)
    # none of the following calls may add anything to the result
    writer.write_widechar(b'', -5)
    writer.write_widechar(NULL, 0)
    # CRASHES writer.write_widechar(NULL, 1)
    # CRASHES writer.write_widechar(NULL, -1)

    self.assertEqual(writer.finish(),
                     "latin1=\xE9-euro=\u20AC-max=\U0010ffff-zeroes=\0\0\0.")
18671923
def test_ucs4(self):
    # test write_ucs4(): the wrapper takes raw bytes, so strings are
    # encoded as UTF-32 in native byte order
    encoding = 'utf-32le' if sys.byteorder == 'little' else 'utf-32be'

    def as_ucs4(text, errors='strict'):
        # encode *text* as a native-endian UTF-32 byte string
        return text.encode(encoding, errors)

    dash = ord("-")

    writer = self.create_writer(0)
    # explicit size: only the first 5 characters ("ascii") are written
    writer.write_ucs4(as_ucs4("ascii IGNORED"), 5)
    writer.write_char(dash)
    for text in ("latin1=\xe9", "euro=\u20ac"):
        writer.write_ucs4(as_ucs4(text))
        writer.write_char(dash)
    writer.write_ucs4(as_ucs4("max=\U0010ffff"))
    writer.write_char(ord("."))
    self.assertEqual(writer.finish(),
                     "ascii-latin1=\xE9-euro=\u20AC-max=\U0010ffff.")

    # Test some special characters
    writer = self.create_writer(0)
    special = (
        "lone\uDC80",        # Lone surrogate character
        "pair\uD83D\uDC0D",  # Surrogate pair
    )
    for text in special:
        # surrogates cannot be encoded with the default 'strict' handler
        writer.write_ucs4(as_ucs4(text, 'surrogatepass'))
        writer.write_char(dash)
    # embedded NUL: all 7 characters must be written
    writer.write_ucs4(as_ucs4("null[\0]"), 7)
    # 0x110000 in native byte order; only referenced by the disabled
    # call below
    invalid = (b'\x00\x00\x11\x00' if sys.byteorder == 'little' else
               b'\x00\x11\x00\x00')
    # CRASHES writer.write_ucs4("invalid".encode(encoding) + invalid)
    writer.write_ucs4(NULL, 0)
    # CRASHES writer.write_ucs4(NULL, 1)
    # the two surrogates are expected to come back as separate code points
    self.assertEqual(writer.finish(),
                     "lone\udc80-pair\ud83d\udc0d-null[\x00]")

    # invalid size
    writer = self.create_writer(0)
    for data in ("text".encode(encoding), b'', NULL):
        with self.assertRaises(ValueError):
            writer.write_ucs4(data, -1)
18971962
18981963 def test_substring_empty (self ):
18991964 writer = self .create_writer (0 )
@@ -1919,7 +1984,7 @@ def test_format(self):
19191984 from ctypes import c_int
19201985 writer = self .create_writer (0 )
19211986 self .writer_format (writer , b'%s %i' , b'abc' , c_int (123 ))
1922- writer .write_char ('.' )
1987+ writer .write_char (ord ( '.' ) )
19231988 self .assertEqual (writer .finish (), 'abc 123.' )
19241989
19251990 def test_recover_error (self ):
0 commit comments