@@ -1751,35 +1751,54 @@ def test_basic(self):
17511751 writer .write_utf8 (b'var' , - 1 )
17521752
17531753 # test PyUnicodeWriter_WriteChar()
1754- writer .write_char ('=' )
1754+ writer .write_char (ord ( '=' ) )
17551755
17561756 # test PyUnicodeWriter_WriteSubstring()
17571757 writer .write_substring ("[long]" , 1 , 5 )
1758+ # CRASHES writer.write_substring(NULL, 0, 0)
17581759
17591760 # test PyUnicodeWriter_WriteStr()
17601761 writer .write_str (" value " )
1762+ # CRASHES writer.write_str(NULL)
17611763
17621764 # test PyUnicodeWriter_WriteRepr()
17631765 writer .write_repr ("repr" )
17641766
17651767 self .assertEqual (writer .finish (),
17661768 "var=long value 'repr'" )
17671769
1770+ def test_write_char (self ):
1771+ writer = self .create_writer (0 )
1772+ writer .write_char (0 )
1773+ writer .write_char (ord ('$' ))
1774+ writer .write_char (0x20ac )
1775+ writer .write_char (0x10_ffff )
1776+ self .assertRaises (ValueError , writer .write_char , 0x11_0000 )
1777+ self .assertRaises (ValueError , writer .write_char , 0xFFFF_FFFF )
1778+ self .assertEqual (writer .finish (),
1779+ "\0 $\u20AC \U0010FFFF " )
1780+
17681781 def test_utf8 (self ):
17691782 writer = self .create_writer (0 )
17701783 writer .write_utf8 (b"ascii" , - 1 )
1771- writer .write_char ('-' )
1784+ writer .write_char (ord ( '-' ) )
17721785 writer .write_utf8 (b"latin1=\xC3 \xA9 " , - 1 )
1773- writer .write_char ('-' )
1786+ writer .write_char (ord ( '-' ) )
17741787 writer .write_utf8 (b"euro=\xE2 \x82 \xAC " , - 1 )
1775- writer .write_char ('.' )
1788+ writer .write_char (ord ('.' ))
1789+ writer .write_utf8 (NULL , 0 )
1790+ # CRASHES writer.write_utf8(NULL, 1)
1791+ # CRASHES writer.write_utf8(NULL, -1)
17761792 self .assertEqual (writer .finish (),
17771793 "ascii-latin1=\xE9 -euro=\u20AC ." )
17781794
17791795 def test_ascii (self ):
17801796 writer = self .create_writer (0 )
17811797 writer .write_ascii (b"Hello " , - 1 )
17821798 writer .write_ascii (b"" , 0 )
1799+ writer .write_ascii (NULL , 0 )
1800+ # CRASHES writer.write_ascii(NULL, 1)
1801+ # CRASHES writer.write_ascii(NULL, -1)
17831802 writer .write_ascii (b"Python! <truncated>" , 6 )
17841803 self .assertEqual (writer .finish (), "Hello Python" )
17851804
@@ -1796,6 +1815,9 @@ def test_recover_utf8_error(self):
17961815 # write fails with an invalid string
17971816 with self .assertRaises (UnicodeDecodeError ):
17981817 writer .write_utf8 (b"invalid\xFF " , - 1 )
1818+ with self .assertRaises (UnicodeDecodeError ):
1819+ s = "truncated\u20AC " .encode ()
1820+ writer .write_utf8 (s , len (s ) - 1 )
17991821
18001822 # retry write with a valid string
18011823 writer .write_utf8 (b"valid" , - 1 )
@@ -1807,13 +1829,19 @@ def test_decode_utf8(self):
18071829 # test PyUnicodeWriter_DecodeUTF8Stateful()
18081830 writer = self .create_writer (0 )
18091831 writer .decodeutf8stateful (b"ign\xFF ore" , - 1 , b"ignore" )
1810- writer .write_char ('-' )
1832+ writer .write_char (ord ( '-' ) )
18111833 writer .decodeutf8stateful (b"replace\xFF " , - 1 , b"replace" )
1812- writer .write_char ('-' )
1834+ writer .write_char (ord ( '-' ) )
18131835
18141836 # incomplete trailing UTF-8 sequence
18151837 writer .decodeutf8stateful (b"incomplete\xC3 " , - 1 , b"replace" )
18161838
1839+ writer .decodeutf8stateful (NULL , 0 , b"replace" )
1840+ # CRASHES writer.decodeutf8stateful(NULL, 1, b"replace")
1841+ # CRASHES writer.decodeutf8stateful(NULL, -1, b"replace")
1842+ with self .assertRaises (UnicodeDecodeError ):
1843+ writer .decodeutf8stateful (b"default\xFF " , - 1 , NULL )
1844+
18171845 self .assertEqual (writer .finish (),
18181846 "ignore-replace\uFFFD -incomplete\uFFFD " )
18191847
@@ -1824,12 +1852,12 @@ def test_decode_utf8_consumed(self):
18241852 # valid string
18251853 consumed = writer .decodeutf8stateful (b"text" , - 1 , b"strict" , True )
18261854 self .assertEqual (consumed , 4 )
1827- writer .write_char ('-' )
1855+ writer .write_char (ord ( '-' ) )
18281856
18291857 # non-ASCII
18301858 consumed = writer .decodeutf8stateful (b"\xC3 \xA9 -\xE2 \x82 \xAC " , 6 , b"strict" , True )
18311859 self .assertEqual (consumed , 6 )
1832- writer .write_char ('-' )
1860+ writer .write_char (ord ( '-' ) )
18331861
18341862 # invalid UTF-8 (consumed is 0 on error)
18351863 with self .assertRaises (UnicodeDecodeError ):
@@ -1838,54 +1866,92 @@ def test_decode_utf8_consumed(self):
18381866 # ignore error handler
18391867 consumed = writer .decodeutf8stateful (b"more\xFF " , - 1 , b"ignore" , True )
18401868 self .assertEqual (consumed , 5 )
1841- writer .write_char ('-' )
1869+ writer .write_char (ord ( '-' ) )
18421870
18431871 # incomplete trailing UTF-8 sequence
18441872 consumed = writer .decodeutf8stateful (b"incomplete\xC3 " , - 1 , b"ignore" , True )
18451873 self .assertEqual (consumed , 10 )
1874+ writer .write_char (ord ('-' ))
18461875
1847- self .assertEqual (writer .finish (), "text-\xE9 -\u20AC -more-incomplete" )
1876+ consumed = writer .decodeutf8stateful (NULL , 0 , b"replace" , True )
1877+ self .assertEqual (consumed , 0 )
1878+ # CRASHES writer.decodeutf8stateful(NULL, 1, b"replace", True)
1879+ # CRASHES writer.decodeutf8stateful(NULL, -1, b"replace", True)
1880+ consumed = writer .decodeutf8stateful (b"default\xC3 " , - 1 , NULL , True )
1881+ self .assertEqual (consumed , 7 )
1882+
1883+ self .assertEqual (writer .finish (), "text-\xE9 -\u20AC -more-incomplete-default" )
18481884
18491885 def test_widechar (self ):
1886+ from _testcapi import SIZEOF_WCHAR_T
1887+
1888+ if SIZEOF_WCHAR_T == 2 :
1889+ encoding = 'utf-16le' if sys .byteorder == 'little' else 'utf-16be'
1890+ elif SIZEOF_WCHAR_T == 4 :
1891+ encoding = 'utf-32le' if sys .byteorder == 'little' else 'utf-32be'
1892+
18501893 writer = self .create_writer (0 )
1851- writer .write_widechar ("latin1=\xE9 " )
1852- writer .write_widechar ("-" )
1853- writer .write_widechar ("euro=\u20AC " )
1854- writer .write_char ("-" )
1855- writer .write_widechar ("max=\U0010ffff " )
1856- writer .write_char ('.' )
1894+ writer .write_widechar ("latin1=\xE9 " .encode (encoding ))
1895+ writer .write_char (ord ("-" ))
1896+ writer .write_widechar ("euro=\u20AC " .encode (encoding ))
1897+ writer .write_char (ord ("-" ))
1898+ writer .write_widechar ("max=\U0010ffff " .encode (encoding ))
1899+ writer .write_char (ord ("-" ))
1900+ writer .write_widechar ("zeroes=" .encode (encoding ).ljust (SIZEOF_WCHAR_T * 10 , b'\0 ' ),
1901+ 10 )
1902+ writer .write_char (ord ('.' ))
1903+
1904+ if SIZEOF_WCHAR_T == 4 :
1905+ invalid = (b'\x00 \x00 \x11 \x00 ' if sys .byteorder == 'little' else
1906+ b'\x00 \x11 \x00 \x00 ' )
1907+ with self .assertRaises (ValueError ):
1908+ writer .write_widechar ("invalid=" .encode (encoding ) + invalid )
1909+ writer .write_widechar (b'' , - 5 )
1910+ writer .write_widechar (NULL , 0 )
1911+ # CRASHES writer.write_widechar(NULL, 1)
1912+ # CRASHES writer.write_widechar(NULL, -1)
1913+
18571914 self .assertEqual (writer .finish (),
1858- "latin1=\xE9 -euro=\u20AC -max=\U0010ffff ." )
1915+ "latin1=\xE9 -euro=\u20AC -max=\U0010ffff -zeroes= \0 \0 \0 ." )
18591916
18601917 def test_ucs4 (self ):
1918+ encoding = 'utf-32le' if sys .byteorder == 'little' else 'utf-32be'
1919+
18611920 writer = self .create_writer (0 )
1862- writer .write_ucs4 ("ascii IGNORED" , 5 )
1863- writer .write_char ("-" )
1864- writer .write_ucs4 ("latin1=\xe9 " , 8 )
1865- writer .write_char ("-" )
1866- writer .write_ucs4 ("euro=\u20ac " , 6 )
1867- writer .write_char ("-" )
1868- writer .write_ucs4 ("max=\U0010ffff " , 5 )
1869- writer .write_char ("." )
1921+ writer .write_ucs4 ("ascii IGNORED" . encode ( encoding ) , 5 )
1922+ writer .write_char (ord ( "-" ) )
1923+ writer .write_ucs4 ("latin1=\xe9 " . encode ( encoding ) )
1924+ writer .write_char (ord ( "-" ) )
1925+ writer .write_ucs4 ("euro=\u20ac " . encode ( encoding ) )
1926+ writer .write_char (ord ( "-" ) )
1927+ writer .write_ucs4 ("max=\U0010ffff " . encode ( encoding ) )
1928+ writer .write_char (ord ( "." ) )
18701929 self .assertEqual (writer .finish (),
18711930 "ascii-latin1=\xE9 -euro=\u20AC -max=\U0010ffff ." )
18721931
18731932 # Test some special characters
18741933 writer = self .create_writer (0 )
18751934 # Lone surrogate character
1876- writer .write_ucs4 ("lone\uDC80 " , 5 )
1877- writer .write_char ("-" )
1935+ writer .write_ucs4 ("lone\uDC80 " . encode ( encoding , 'surrogatepass' ) )
1936+ writer .write_char (ord ( "-" ) )
18781937 # Surrogate pair
1879- writer .write_ucs4 ("pair\uDBFF \uDFFF " , 5 )
1880- writer .write_char ("-" )
1881- writer .write_ucs4 ("null[\0 ]" , 7 )
1938+ writer .write_ucs4 ("pair\uD83D \uDC0D " .encode (encoding , 'surrogatepass' ))
1939+ writer .write_char (ord ("-" ))
1940+ writer .write_ucs4 ("null[\0 ]" .encode (encoding ), 7 )
1941+ invalid = (b'\x00 \x00 \x11 \x00 ' if sys .byteorder == 'little' else
1942+ b'\x00 \x11 \x00 \x00 ' )
1943+ # CRASHES writer.write_ucs4("invalid".encode(encoding) + invalid)
1944+ writer .write_ucs4 (NULL , 0 )
1945+ # CRASHES writer.write_ucs4(NULL, 1)
18821946 self .assertEqual (writer .finish (),
1883- "lone\udc80 -pair\udbff -null[\0 ]" )
1947+ "lone\udc80 -pair\ud83d \udc0d -null[\x00 ]" )
18841948
18851949 # invalid size
18861950 writer = self .create_writer (0 )
18871951 with self .assertRaises (ValueError ):
1888- writer .write_ucs4 ("text" , - 1 )
1952+ writer .write_ucs4 ("text" .encode (encoding ), - 1 )
1953+ self .assertRaises (ValueError , writer .write_ucs4 , b'' , - 1 )
1954+ self .assertRaises (ValueError , writer .write_ucs4 , NULL , - 1 )
18891955
18901956 def test_substring_empty (self ):
18911957 writer = self .create_writer (0 )
@@ -1911,7 +1977,7 @@ def test_format(self):
19111977 from ctypes import c_int
19121978 writer = self .create_writer (0 )
19131979 self .writer_format (writer , b'%s %i' , b'abc' , c_int (123 ))
1914- writer .write_char ('.' )
1980+ writer .write_char (ord ( '.' ) )
19151981 self .assertEqual (writer .finish (), 'abc 123.' )
19161982
19171983 def test_recover_error (self ):
0 commit comments