@@ -742,25 +742,50 @@ impl BangType {
742742
743743 /// If element is finished, returns its content up to `>` symbol and
744744 /// an index of this symbol, otherwise returns `None`
745+ ///
746+ /// # Parameters
747+ /// - `buf`: buffer with data consumed on previous iterations
748+ /// - `chunk`: data read on current iteration and not yet consumed from reader
745749 #[ inline( always) ]
746- fn parse < ' b > ( & self , chunk : & ' b [ u8 ] , offset : usize ) -> Option < ( & ' b [ u8 ] , usize ) > {
750+ fn parse < ' b > ( & self , buf : & [ u8 ] , chunk : & ' b [ u8 ] ) -> Option < ( & ' b [ u8 ] , usize ) > {
747751 for i in memchr:: memchr_iter ( b'>' , chunk) {
748752 match self {
749753 // Need to read at least 6 symbols (`!---->`) for properly finished comment
750754 // <!----> - XML comment
751755 // 012345 - i
752- Self :: Comment => {
753- if offset + i > 4 && chunk[ ..i] . ends_with ( b"--" ) {
756+ Self :: Comment if buf . len ( ) + i > 4 => {
757+ if chunk[ ..i] . ends_with ( b"--" ) {
754758 // We cannot strip last `--` from the buffer because we need it in case of
755759 // check_comments enabled option. XML standard requires that comment
756760 // will not end with `--->` sequence because this is a special case of
757761 // `--` in the comment (https://www.w3.org/TR/xml11/#sec-comments)
758762 return Some ( ( & chunk[ ..i] , i + 1 ) ) ; // +1 for `>`
759763 }
764+ // End sequence `-|->` was split at |
765+ // buf --/ \-- chunk
766+ if i == 1 && buf. ends_with ( b"-" ) && chunk[ 0 ] == b'-' {
767+ return Some ( ( & chunk[ ..i] , i + 1 ) ) ; // +1 for `>`
768+ }
769+ // End sequence `--|>` was split at |
770+ // buf --/ \-- chunk
771+ if i == 0 && buf. ends_with ( b"--" ) {
772+ return Some ( ( & [ ] , i + 1 ) ) ; // +1 for `>`
773+ }
760774 }
775+ Self :: Comment => { }
761776 Self :: CData => {
762777 if chunk[ ..i] . ends_with ( b"]]" ) {
763- return Some ( ( & chunk[ ..i - 2 ] , i + 1 ) ) ; // +1 for `>`
778+ return Some ( ( & chunk[ ..i] , i + 1 ) ) ; // +1 for `>`
779+ }
780+ // End sequence `]|]>` was split at |
781+ // buf --/ \-- chunk
782+ if i == 1 && buf. ends_with ( b"]" ) && chunk[ 0 ] == b']' {
783+ return Some ( ( & chunk[ ..i] , i + 1 ) ) ; // +1 for `>`
784+ }
785+ // End sequence `]]|>` was split at |
786+ // buf --/ \-- chunk
787+ if i == 0 && buf. ends_with ( b"]]" ) {
788+ return Some ( ( & [ ] , i + 1 ) ) ; // +1 for `>`
764789 }
765790 }
766791 Self :: DocType => {
@@ -1021,7 +1046,7 @@ mod test {
10211046 $( . $await) ?
10221047 . unwrap( )
10231048 . map( |( ty, data) | ( ty, Bytes ( data) ) ) ,
1024- Some ( ( BangType :: CData , Bytes ( b"![CDATA[" ) ) )
1049+ Some ( ( BangType :: CData , Bytes ( b"![CDATA[]] " ) ) )
10251050 ) ;
10261051 assert_eq!( position, 11 ) ;
10271052 }
@@ -1042,7 +1067,7 @@ mod test {
10421067 $( . $await) ?
10431068 . unwrap( )
10441069 . map( |( ty, data) | ( ty, Bytes ( data) ) ) ,
1045- Some ( ( BangType :: CData , Bytes ( b"![CDATA[cdata]] ]>content" ) ) )
1070+ Some ( ( BangType :: CData , Bytes ( b"![CDATA[cdata]] ]>content]] " ) ) )
10461071 ) ;
10471072 assert_eq!( position, 28 ) ;
10481073 }
@@ -1751,8 +1776,157 @@ mod test {
17511776 } ;
17521777 }
17531778
1754- // Export a macro for the child modules:
1779+ /// Tests for https://github.com/tafia/quick-xml/issues/469: every test builds a reader whose capacity ends inside the closing sequence of the only event in the input, then expects that event followed by `Eof`
1780+ macro_rules! small_buffers {
1781+ (
1782+ #[ $test: meta]
1783+ $read_event: ident: $BufReader: ty
1784+ $( , $async: ident, $await: ident) ?
1785+ ) => {
1786+ mod small_buffers {
1787+ use crate :: events:: { BytesCData , BytesDecl , BytesStart , BytesText , Event } ;
1788+ use crate :: reader:: Reader ;
1789+ use pretty_assertions:: assert_eq;
1790+
1791+ #[ $test]
1792+ $( $async) ? fn decl( ) {
1793+ let xml = "<?xml ?>" ;
1794+ // ^^^^^^^ data that fits into the buffer
1795+ let size = xml. match_indices( "?>" ) . next( ) . unwrap( ) . 0 + 1 ; // capacity covers the input up to and including the closing `?`, but not the final `>`
1796+ let br = <$BufReader>:: with_capacity( size, xml. as_bytes( ) ) ;
1797+ let mut reader = Reader :: from_reader( br) ;
1798+ let mut buf = Vec :: new( ) ;
1799+
1800+ assert_eq!(
1801+ reader. $read_event( & mut buf) $( . $await) ? . unwrap( ) ,
1802+ Event :: Decl ( BytesDecl :: from_start( BytesStart :: from_content( "xml " , 3 ) ) )
1803+ ) ;
1804+ assert_eq!(
1805+ reader. $read_event( & mut buf) $( . $await) ? . unwrap( ) ,
1806+ Event :: Eof
1807+ ) ;
1808+ }
1809+
1810+ #[ $test]
1811+ $( $async) ? fn pi( ) {
1812+ let xml = "<?pi?>" ;
1813+ // ^^^^^ data that fits into the buffer
1814+ let size = xml. match_indices( "?>" ) . next( ) . unwrap( ) . 0 + 1 ; // capacity covers the input up to and including the closing `?`, but not the final `>`
1815+ let br = <$BufReader>:: with_capacity( size, xml. as_bytes( ) ) ;
1816+ let mut reader = Reader :: from_reader( br) ;
1817+ let mut buf = Vec :: new( ) ;
1818+
1819+ assert_eq!(
1820+ reader. $read_event( & mut buf) $( . $await) ? . unwrap( ) ,
1821+ Event :: PI ( BytesText :: new( "pi" ) )
1822+ ) ;
1823+ assert_eq!(
1824+ reader. $read_event( & mut buf) $( . $await) ? . unwrap( ) ,
1825+ Event :: Eof
1826+ ) ;
1827+ }
1828+
1829+ #[ $test]
1830+ $( $async) ? fn empty( ) {
1831+ let xml = "<empty/>" ;
1832+ // ^^^^^^^ data that fits into the buffer
1833+ let size = xml. match_indices( "/>" ) . next( ) . unwrap( ) . 0 + 1 ; // capacity covers the input up to and including `/`, but not the final `>`
1834+ let br = <$BufReader>:: with_capacity( size, xml. as_bytes( ) ) ;
1835+ let mut reader = Reader :: from_reader( br) ;
1836+ let mut buf = Vec :: new( ) ;
1837+
1838+ assert_eq!(
1839+ reader. $read_event( & mut buf) $( . $await) ? . unwrap( ) ,
1840+ Event :: Empty ( BytesStart :: new( "empty" ) )
1841+ ) ;
1842+ assert_eq!(
1843+ reader. $read_event( & mut buf) $( . $await) ? . unwrap( ) ,
1844+ Event :: Eof
1845+ ) ;
1846+ }
1847+
1848+ #[ $test]
1849+ $( $async) ? fn cdata1( ) {
1850+ let xml = "<![CDATA[cdata]]>" ;
1851+ // ^^^^^^^^^^^^^^^ data that fits into the buffer
1852+ let size = xml. match_indices( "]]>" ) . next( ) . unwrap( ) . 0 + 1 ; // capacity ends after the first `]` of `]]>`, i.e. the end sequence is cut as `]|]>`
1853+ let br = <$BufReader>:: with_capacity( size, xml. as_bytes( ) ) ;
1854+ let mut reader = Reader :: from_reader( br) ;
1855+ let mut buf = Vec :: new( ) ;
1856+
1857+ assert_eq!(
1858+ reader. $read_event( & mut buf) $( . $await) ? . unwrap( ) ,
1859+ Event :: CData ( BytesCData :: new( "cdata" ) )
1860+ ) ;
1861+ assert_eq!(
1862+ reader. $read_event( & mut buf) $( . $await) ? . unwrap( ) ,
1863+ Event :: Eof
1864+ ) ;
1865+ }
1866+
1867+ #[ $test]
1868+ $( $async) ? fn cdata2( ) {
1869+ let xml = "<![CDATA[cdata]]>" ;
1870+ // ^^^^^^^^^^^^^^^^ data that fits into the buffer
1871+ let size = xml. match_indices( "]]>" ) . next( ) . unwrap( ) . 0 + 2 ; // capacity ends after both `]` of `]]>`, i.e. the end sequence is cut as `]]|>`
1872+ let br = <$BufReader>:: with_capacity( size, xml. as_bytes( ) ) ;
1873+ let mut reader = Reader :: from_reader( br) ;
1874+ let mut buf = Vec :: new( ) ;
1875+
1876+ assert_eq!(
1877+ reader. $read_event( & mut buf) $( . $await) ? . unwrap( ) ,
1878+ Event :: CData ( BytesCData :: new( "cdata" ) )
1879+ ) ;
1880+ assert_eq!(
1881+ reader. $read_event( & mut buf) $( . $await) ? . unwrap( ) ,
1882+ Event :: Eof
1883+ ) ;
1884+ }
1885+
1886+ #[ $test]
1887+ $( $async) ? fn comment1( ) {
1888+ let xml = "<!--comment-->" ;
1889+ // ^^^^^^^^^^^^ data that fits into the buffer
1890+ let size = xml. match_indices( "-->" ) . next( ) . unwrap( ) . 0 + 1 ; // capacity ends after the first `-` of `-->`, i.e. the end sequence is cut as `-|->`
1891+ let br = <$BufReader>:: with_capacity( size, xml. as_bytes( ) ) ;
1892+ let mut reader = Reader :: from_reader( br) ;
1893+ let mut buf = Vec :: new( ) ;
1894+
1895+ assert_eq!(
1896+ reader. $read_event( & mut buf) $( . $await) ? . unwrap( ) ,
1897+ Event :: Comment ( BytesText :: new( "comment" ) )
1898+ ) ;
1899+ assert_eq!(
1900+ reader. $read_event( & mut buf) $( . $await) ? . unwrap( ) ,
1901+ Event :: Eof
1902+ ) ;
1903+ }
1904+
1905+ #[ $test]
1906+ $( $async) ? fn comment2( ) {
1907+ let xml = "<!--comment-->" ;
1908+ // ^^^^^^^^^^^^^ data that fits into the buffer
1909+ let size = xml. match_indices( "-->" ) . next( ) . unwrap( ) . 0 + 2 ; // capacity ends after both `-` of `-->`, i.e. the end sequence is cut as `--|>`
1910+ let br = <$BufReader>:: with_capacity( size, xml. as_bytes( ) ) ;
1911+ let mut reader = Reader :: from_reader( br) ;
1912+ let mut buf = Vec :: new( ) ;
1913+
1914+ assert_eq!(
1915+ reader. $read_event( & mut buf) $( . $await) ? . unwrap( ) ,
1916+ Event :: Comment ( BytesText :: new( "comment" ) )
1917+ ) ;
1918+ assert_eq!(
1919+ reader. $read_event( & mut buf) $( . $await) ? . unwrap( ) ,
1920+ Event :: Eof
1921+ ) ;
1922+ }
1923+ }
1924+ } ;
1925+ }
1926+
1927+ // Export macros for the child modules:
17551928 // - buffered_reader
17561929 // - slice_reader
17571930 pub ( super ) use check;
1931+ pub ( super ) use small_buffers;
17581932}
0 commit comments