@@ -742,25 +742,50 @@ impl BangType {
742
742
743
743
/// If element is finished, returns its content up to `>` symbol and
744
744
/// an index of this symbol, otherwise returns `None`
745
+ ///
746
+ /// # Parameters
747
+ /// - `buf`: buffer with data consumed on previous iterations
748
+ /// - `chunk`: data read on current iteration and not yet consumed from reader
745
749
#[ inline( always) ]
746
- fn parse < ' b > ( & self , chunk : & ' b [ u8 ] , offset : usize ) -> Option < ( & ' b [ u8 ] , usize ) > {
750
+ fn parse < ' b > ( & self , buf : & [ u8 ] , chunk : & ' b [ u8 ] ) -> Option < ( & ' b [ u8 ] , usize ) > {
747
751
for i in memchr:: memchr_iter ( b'>' , chunk) {
748
752
match self {
749
753
// Need to read at least 6 symbols (`!---->`) for properly finished comment
750
754
// <!----> - XML comment
751
755
// 012345 - i
752
- Self :: Comment => {
753
- if offset + i > 4 && chunk[ ..i] . ends_with ( b"--" ) {
756
+ Self :: Comment if buf . len ( ) + i > 4 => {
757
+ if chunk[ ..i] . ends_with ( b"--" ) {
754
758
// We cannot strip last `--` from the buffer because we need it in case of
755
759
// check_comments enabled option. XML standard requires that comment
756
760
// will not end with `--->` sequence because this is a special case of
757
761
// `--` in the comment (https://www.w3.org/TR/xml11/#sec-comments)
758
762
return Some ( ( & chunk[ ..i] , i + 1 ) ) ; // +1 for `>`
759
763
}
764
+ // End sequence `-|->` was split at |
765
+ // buf --/ \-- chunk
766
+ if i == 1 && buf. ends_with ( b"-" ) && chunk[ 0 ] == b'-' {
767
+ return Some ( ( & chunk[ ..i] , i + 1 ) ) ; // +1 for `>`
768
+ }
769
+ // End sequence `--|>` was split at |
770
+ // buf --/ \-- chunk
771
+ if i == 0 && buf. ends_with ( b"--" ) {
772
+ return Some ( ( & [ ] , i + 1 ) ) ; // +1 for `>`
773
+ }
760
774
}
775
+ Self :: Comment => { }
761
776
Self :: CData => {
762
777
if chunk[ ..i] . ends_with ( b"]]" ) {
763
- return Some ( ( & chunk[ ..i - 2 ] , i + 1 ) ) ; // +1 for `>`
778
+ return Some ( ( & chunk[ ..i] , i + 1 ) ) ; // +1 for `>`
779
+ }
780
+ // End sequence `]|]>` was split at |
781
+ // buf --/ \-- chunk
782
+ if i == 1 && buf. ends_with ( b"]" ) && chunk[ 0 ] == b']' {
783
+ return Some ( ( & chunk[ ..i] , i + 1 ) ) ; // +1 for `>`
784
+ }
785
+ // End sequence `]]|>` was split at |
786
+ // buf --/ \-- chunk
787
+ if i == 0 && buf. ends_with ( b"]]" ) {
788
+ return Some ( ( & [ ] , i + 1 ) ) ; // +1 for `>`
764
789
}
765
790
}
766
791
Self :: DocType => {
@@ -1021,7 +1046,7 @@ mod test {
1021
1046
$( . $await) ?
1022
1047
. unwrap( )
1023
1048
. map( |( ty, data) | ( ty, Bytes ( data) ) ) ,
1024
- Some ( ( BangType :: CData , Bytes ( b"![CDATA[" ) ) )
1049
+ Some ( ( BangType :: CData , Bytes ( b"![CDATA[]] " ) ) )
1025
1050
) ;
1026
1051
assert_eq!( position, 11 ) ;
1027
1052
}
@@ -1042,7 +1067,7 @@ mod test {
1042
1067
$( . $await) ?
1043
1068
. unwrap( )
1044
1069
. map( |( ty, data) | ( ty, Bytes ( data) ) ) ,
1045
- Some ( ( BangType :: CData , Bytes ( b"![CDATA[cdata]] ]>content" ) ) )
1070
+ Some ( ( BangType :: CData , Bytes ( b"![CDATA[cdata]] ]>content]] " ) ) )
1046
1071
) ;
1047
1072
assert_eq!( position, 28 ) ;
1048
1073
}
@@ -1751,8 +1776,157 @@ mod test {
1751
1776
} ;
1752
1777
}
1753
1778
1754
- // Export a macro for the child modules:
1779
+ /// Tests for https://github.com/tafia/quick-xml/issues/469
1780
+ macro_rules! small_buffers {
1781
+ (
1782
+ #[ $test: meta]
1783
+ $read_event: ident: $BufReader: ty
1784
+ $( , $async: ident, $await: ident) ?
1785
+ ) => {
1786
+ mod small_buffers {
1787
+ use crate :: events:: { BytesCData , BytesDecl , BytesStart , BytesText , Event } ;
1788
+ use crate :: reader:: Reader ;
1789
+ use pretty_assertions:: assert_eq;
1790
+
1791
+ #[ $test]
1792
+ $( $async) ? fn decl( ) {
1793
+ let xml = "<?xml ?>" ;
1794
+ // ^^^^^^^ data that fit into buffer
1795
+ let size = xml. match_indices( "?>" ) . next( ) . unwrap( ) . 0 + 1 ;
1796
+ let br = <$BufReader>:: with_capacity( size, xml. as_bytes( ) ) ;
1797
+ let mut reader = Reader :: from_reader( br) ;
1798
+ let mut buf = Vec :: new( ) ;
1799
+
1800
+ assert_eq!(
1801
+ reader. $read_event( & mut buf) $( . $await) ? . unwrap( ) ,
1802
+ Event :: Decl ( BytesDecl :: from_start( BytesStart :: from_content( "xml " , 3 ) ) )
1803
+ ) ;
1804
+ assert_eq!(
1805
+ reader. $read_event( & mut buf) $( . $await) ? . unwrap( ) ,
1806
+ Event :: Eof
1807
+ ) ;
1808
+ }
1809
+
1810
+ #[ $test]
1811
+ $( $async) ? fn pi( ) {
1812
+ let xml = "<?pi?>" ;
1813
+ // ^^^^^ data that fit into buffer
1814
+ let size = xml. match_indices( "?>" ) . next( ) . unwrap( ) . 0 + 1 ;
1815
+ let br = <$BufReader>:: with_capacity( size, xml. as_bytes( ) ) ;
1816
+ let mut reader = Reader :: from_reader( br) ;
1817
+ let mut buf = Vec :: new( ) ;
1818
+
1819
+ assert_eq!(
1820
+ reader. $read_event( & mut buf) $( . $await) ? . unwrap( ) ,
1821
+ Event :: PI ( BytesText :: new( "pi" ) )
1822
+ ) ;
1823
+ assert_eq!(
1824
+ reader. $read_event( & mut buf) $( . $await) ? . unwrap( ) ,
1825
+ Event :: Eof
1826
+ ) ;
1827
+ }
1828
+
1829
+ #[ $test]
1830
+ $( $async) ? fn empty( ) {
1831
+ let xml = "<empty/>" ;
1832
+ // ^^^^^^^ data that fit into buffer
1833
+ let size = xml. match_indices( "/>" ) . next( ) . unwrap( ) . 0 + 1 ;
1834
+ let br = <$BufReader>:: with_capacity( size, xml. as_bytes( ) ) ;
1835
+ let mut reader = Reader :: from_reader( br) ;
1836
+ let mut buf = Vec :: new( ) ;
1837
+
1838
+ assert_eq!(
1839
+ reader. $read_event( & mut buf) $( . $await) ? . unwrap( ) ,
1840
+ Event :: Empty ( BytesStart :: new( "empty" ) )
1841
+ ) ;
1842
+ assert_eq!(
1843
+ reader. $read_event( & mut buf) $( . $await) ? . unwrap( ) ,
1844
+ Event :: Eof
1845
+ ) ;
1846
+ }
1847
+
1848
+ #[ $test]
1849
+ $( $async) ? fn cdata1( ) {
1850
+ let xml = "<![CDATA[cdata]]>" ;
1851
+ // ^^^^^^^^^^^^^^^ data that fit into buffer
1852
+ let size = xml. match_indices( "]]>" ) . next( ) . unwrap( ) . 0 + 1 ;
1853
+ let br = <$BufReader>:: with_capacity( size, xml. as_bytes( ) ) ;
1854
+ let mut reader = Reader :: from_reader( br) ;
1855
+ let mut buf = Vec :: new( ) ;
1856
+
1857
+ assert_eq!(
1858
+ reader. $read_event( & mut buf) $( . $await) ? . unwrap( ) ,
1859
+ Event :: CData ( BytesCData :: new( "cdata" ) )
1860
+ ) ;
1861
+ assert_eq!(
1862
+ reader. $read_event( & mut buf) $( . $await) ? . unwrap( ) ,
1863
+ Event :: Eof
1864
+ ) ;
1865
+ }
1866
+
1867
+ #[ $test]
1868
+ $( $async) ? fn cdata2( ) {
1869
+ let xml = "<![CDATA[cdata]]>" ;
1870
+ // ^^^^^^^^^^^^^^^^ data that fit into buffer
1871
+ let size = xml. match_indices( "]]>" ) . next( ) . unwrap( ) . 0 + 2 ;
1872
+ let br = <$BufReader>:: with_capacity( size, xml. as_bytes( ) ) ;
1873
+ let mut reader = Reader :: from_reader( br) ;
1874
+ let mut buf = Vec :: new( ) ;
1875
+
1876
+ assert_eq!(
1877
+ reader. $read_event( & mut buf) $( . $await) ? . unwrap( ) ,
1878
+ Event :: CData ( BytesCData :: new( "cdata" ) )
1879
+ ) ;
1880
+ assert_eq!(
1881
+ reader. $read_event( & mut buf) $( . $await) ? . unwrap( ) ,
1882
+ Event :: Eof
1883
+ ) ;
1884
+ }
1885
+
1886
+ #[ $test]
1887
+ $( $async) ? fn comment1( ) {
1888
+ let xml = "<!--comment-->" ;
1889
+ // ^^^^^^^^^^^^ data that fit into buffer
1890
+ let size = xml. match_indices( "-->" ) . next( ) . unwrap( ) . 0 + 1 ;
1891
+ let br = <$BufReader>:: with_capacity( size, xml. as_bytes( ) ) ;
1892
+ let mut reader = Reader :: from_reader( br) ;
1893
+ let mut buf = Vec :: new( ) ;
1894
+
1895
+ assert_eq!(
1896
+ reader. $read_event( & mut buf) $( . $await) ? . unwrap( ) ,
1897
+ Event :: Comment ( BytesText :: new( "comment" ) )
1898
+ ) ;
1899
+ assert_eq!(
1900
+ reader. $read_event( & mut buf) $( . $await) ? . unwrap( ) ,
1901
+ Event :: Eof
1902
+ ) ;
1903
+ }
1904
+
1905
+ #[ $test]
1906
+ $( $async) ? fn comment2( ) {
1907
+ let xml = "<!--comment-->" ;
1908
+ // ^^^^^^^^^^^^^ data that fit into buffer
1909
+ let size = xml. match_indices( "-->" ) . next( ) . unwrap( ) . 0 + 2 ;
1910
+ let br = <$BufReader>:: with_capacity( size, xml. as_bytes( ) ) ;
1911
+ let mut reader = Reader :: from_reader( br) ;
1912
+ let mut buf = Vec :: new( ) ;
1913
+
1914
+ assert_eq!(
1915
+ reader. $read_event( & mut buf) $( . $await) ? . unwrap( ) ,
1916
+ Event :: Comment ( BytesText :: new( "comment" ) )
1917
+ ) ;
1918
+ assert_eq!(
1919
+ reader. $read_event( & mut buf) $( . $await) ? . unwrap( ) ,
1920
+ Event :: Eof
1921
+ ) ;
1922
+ }
1923
+ }
1924
+ } ;
1925
+ }
1926
+
1927
+ // Export macros for the child modules:
1755
1928
// - buffered_reader
1756
1929
// - slice_reader
1757
1930
pub ( super ) use check;
1931
+ pub ( super ) use small_buffers;
1758
1932
}
0 commit comments