@@ -742,25 +742,50 @@ impl BangType {
742
742
743
743
/// If element is finished, returns its content up to `>` symbol and
744
744
/// an index of this symbol, otherwise returns `None`
745
+ ///
746
+ /// # Parameters
747
+ /// - `buf`: buffer with data consumed on previous iterations
748
+ /// - `chunk`: data read on current iteration and not yet consumed from reader
745
749
#[ inline( always) ]
746
- fn parse < ' b > ( & self , chunk : & ' b [ u8 ] , offset : usize ) -> Option < ( & ' b [ u8 ] , usize ) > {
750
+ fn parse < ' b > ( & self , buf : & [ u8 ] , chunk : & ' b [ u8 ] ) -> Option < ( & ' b [ u8 ] , usize ) > {
747
751
for i in memchr:: memchr_iter ( b'>' , chunk) {
748
752
match self {
749
753
// Need to read at least 6 symbols (`!---->`) for properly finished comment
750
754
// <!----> - XML comment
751
755
// 012345 - i
752
- Self :: Comment => {
753
- if offset + i > 4 && chunk[ ..i] . ends_with ( b"--" ) {
756
+ Self :: Comment if buf . len ( ) + i > 4 => {
757
+ if chunk[ ..i] . ends_with ( b"--" ) {
754
758
// We cannot strip last `--` from the buffer because we need it in case of
755
759
// check_comments enabled option. XML standard requires that comment
756
760
// will not end with `--->` sequence because this is a special case of
757
761
// `--` in the comment (https://www.w3.org/TR/xml11/#sec-comments)
758
762
return Some ( ( & chunk[ ..i] , i + 1 ) ) ; // +1 for `>`
759
763
}
764
+ // End sequence `-|->` was split at |
765
+ // buf --/ \-- chunk
766
+ if i == 1 && buf. ends_with ( b"-" ) && chunk[ 0 ] == b'-' {
767
+ return Some ( ( & chunk[ ..i] , i + 1 ) ) ; // +1 for `>`
768
+ }
769
+ // End sequence `--|>` was split at |
770
+ // buf --/ \-- chunk
771
+ if i == 0 && buf. ends_with ( b"--" ) {
772
+ return Some ( ( & [ ] , i + 1 ) ) ; // +1 for `>`
773
+ }
760
774
}
775
+ Self :: Comment => { }
761
776
Self :: CData => {
762
777
if chunk[ ..i] . ends_with ( b"]]" ) {
763
- return Some ( ( & chunk[ ..i - 2 ] , i + 1 ) ) ; // +1 for `>`
778
+ return Some ( ( & chunk[ ..i] , i + 1 ) ) ; // +1 for `>`
779
+ }
780
+ // End sequence `]|]>` was split at |
781
+ // buf --/ \-- chunk
782
+ if i == 1 && buf. ends_with ( b"]" ) && chunk[ 0 ] == b']' {
783
+ return Some ( ( & chunk[ ..i] , i + 1 ) ) ; // +1 for `>`
784
+ }
785
+ // End sequence `]]|>` was split at |
786
+ // buf --/ \-- chunk
787
+ if i == 0 && buf. ends_with ( b"]]" ) {
788
+ return Some ( ( & [ ] , i + 1 ) ) ; // +1 for `>`
764
789
}
765
790
}
766
791
Self :: DocType => {
@@ -1021,7 +1046,7 @@ mod test {
1021
1046
$( . $await) ?
1022
1047
. unwrap( )
1023
1048
. map( |( ty, data) | ( ty, Bytes ( data) ) ) ,
1024
- Some ( ( BangType :: CData , Bytes ( b"![CDATA[" ) ) )
1049
+ Some ( ( BangType :: CData , Bytes ( b"![CDATA[]] " ) ) )
1025
1050
) ;
1026
1051
assert_eq!( position, 11 ) ;
1027
1052
}
@@ -1042,7 +1067,7 @@ mod test {
1042
1067
$( . $await) ?
1043
1068
. unwrap( )
1044
1069
. map( |( ty, data) | ( ty, Bytes ( data) ) ) ,
1045
- Some ( ( BangType :: CData , Bytes ( b"![CDATA[cdata]] ]>content" ) ) )
1070
+ Some ( ( BangType :: CData , Bytes ( b"![CDATA[cdata]] ]>content]] " ) ) )
1046
1071
) ;
1047
1072
assert_eq!( position, 28 ) ;
1048
1073
}
0 commit comments