@@ -742,25 +742,50 @@ impl BangType {
742742
743743 /// If element is finished, returns its content up to `>` symbol and
744744 /// an index of this symbol, otherwise returns `None`
745+ ///
746+ /// # Parameters
747+ /// - `buf`: buffer with data consumed on previous iterations
748+ /// - `chunk`: data read on current iteration and not yet consumed from reader
745749 #[ inline( always) ]
746- fn parse < ' b > ( & self , chunk : & ' b [ u8 ] , offset : usize ) -> Option < ( & ' b [ u8 ] , usize ) > {
750+ fn parse < ' b > ( & self , buf : & [ u8 ] , chunk : & ' b [ u8 ] ) -> Option < ( & ' b [ u8 ] , usize ) > {
747751 for i in memchr:: memchr_iter ( b'>' , chunk) {
748752 match self {
749753 // Need to read at least 6 symbols (`!---->`) for properly finished comment
750754 // <!----> - XML comment
751755 // 012345 - i
752- Self :: Comment => {
753- if offset + i > 4 && chunk[ ..i] . ends_with ( b"--" ) {
756+ Self :: Comment if buf . len ( ) + i > 4 => {
757+ if chunk[ ..i] . ends_with ( b"--" ) {
754758 // We cannot strip last `--` from the buffer because we need it in case of
755759 // check_comments enabled option. XML standard requires that comment
756760 // will not end with `--->` sequence because this is a special case of
757761 // `--` in the comment (https://www.w3.org/TR/xml11/#sec-comments)
758762 return Some ( ( & chunk[ ..i] , i + 1 ) ) ; // +1 for `>`
759763 }
764+ // End sequence `-|->` was split at |
765+ // buf --/ \-- chunk
766+ if i == 1 && buf. ends_with ( b"-" ) && chunk[ 0 ] == b'-' {
767+ return Some ( ( & chunk[ ..i] , i + 1 ) ) ; // +1 for `>`
768+ }
769+ // End sequence `--|>` was split at |
770+ // buf --/ \-- chunk
771+ if i == 0 && buf. ends_with ( b"--" ) {
772+ return Some ( ( & [ ] , i + 1 ) ) ; // +1 for `>`
773+ }
760774 }
775+ Self :: Comment => { }
761776 Self :: CData => {
762777 if chunk[ ..i] . ends_with ( b"]]" ) {
763- return Some ( ( & chunk[ ..i - 2 ] , i + 1 ) ) ; // +1 for `>`
778+ return Some ( ( & chunk[ ..i] , i + 1 ) ) ; // +1 for `>`
779+ }
780+ // End sequence `]|]>` was split at |
781+ // buf --/ \-- chunk
782+ if i == 1 && buf. ends_with ( b"]" ) && chunk[ 0 ] == b']' {
783+ return Some ( ( & chunk[ ..i] , i + 1 ) ) ; // +1 for `>`
784+ }
785+ // End sequence `]]|>` was split at |
786+ // buf --/ \-- chunk
787+ if i == 0 && buf. ends_with ( b"]]" ) {
788+ return Some ( ( & [ ] , i + 1 ) ) ; // +1 for `>`
764789 }
765790 }
766791 Self :: DocType => {
@@ -1021,7 +1046,7 @@ mod test {
10211046 $( . $await) ?
10221047 . unwrap( )
10231048 . map( |( ty, data) | ( ty, Bytes ( data) ) ) ,
1024- Some ( ( BangType :: CData , Bytes ( b"![CDATA[" ) ) )
1049+ Some ( ( BangType :: CData , Bytes ( b"![CDATA[]] " ) ) )
10251050 ) ;
10261051 assert_eq!( position, 11 ) ;
10271052 }
@@ -1042,7 +1067,7 @@ mod test {
10421067 $( . $await) ?
10431068 . unwrap( )
10441069 . map( |( ty, data) | ( ty, Bytes ( data) ) ) ,
1045- Some ( ( BangType :: CData , Bytes ( b"![CDATA[cdata]] ]>content" ) ) )
1070+ Some ( ( BangType :: CData , Bytes ( b"![CDATA[cdata]] ]>content]] " ) ) )
10461071 ) ;
10471072 assert_eq!( position, 28 ) ;
10481073 }
0 commit comments