Skip to content

Commit 5467b6c

Browse files
committed
Use internal decoder of event instead of supplied one in parameters
1 parent 4af1fc4 commit 5467b6c

File tree

12 files changed

+48
-80
lines changed

12 files changed

+48
-80
lines changed

Changelog.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,8 @@
156156
because writer anyway works in UTF-8 only
157157
- [#428]: Changed the event and `Attributes` constructors to accept a `&str` slices instead of `&[u8]` slices.
158158
Handmade events have always been assumed to store their content UTF-8 encoded.
159+
- [#428]: Removed `Decoder` parameter from `_and_decode` versions of functions for
160+
`BytesText` (remember that those functions were renamed in #415).
159161

160162
### New Tests
161163

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ loop {
5353
_ => (),
5454
}
5555
}
56-
Ok(Event::Text(e)) => txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()),
56+
Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),
5757

5858
// There are several other `Event`s we do not consider here
5959
_ => (),

benches/macrobenches.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ fn parse_document(doc: &[u8]) -> XmlResult<()> {
2828
}
2929
}
3030
Event::Text(e) => {
31-
criterion::black_box(e.decode_and_unescape(&r)?);
31+
criterion::black_box(e.unescape()?);
3232
}
3333
Event::CData(e) => {
3434
criterion::black_box(e.into_inner());

benches/microbenches.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ fn one_event(c: &mut Criterion) {
174174
.check_comments(false)
175175
.trim_text(true);
176176
match r.read_event_into(&mut buf) {
177-
Ok(Event::Comment(e)) => nbtxt += e.decode_and_unescape(&r).unwrap().len(),
177+
Ok(Event::Comment(e)) => nbtxt += e.unescape().unwrap().len(),
178178
something_else => panic!("Did not expect {:?}", something_else),
179179
};
180180

examples/custom_entities.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
6060
Ok(Event::Text(ref e)) => {
6161
println!(
6262
"text value: {}",
63-
e.decode_and_unescape_with(&reader, |ent| custom_entities
64-
.get(ent)
65-
.map(|s| s.as_str()))
63+
e.unescape_with(|ent| custom_entities.get(ent).map(|s| s.as_str()))
6664
.unwrap()
6765
);
6866
}

src/de/mod.rs

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -612,16 +612,15 @@ where
612612
unescape: bool,
613613
allow_start: bool,
614614
) -> Result<Cow<'de, str>, DeError> {
615-
let decoder = self.reader.decoder();
616615
match self.next()? {
617-
DeEvent::Text(e) => Ok(e.decode(decoder, unescape)?),
618-
DeEvent::CData(e) => Ok(e.decode(decoder)?),
616+
DeEvent::Text(e) => Ok(e.decode(unescape)?),
617+
DeEvent::CData(e) => Ok(e.decode()?),
619618
DeEvent::Start(e) if allow_start => {
620619
// allow one nested level
621620
let inner = self.next()?;
622621
let t = match inner {
623-
DeEvent::Text(t) => t.decode(decoder, unescape)?,
624-
DeEvent::CData(t) => t.decode(decoder)?,
622+
DeEvent::Text(t) => t.decode(unescape)?,
623+
DeEvent::CData(t) => t.decode()?,
625624
DeEvent::Start(s) => {
626625
return Err(DeError::UnexpectedStart(s.name().as_ref().to_owned()))
627626
}

src/events/mod.rs

Lines changed: 23 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
//!
3030
//! See [`Writer`] for further information.
3131
//!
32+
//! [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
33+
//! [`Reader`]: crate::reader::Reader
3234
//! [`Writer`]: crate::writer::Writer
3335
//! [`Event`]: crate::events::Event
3436
@@ -44,7 +46,7 @@ use std::str::from_utf8;
4446
use crate::errors::{Error, Result};
4547
use crate::escape::{escape, partial_escape, unescape_with};
4648
use crate::name::{LocalName, QName};
47-
use crate::reader::{Decoder, Reader};
49+
use crate::reader::Decoder;
4850
use crate::utils::write_cow_string;
4951
use attributes::{Attribute, Attributes};
5052

@@ -84,9 +86,9 @@ impl<'a> BytesStartText<'a> {
8486
///
8587
/// This method does not unescape content, because no escape sequences can
8688
/// appear in the BOM or in the text before the first tag.
87-
pub fn decode_with_bom_removal(&self, decoder: Decoder) -> Result<String> {
89+
pub fn decode_with_bom_removal(&self) -> Result<String> {
8890
//TODO: Fix lifetime issue - it should be possible to borrow string
89-
let decoded = decoder.decode_with_bom_removal(&*self)?;
91+
let decoded = self.content.decoder.decode_with_bom_removal(&*self)?;
9092

9193
Ok(decoded.to_string())
9294
}
@@ -758,59 +760,23 @@ impl<'a> BytesText<'a> {
758760
}
759761
}
760762

761-
/// Decodes using UTF-8 then unescapes the content of the event.
762-
///
763-
/// Searches for '&' into content and try to escape the coded character if possible
764-
/// returns Malformed error with index within element if '&' is not followed by ';'
765-
///
766-
/// See also [`unescape_with()`](Self::unescape_with)
767-
///
768-
/// This method is available only if `encoding` feature is **not** enabled.
769-
#[cfg(any(doc, not(feature = "encoding")))]
770-
pub fn unescape(&self) -> Result<Cow<str>> {
771-
self.unescape_with(|_| None)
772-
}
773-
774-
/// Decodes using UTF-8 then unescapes the content of the event with custom entities.
775-
///
776-
/// Searches for '&' into content and try to escape the coded character if possible
777-
/// returns Malformed error with index within element if '&' is not followed by ';'
778-
/// A fallback resolver for additional custom entities can be provided via `resolve_entity`.
779-
///
780-
/// See also [`unescape()`](Self::unescape)
781-
///
782-
/// This method is available only if `encoding` feature is **not** enabled.
783-
#[cfg(any(doc, not(feature = "encoding")))]
784-
pub fn unescape_with<'entity>(
785-
&self,
786-
resolve_entity: impl Fn(&str) -> Option<&'entity str>,
787-
) -> Result<Cow<str>> {
788-
// from_utf8 should never fail because content is always UTF-8 encoded
789-
Ok(unescape_with(from_utf8(&self.content)?, resolve_entity)?)
790-
}
791-
792763
/// Decodes then unescapes the content of the event.
793764
///
794765
/// This will allocate if the value contains any escape sequences or is in a
795766
/// non-UTF-8 encoding.
796-
pub fn decode_and_unescape<B>(&self, reader: &Reader<B>) -> Result<Cow<str>> {
797-
self.decode_and_unescape_with(reader, |_| None)
767+
pub fn unescape(&self) -> Result<Cow<str>> {
768+
self.unescape_with(|_| None)
798769
}
799770

800771
/// Decodes then unescapes the content of the event with custom entities.
801772
///
802773
/// This will allocate if the value contains any escape sequences or is in a
803774
/// non-UTF-8 encoding.
804-
///
805-
/// # Pre-condition
806-
///
807-
/// The implementation of `resolve_entity` is expected to operate over UTF-8 inputs.
808-
pub fn decode_and_unescape_with<'entity, B>(
775+
pub fn unescape_with<'entity>(
809776
&self,
810-
reader: &Reader<B>,
811777
resolve_entity: impl Fn(&str) -> Option<&'entity str>,
812778
) -> Result<Cow<str>> {
813-
let decoded = reader.decoder().decode(&*self)?;
779+
let decoded = self.decoder.decode(&*self)?;
814780

815781
match unescape_with(&decoded, resolve_entity)? {
816782
// Because result is borrowed, no replacements were done and we can use original string
@@ -820,15 +786,15 @@ impl<'a> BytesText<'a> {
820786
}
821787

822788
/// Gets content of this text buffer in the specified encoding and optionally
823-
/// unescapes it. Unlike [`Self::decode_and_unescape`] & Co., the lifetime
789+
/// unescapes it. Unlike [`Self::unescape`] & Co., the lifetime
824790
/// of the returned `Cow` is bound to the original buffer / input
825791
#[cfg(feature = "serialize")]
826-
pub(crate) fn decode(&self, decoder: Decoder, unescape: bool) -> Result<Cow<'a, str>> {
792+
pub(crate) fn decode(&self, unescape: bool) -> Result<Cow<'a, str>> {
827793
//TODO: too many copies, can be optimized
828794
let text = match &self.content {
829-
Cow::Borrowed(bytes) => decoder.decode(bytes)?,
795+
Cow::Borrowed(bytes) => self.decoder.decode(bytes)?,
830796
// Convert to owned, because otherwise Cow will be bound with wrong lifetime
831-
Cow::Owned(bytes) => decoder.decode(bytes)?.into_owned().into(),
797+
Cow::Owned(bytes) => self.decoder.decode(bytes)?.into_owned().into(),
832798
};
833799
let text = if unescape {
834800
//FIXME: need to take into account entities defined in the document
@@ -930,8 +896,8 @@ impl<'a> BytesCData<'a> {
930896
/// | `&` | `&amp;`
931897
/// | `'` | `&apos;`
932898
/// | `"` | `&quot;`
933-
pub fn escape(self, decoder: Decoder) -> Result<BytesText<'a>> {
934-
let decoded = self.decode(decoder)?;
899+
pub fn escape(self) -> Result<BytesText<'a>> {
900+
let decoded = self.decode()?;
935901
Ok(BytesText::wrap(
936902
match escape(&decoded) {
937903
// Because result is borrowed, no replacements were done and we can use original content
@@ -955,8 +921,8 @@ impl<'a> BytesCData<'a> {
955921
/// | `<` | `&lt;`
956922
/// | `>` | `&gt;`
957923
/// | `&` | `&amp;`
958-
pub fn partial_escape(self, decoder: Decoder) -> Result<BytesText<'a>> {
959-
let decoded = self.decode(decoder)?;
924+
pub fn partial_escape(self) -> Result<BytesText<'a>> {
925+
let decoded = self.decode()?;
960926
Ok(BytesText::wrap(
961927
match partial_escape(&decoded) {
962928
// Because result is borrowed, no replacements were done and we can use original content
@@ -968,11 +934,11 @@ impl<'a> BytesCData<'a> {
968934
}
969935

970936
/// Gets content of this text buffer, decoded using the decoder stored in the event
971-
pub(crate) fn decode(&self, decoder: Decoder) -> Result<Cow<'a, str>> {
937+
pub(crate) fn decode(&self) -> Result<Cow<'a, str>> {
972938
Ok(match &self.content {
973-
Cow::Borrowed(bytes) => decoder.decode(bytes)?,
939+
Cow::Borrowed(bytes) => self.decoder.decode(bytes)?,
974940
// Convert to owned, because otherwise Cow will be bound with wrong lifetime
975-
Cow::Owned(bytes) => decoder.decode(bytes)?.into_owned().into(),
941+
Cow::Owned(bytes) => self.decoder.decode(bytes)?.into_owned().into(),
976942
})
977943
}
978944
}
@@ -996,6 +962,8 @@ impl<'a> Deref for BytesCData<'a> {
996962
////////////////////////////////////////////////////////////////////////////////////////////////////
997963

998964
/// Event emitted by [`Reader::read_event_into`].
965+
///
966+
/// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
999967
#[derive(Clone, Debug, Eq, PartialEq)]
1000968
pub enum Event<'a> {
1001969
/// Text that appeared before the first opening tag or an [XML declaration].
@@ -1044,6 +1012,7 @@ pub enum Event<'a> {
10441012
///
10451013
/// [XML declaration]: Event::Decl
10461014
/// [std]: https://www.w3.org/TR/xml11/#NT-document
1015+
/// [`Reader`]: crate::reader::Reader
10471016
/// [`Writer`]: crate::writer::Writer
10481017
StartText(BytesStartText<'a>),
10491018
/// Start tag (with attributes) `<tag attr="value">`.

src/reader/buffered_reader.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ impl<R: BufRead> Reader<R> {
4848
/// loop {
4949
/// match reader.read_event_into(&mut buf) {
5050
/// Ok(Event::Start(ref e)) => count += 1,
51-
/// Ok(Event::Text(e)) => txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()),
51+
/// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),
5252
/// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
5353
/// Ok(Event::Eof) => break,
5454
/// _ => (),
@@ -207,7 +207,7 @@ impl<R: BufRead> Reader<R> {
207207
let s = match self.read_event_into(buf) {
208208
Err(e) => return Err(e),
209209

210-
Ok(Event::Text(e)) => e.decode_and_unescape(self)?.into_owned(),
210+
Ok(Event::Text(e)) => e.unescape()?.into_owned(),
211211
Ok(Event::End(e)) if e.name() == end => return Ok("".to_string()),
212212
Ok(Event::Eof) => return Err(Error::UnexpectedEof("Text".to_string())),
213213
_ => return Err(Error::TextNotFound),

src/reader/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ impl EncodingRef {
269269
/// _ => (),
270270
/// }
271271
/// }
272-
/// Ok(Event::Text(e)) => txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()),
272+
/// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),
273273
///
274274
/// // There are several other `Event`s we do not consider here
275275
/// _ => (),

src/reader/ns_reader.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ impl<R: BufRead> NsReader<R> {
329329
/// }
330330
/// }
331331
/// Event::Text(e) => {
332-
/// txt.push(e.decode_and_unescape(&reader).unwrap().into_owned())
332+
/// txt.push(e.unescape().unwrap().into_owned())
333333
/// }
334334
/// Event::Eof => break,
335335
/// _ => (),
@@ -388,7 +388,7 @@ impl<R: BufRead> NsReader<R> {
388388
/// (_, Event::Start(_)) => unreachable!(),
389389
///
390390
/// (_, Event::Text(e)) => {
391-
/// txt.push(e.decode_and_unescape(&reader).unwrap().into_owned())
391+
/// txt.push(e.unescape().unwrap().into_owned())
392392
/// }
393393
/// (_, Event::Eof) => break,
394394
/// _ => (),
@@ -566,7 +566,7 @@ impl<'i> NsReader<&'i [u8]> {
566566
/// }
567567
/// }
568568
/// Event::Text(e) => {
569-
/// txt.push(e.decode_and_unescape(&reader).unwrap().into_owned())
569+
/// txt.push(e.unescape().unwrap().into_owned())
570570
/// }
571571
/// Event::Eof => break,
572572
/// _ => (),
@@ -624,7 +624,7 @@ impl<'i> NsReader<&'i [u8]> {
624624
/// (_, Event::Start(_)) => unreachable!(),
625625
///
626626
/// (_, Event::Text(e)) => {
627-
/// txt.push(e.decode_and_unescape(&reader).unwrap().into_owned())
627+
/// txt.push(e.unescape().unwrap().into_owned())
628628
/// }
629629
/// (_, Event::Eof) => break,
630630
/// _ => (),

tests/test.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ fn test_koi8_r_encoding() {
9898
loop {
9999
match r.read_event() {
100100
Ok(Text(e)) => {
101-
e.decode_and_unescape(&r).unwrap();
101+
e.unescape().unwrap();
102102
}
103103
Ok(Eof) => break,
104104
_ => (),
@@ -157,7 +157,7 @@ fn fuzz_101() {
157157
}
158158
}
159159
Ok(Text(e)) => {
160-
if e.decode_and_unescape(&reader).is_err() {
160+
if e.unescape().is_err() {
161161
break;
162162
}
163163
}

tests/unit_tests.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -506,7 +506,7 @@ fn test_escaped_content() {
506506
"content unexpected: expecting '&lt;test&gt;', got '{:?}'",
507507
from_utf8(&*e)
508508
);
509-
match e.decode_and_unescape(&r) {
509+
match e.unescape() {
510510
Ok(c) => assert_eq!(c, "<test>"),
511511
Err(e) => panic!(
512512
"cannot escape content at position {}: {:?}",
@@ -595,7 +595,7 @@ fn test_read_write_roundtrip_escape_text() -> Result<()> {
595595
match reader.read_event()? {
596596
Eof => break,
597597
Text(e) => {
598-
let t = e.decode_and_unescape(&reader).unwrap();
598+
let t = e.unescape().unwrap();
599599
assert!(writer
600600
.write_event(Text(BytesText::from_plain_str(&t)))
601601
.is_ok());
@@ -737,7 +737,7 @@ mod decode_with_bom_removal {
737737

738738
loop {
739739
match reader.read_event() {
740-
Ok(StartText(e)) => txt.push(e.decode_with_bom_removal(reader.decoder()).unwrap()),
740+
Ok(StartText(e)) => txt.push(e.decode_with_bom_removal().unwrap()),
741741
Ok(Eof) => break,
742742
_ => (),
743743
}
@@ -760,7 +760,7 @@ mod decode_with_bom_removal {
760760

761761
loop {
762762
match reader.read_event() {
763-
Ok(StartText(e)) => txt.push(e.decode_with_bom_removal(reader.decoder()).unwrap()),
763+
Ok(StartText(e)) => txt.push(e.decode_with_bom_removal().unwrap()),
764764
Ok(Eof) => break,
765765
_ => (),
766766
}
@@ -778,7 +778,7 @@ mod decode_with_bom_removal {
778778

779779
loop {
780780
match reader.read_event() {
781-
Ok(StartText(e)) => txt.push(e.decode_with_bom_removal(reader.decoder()).unwrap()),
781+
Ok(StartText(e)) => txt.push(e.decode_with_bom_removal().unwrap()),
782782
Ok(Eof) => break,
783783
_ => (),
784784
}
@@ -798,7 +798,7 @@ mod decode_with_bom_removal {
798798

799799
loop {
800800
match reader.read_event() {
801-
Ok(StartText(e)) => txt.push(e.decode_with_bom_removal(reader.decoder()).unwrap()),
801+
Ok(StartText(e)) => txt.push(e.decode_with_bom_removal().unwrap()),
802802
Ok(Eof) => break,
803803
_ => (),
804804
}

0 commit comments

Comments
 (0)