diff --git a/src/de/map.rs b/src/de/map.rs index 9f2a9876..a4be57e8 100644 --- a/src/de/map.rs +++ b/src/de/map.rs @@ -540,8 +540,16 @@ where where V: Visitor<'de>, { - match self.map.de.peek()? { + let _ = self.map.de.peek()?; + match self.map.de.readable_peek().expect("This exists as we called peek before") { DeEvent::Text(t) if t.is_empty() => visitor.visit_none(), + DeEvent::Start(start) + // if the `xsi:nil` attribute is set to true we got a none value + if start.has_nil_attr(&self.map.de.reader.reader) => + { + self.map.de.skip_nil_tag()?; + visitor.visit_none() + } _ => visitor.visit_some(self), } } diff --git a/src/de/mod.rs b/src/de/mod.rs index 484c31b0..9cfddd66 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -2010,14 +2010,15 @@ pub use self::resolver::{EntityResolver, PredefinedEntityResolver}; pub use self::simple_type::SimpleTypeDeserializer; pub use crate::errors::serialize::DeError; +use crate::name::{LocalName, Namespace, PrefixDeclaration, ResolveResult}; use crate::{ de::map::ElementMapAccess, encoding::Decoder, errors::Error, events::{BytesCData, BytesEnd, BytesStart, BytesText, Event}, name::QName, - reader::Reader, utils::CowRef, + NsReader, }; use serde::de::{ self, Deserialize, DeserializeOwned, DeserializeSeed, IntoDeserializer, SeqAccess, Visitor, @@ -2534,6 +2535,17 @@ where } } + fn readable_peek(&self) -> Option<&DeEvent<'de>> { + #[cfg(not(feature = "overlapped-lists"))] + { + self.peek.as_ref() + } + #[cfg(feature = "overlapped-lists")] + { + self.read.front() + } + } + fn next(&mut self) -> Result, DeError> { // Replay skipped or peeked events #[cfg(feature = "overlapped-lists")] @@ -2764,6 +2776,14 @@ where } self.reader.read_to_end(name) } + + fn skip_nil_tag(&mut self) -> Result<(), DeError> { + let DeEvent::Start(start) = self.next()? else { + unreachable!("Only call this if the next event is a start event") + }; + let name = start.name(); + self.read_to_end(name) + } } impl<'de> Deserializer<'de, SliceReader<'de>> { @@ -2783,7 +2803,7 @@ where /// Create new deserializer that will borrow data from the specified string /// and use specified entity resolver. pub fn from_str_with_resolver(source: &'de str, entity_resolver: E) -> Self { - let mut reader = Reader::from_str(source); + let mut reader = NsReader::from_str(source); let config = reader.config_mut(); config.expand_empty_elements = true; @@ -2826,7 +2846,7 @@ where /// will borrow instead of copy. If you have `&[u8]` which is known to represent /// UTF-8, you can decode it first before using [`from_str`]. pub fn with_resolver(reader: R, entity_resolver: E) -> Self { - let mut reader = Reader::from_reader(reader); + let mut reader = NsReader::from_reader(reader); let config = reader.config_mut(); config.expand_empty_elements = true; @@ -2945,9 +2965,17 @@ where where V: Visitor<'de>, { - match self.peek()? { + let _ = self.peek()?; + match self.readable_peek().expect("This exists as we called peek before") { DeEvent::Text(t) if t.is_empty() => visitor.visit_none(), DeEvent::Eof => visitor.visit_none(), + DeEvent::Start(start) + // if the `xsi:nil` attribute is set to true we got a none value + if start.has_nil_attr(&self.reader.reader) => + { + self.skip_nil_tag()?; + visitor.visit_none() + } _ => visitor.visit_some(self), } } @@ -3071,6 +3099,14 @@ pub trait XmlRead<'i> { /// A copy of the reader's decoder used to decode strings. fn decoder(&self) -> Decoder; + + /// Resolves a potentially qualified **attribute name** into _(namespace name, local name)_. + /// + /// See [`NsReader::resolve_attribute`] for details + fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>); + + /// Get the current default namespace + fn default_namespace(&self) -> Option>; } /// XML input source that reads from a std::io input stream. @@ -3078,7 +3114,7 @@ pub trait XmlRead<'i> { /// You cannot create it, it is created automatically when you call /// [`Deserializer::from_reader`] pub struct IoReader { - reader: Reader, + reader: NsReader, start_trimmer: StartTrimmer, buf: Vec, } @@ -3113,7 +3149,7 @@ impl IoReader { /// assert_eq!(reader.error_position(), 28); /// assert_eq!(reader.buffer_position(), 41); /// ``` - pub const fn get_ref(&self) -> &Reader { + pub const fn get_ref(&self) -> &NsReader { &self.reader } } @@ -3140,6 +3176,20 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader { fn decoder(&self) -> Decoder { self.reader.decoder() } + + fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) { + self.reader.resolve_attribute(name) + } + + fn default_namespace(&self) -> Option> { + self.reader.prefixes().find_map(|(key, value)| { + if PrefixDeclaration::Default == key { + Some(value) + } else { + None + } + }) + } } /// XML input source that reads from a slice of bytes and can borrow from it. @@ -3147,7 +3197,7 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader { /// You cannot create it, it is created automatically when you call /// [`Deserializer::from_str`]. pub struct SliceReader<'de> { - reader: Reader<&'de [u8]>, + reader: NsReader<&'de [u8]>, start_trimmer: StartTrimmer, } @@ -3180,7 +3230,7 @@ impl<'de> SliceReader<'de> { /// assert_eq!(reader.error_position(), 28); /// assert_eq!(reader.buffer_position(), 41); /// ``` - pub const fn get_ref(&self) -> &Reader<&'de [u8]> { + pub const fn get_ref(&self) -> &NsReader<&'de [u8]> { &self.reader } } @@ -3205,6 +3255,20 @@ impl<'de> XmlRead<'de> for SliceReader<'de> { fn decoder(&self) -> Decoder { self.reader.decoder() } + + fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) { + self.reader.resolve_attribute(name) + } + + fn default_namespace(&self) -> Option> { + self.reader.prefixes().find_map(|(key, value)| { + if PrefixDeclaration::Default == key { + Some(value) + } else { + None + } + }) + } } #[cfg(test)] @@ -3781,12 +3845,12 @@ mod tests { "#; let mut reader1 = IoReader { - reader: Reader::from_reader(s.as_bytes()), + reader: NsReader::from_reader(s.as_bytes()), start_trimmer: StartTrimmer::default(), buf: Vec::new(), }; let mut reader2 = SliceReader { - reader: Reader::from_str(s), + reader: NsReader::from_str(s), start_trimmer: StartTrimmer::default(), }; @@ -3812,7 +3876,7 @@ mod tests { "#; let mut reader = SliceReader { - reader: Reader::from_str(s), + reader: NsReader::from_str(s), start_trimmer: StartTrimmer::default(), }; diff --git a/src/events/mod.rs b/src/events/mod.rs index ded2c6d0..4bae2ce5 100644 --- a/src/events/mod.rs +++ b/src/events/mod.rs @@ -51,12 +51,17 @@ use crate::errors::{Error, IllFormedError}; use crate::escape::{ escape, minimal_escape, partial_escape, resolve_predefined_entity, unescape_with, }; +#[cfg(feature = "serialize")] +use crate::name::Namespace; use crate::name::{LocalName, QName}; #[cfg(feature = "serialize")] use crate::utils::CowRef; use crate::utils::{name_len, trim_xml_end, trim_xml_start, write_cow_string, Bytes}; use attributes::{AttrError, Attribute, Attributes}; +#[cfg(feature = "serialize")] +const XSI_NAMESPACE_URL: Namespace = Namespace(b"http://www.w3.org/2001/XMLSchema-instance"); + /// Opening tag data (`Event::Start`), with optional attributes: ``. /// /// The name can be accessed using the [`name`] or [`local_name`] methods. @@ -232,6 +237,34 @@ impl<'a> BytesStart<'a> { Cow::Owned(ref o) => CowRef::Slice(&o[..self.name_len]), } } + + /// This method checks if the current tag has a `xsi::nil` attribute + /// + /// This attribute should be used for deciding if the value is not set + /// according to https://www.w3.org/TR/xmlschema-1/#xsi_nil + #[cfg(feature = "serialize")] + pub(crate) fn has_nil_attr(&self, reader: &dyn crate::de::XmlRead) -> bool { + use crate::name::ResolveResult; + + let default_ns = reader.default_namespace(); + self.attributes().any(|attr| { + if let Ok(attr) = attr { + let value_is_true = &*attr.value == b"true" || &*attr.value == b"1"; + let might_be_nil_attr = attr.key.0.ends_with(b"nil"); + if value_is_true && might_be_nil_attr { + let (res, local_name) = reader.resolve_attribute(attr.key); + (matches!(res, ResolveResult::Bound(XSI_NAMESPACE_URL)) + || (matches!(res, ResolveResult::Unbound) + && default_ns == Some(XSI_NAMESPACE_URL))) + && local_name.as_ref() == b"nil" + } else { + false + } + } else { + false + } + }) + } } /// Attribute-related methods @@ -278,7 +311,7 @@ impl<'a> BytesStart<'a> { } /// Returns an iterator over the attributes of this tag. - pub fn attributes(&self) -> Attributes { + pub fn attributes<'b>(&'b self) -> Attributes<'b> { Attributes::wrap(&self.buf, self.name_len, false) } diff --git a/tests/serde-de.rs b/tests/serde-de.rs index 75526b40..a63f9a81 100644 --- a/tests/serde-de.rs +++ b/tests/serde-de.rs @@ -1639,3 +1639,134 @@ mod xml_prolog { ); } } + +mod nil { + use quick_xml::DeError; + + #[derive(Debug, serde::Deserialize, PartialEq)] + struct Foo { + tag: String, + } + + #[derive(Debug, serde::Deserialize, PartialEq)] + struct Bar { + foo: Option, + } + + #[derive(Debug, PartialEq, serde::Deserialize)] + struct Helper { + #[serde(rename = "@attr")] + attr: String, + #[serde(rename = "$value")] + inner: Option, + } + + #[derive(Debug, PartialEq, serde::Deserialize)] + struct BarWithHelper { + foo: Helper, + } + + macro_rules! assert_error_matches { + ($res: expr, $err: pat) => { + assert!( + matches!($res, Err($err)), + concat!("Expected `", stringify!($err), "`, but got `{:?}`"), + $res + ); + }; + } + + #[test] + fn true_() { + let data = r#""#; + + let res = quick_xml::de::from_str::>(data).unwrap(); + assert_eq!(res, None); + + let data = r#""#; + let res = quick_xml::de::from_str::(data).unwrap(); + assert_eq!( + res, + Helper { + attr: String::from("value"), + inner: None + } + ); + + let data = r#"Foo"#; + let res = quick_xml::de::from_str::>(data).unwrap(); + assert_eq!(res, None); + } + + #[test] + fn false_() { + let data = + r#""#; + let res = quick_xml::de::from_str::>(data); + assert_error_matches!(res, DeError::Custom(_)); + let data = r#""#; + let res = quick_xml::de::from_str::>(data); + assert_error_matches!(res, DeError::Custom(_)); + let data = r#"Foo"#; + let res = quick_xml::de::from_str::>(data).unwrap(); + assert_eq!( + res, + Some(Foo { + tag: String::from("Foo") + }) + ); + } + + #[test] + fn nested_nil_true() { + let data = r#""#; + let res = quick_xml::de::from_str::(data).unwrap(); + assert_eq!(res, Bar { foo: None }); + + let data = r#""#; + let res = quick_xml::de::from_str::(data).unwrap(); + assert_eq!( + res, + BarWithHelper { + foo: Helper { + attr: String::from("value"), + inner: None + } + } + ); + + let data = r#"Foo"#; + let res = quick_xml::de::from_str::(data).unwrap(); + assert_eq!(res, Bar { foo: None }); + } + + #[test] + fn nested_nil_false() { + let data = r#""#; + let res = quick_xml::de::from_str::(data); + assert_error_matches!(res, DeError::Custom(_)); + + let data = r#""#; + let res = quick_xml::de::from_str::(data).unwrap(); + assert_eq!( + res, + BarWithHelper { + foo: Helper { + attr: String::from("value"), + inner: None + } + } + ); + + let data = r#"Foo"#; + let res = quick_xml::de::from_str::(data).unwrap(); + assert_eq!( + res, + Bar { + foo: Some(Foo { + tag: String::from("Foo") + }) + } + ); + } +}