Skip to content

Commit

Permalink
Properly use xsi:nil to deserialize null values via serde
Browse files Browse the repository at this point in the history
This commit fixes an issue that causes `quick-xml` trying to deserialize
empty tags via the serde interface even if these tags were explicitly
marked as `xsi:nil="true"`

For example the following XML failed to deserialize before this commit:

```xml
<bar>
	<foo xsi:nil="true"/>
</bar>
```

into the following rust type:

```rust
#[derive(Deserialize)]
struct Bar {
    foo: Option<Inner>,
}

#[derive(Deserialize)]
struct Foo {
    baz: String,
}
```

Before this commit this failed to deserialize with an error message that
complained that the `baz` field was missing. After this commit this uses
the `xsi:nil` attribute to deserialize this into `foo: None` instead.
The standard (https://www.w3.org/TR/xmlschema-1/#xsi_nil) seems to
support this behaviour. 

Fix #497
  • Loading branch information
weiznich committed Jan 29, 2025
1 parent 8f91a9c commit ad89a00
Show file tree
Hide file tree
Showing 4 changed files with 249 additions and 13 deletions.
10 changes: 9 additions & 1 deletion src/de/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -540,8 +540,16 @@ where
where
V: Visitor<'de>,
{
match self.map.de.peek()? {
let _ = self.map.de.peek()?;
match self.map.de.readable_peek().expect("This exists as we called peek before") {
DeEvent::Text(t) if t.is_empty() => visitor.visit_none(),
DeEvent::Start(start)
// if the `xsi:nil` attribute is set to true we got a none value
if start.has_nil_attr(&self.map.de.reader.reader) =>
{
self.map.de.skip_nil_tag()?;
visitor.visit_none()
}
_ => visitor.visit_some(self),
}
}
Expand Down
86 changes: 75 additions & 11 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2010,14 +2010,15 @@ pub use self::resolver::{EntityResolver, PredefinedEntityResolver};
pub use self::simple_type::SimpleTypeDeserializer;
pub use crate::errors::serialize::DeError;

use crate::name::{LocalName, Namespace, PrefixDeclaration, ResolveResult};
use crate::{
de::map::ElementMapAccess,
encoding::Decoder,
errors::Error,
events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
name::QName,
reader::Reader,
utils::CowRef,
NsReader,
};
use serde::de::{
self, Deserialize, DeserializeOwned, DeserializeSeed, IntoDeserializer, SeqAccess, Visitor,
Expand Down Expand Up @@ -2534,6 +2535,17 @@ where
}
}

fn readable_peek(&self) -> Option<&DeEvent<'de>> {
#[cfg(not(feature = "overlapped-lists"))]
{
self.peek.as_ref()
}
#[cfg(feature = "overlapped-lists")]
{
self.read.front()
}
}

fn next(&mut self) -> Result<DeEvent<'de>, DeError> {
// Replay skipped or peeked events
#[cfg(feature = "overlapped-lists")]
Expand Down Expand Up @@ -2764,6 +2776,14 @@ where
}
self.reader.read_to_end(name)
}

fn skip_nil_tag(&mut self) -> Result<(), DeError> {
let DeEvent::Start(start) = self.next()? else {
unreachable!("Only call this if the next event is a start event")
};
let name = start.name();
self.read_to_end(name)
}
}

impl<'de> Deserializer<'de, SliceReader<'de>> {
Expand All @@ -2783,7 +2803,7 @@ where
/// Create new deserializer that will borrow data from the specified string
/// and use specified entity resolver.
pub fn from_str_with_resolver(source: &'de str, entity_resolver: E) -> Self {
let mut reader = Reader::from_str(source);
let mut reader = NsReader::from_str(source);
let config = reader.config_mut();
config.expand_empty_elements = true;

Expand Down Expand Up @@ -2826,7 +2846,7 @@ where
/// will borrow instead of copy. If you have `&[u8]` which is known to represent
/// UTF-8, you can decode it first before using [`from_str`].
pub fn with_resolver(reader: R, entity_resolver: E) -> Self {
let mut reader = Reader::from_reader(reader);
let mut reader = NsReader::from_reader(reader);
let config = reader.config_mut();
config.expand_empty_elements = true;

Expand Down Expand Up @@ -2945,9 +2965,17 @@ where
where
V: Visitor<'de>,
{
match self.peek()? {
let _ = self.peek()?;
match self.readable_peek().expect("This exists as we called peek before") {
DeEvent::Text(t) if t.is_empty() => visitor.visit_none(),
DeEvent::Eof => visitor.visit_none(),
DeEvent::Start(start)
// if the `xsi:nil` attribute is set to true we got a none value
if start.has_nil_attr(&self.reader.reader) =>
{
self.skip_nil_tag()?;
visitor.visit_none()
}
_ => visitor.visit_some(self),
}
}
Expand Down Expand Up @@ -3071,14 +3099,22 @@ pub trait XmlRead<'i> {

/// A copy of the reader's decoder used to decode strings.
fn decoder(&self) -> Decoder;

/// Resolves a potentially qualified **attribute name** into _(namespace name, local name)_.
///
/// See [`NsReader::resolve_attribute`] for details
fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>);

/// Get the current default namespace
fn default_namespace(&self) -> Option<Namespace<'_>>;
}

/// XML input source that reads from a std::io input stream.
///
/// You cannot create it, it is created automatically when you call
/// [`Deserializer::from_reader`]
pub struct IoReader<R: BufRead> {
reader: Reader<R>,
reader: NsReader<R>,
start_trimmer: StartTrimmer,
buf: Vec<u8>,
}
Expand Down Expand Up @@ -3113,7 +3149,7 @@ impl<R: BufRead> IoReader<R> {
/// assert_eq!(reader.error_position(), 28);
/// assert_eq!(reader.buffer_position(), 41);
/// ```
pub const fn get_ref(&self) -> &Reader<R> {
pub const fn get_ref(&self) -> &NsReader<R> {
&self.reader
}
}
Expand All @@ -3140,14 +3176,28 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
fn decoder(&self) -> Decoder {
self.reader.decoder()
}

fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) {
self.reader.resolve_attribute(name)
}

fn default_namespace(&self) -> Option<Namespace<'_>> {
self.reader.prefixes().find_map(|(key, value)| {
if PrefixDeclaration::Default == key {
Some(value)
} else {
None
}
})
}
}

/// XML input source that reads from a slice of bytes and can borrow from it.
///
/// You cannot create it, it is created automatically when you call
/// [`Deserializer::from_str`].
pub struct SliceReader<'de> {
reader: Reader<&'de [u8]>,
reader: NsReader<&'de [u8]>,
start_trimmer: StartTrimmer,
}

Expand Down Expand Up @@ -3180,7 +3230,7 @@ impl<'de> SliceReader<'de> {
/// assert_eq!(reader.error_position(), 28);
/// assert_eq!(reader.buffer_position(), 41);
/// ```
pub const fn get_ref(&self) -> &Reader<&'de [u8]> {
pub const fn get_ref(&self) -> &NsReader<&'de [u8]> {
&self.reader
}
}
Expand All @@ -3205,6 +3255,20 @@ impl<'de> XmlRead<'de> for SliceReader<'de> {
fn decoder(&self) -> Decoder {
self.reader.decoder()
}

fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) {
self.reader.resolve_attribute(name)
}

fn default_namespace(&self) -> Option<Namespace<'_>> {
self.reader.prefixes().find_map(|(key, value)| {
if PrefixDeclaration::Default == key {
Some(value)
} else {
None
}
})
}
}

#[cfg(test)]
Expand Down Expand Up @@ -3781,12 +3845,12 @@ mod tests {
"#;

let mut reader1 = IoReader {
reader: Reader::from_reader(s.as_bytes()),
reader: NsReader::from_reader(s.as_bytes()),
start_trimmer: StartTrimmer::default(),
buf: Vec::new(),
};
let mut reader2 = SliceReader {
reader: Reader::from_str(s),
reader: NsReader::from_str(s),
start_trimmer: StartTrimmer::default(),
};

Expand All @@ -3812,7 +3876,7 @@ mod tests {
"#;

let mut reader = SliceReader {
reader: Reader::from_str(s),
reader: NsReader::from_str(s),
start_trimmer: StartTrimmer::default(),
};

Expand Down
35 changes: 34 additions & 1 deletion src/events/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,17 @@ use crate::errors::{Error, IllFormedError};
use crate::escape::{
escape, minimal_escape, partial_escape, resolve_predefined_entity, unescape_with,
};
#[cfg(feature = "serialize")]
use crate::name::Namespace;
use crate::name::{LocalName, QName};
#[cfg(feature = "serialize")]
use crate::utils::CowRef;
use crate::utils::{name_len, trim_xml_end, trim_xml_start, write_cow_string, Bytes};
use attributes::{AttrError, Attribute, Attributes};

#[cfg(feature = "serialize")]
const XSI_NAMESPACE_URL: Namespace = Namespace(b"http://www.w3.org/2001/XMLSchema-instance");

/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`.
///
/// The name can be accessed using the [`name`] or [`local_name`] methods.
Expand Down Expand Up @@ -232,6 +237,34 @@ impl<'a> BytesStart<'a> {
Cow::Owned(ref o) => CowRef::Slice(&o[..self.name_len]),
}
}

/// This method checks if the current tag has a `xsi::nil` attribute
///
/// This attribute should be used for deciding if the value is not set
/// according to https://www.w3.org/TR/xmlschema-1/#xsi_nil
#[cfg(feature = "serialize")]
pub(crate) fn has_nil_attr(&self, reader: &dyn crate::de::XmlRead) -> bool {
use crate::name::ResolveResult;

let default_ns = reader.default_namespace();
self.attributes().any(|attr| {
if let Ok(attr) = attr {
let value_is_true = &*attr.value == b"true" || &*attr.value == b"1";
let might_be_nil_attr = attr.key.0.ends_with(b"nil");
if value_is_true && might_be_nil_attr {
let (res, local_name) = reader.resolve_attribute(attr.key);
(matches!(res, ResolveResult::Bound(XSI_NAMESPACE_URL))
|| (matches!(res, ResolveResult::Unbound)
&& default_ns == Some(XSI_NAMESPACE_URL)))
&& local_name.as_ref() == b"nil"
} else {
false
}
} else {
false
}
})
}
}

/// Attribute-related methods
Expand Down Expand Up @@ -278,7 +311,7 @@ impl<'a> BytesStart<'a> {
}

/// Returns an iterator over the attributes of this tag.
pub fn attributes(&self) -> Attributes {
pub fn attributes<'b>(&'b self) -> Attributes<'b> {
Attributes::wrap(&self.buf, self.name_len, false)
}

Expand Down
Loading

0 comments on commit ad89a00

Please sign in to comment.