Skip to content

Commit 33b7771

Browse files
authored
Optionally ignore unknown attributes and children (#8)
1 parent 3cea936 commit 33b7771

File tree

5 files changed

+54
-11
lines changed

5 files changed

+54
-11
lines changed

README.md

+5-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ Requires Python 3.7 or higher.
2727

2828
* Whitespace and comments aren't supported in the data model. They must be stripped when loading the XML
2929
* So far, I haven't found any examples where XML can't be mapped to a dataclass, but it's likely possible given how complex XML is
30-
* Strict mapping. Currently, if an unknown element is encountered, an error is raised (see [#3](https://github.com/tobywf/xml_dataclasses/issues/3), pull requests welcome)
3130
* No typing/type conversions. Since XML is untyped, only string values are currently allowed. Type conversions are tricky to implement in a type-safe and extensible manner.
3231
* Dataclasses must be written by hand; no tools are provided to generate these from DTDs, XML schema definitions, or RELAX NG schemas
3332

@@ -54,6 +53,8 @@ class Foo:
5453

5554
For now, you can work around this limitation with properties that do the conversion, and perform post-load validation.
5655

56+
By default, unknown attributes raise an error. This can be disabled by passing an `Options` instance with `ignore_unknown_attributes=True` to `load`.
57+
5758
### Defining text
5859

5960
Like attributes, text can be either `str` or `Optional[str]`. You must declare text content with the `text` function. Similar to `rename`, this function can use an existing field definition, or take the `default` argument. Text cannot be renamed or namespaced. Every class can only have one field defining text content. If a class has text content, it cannot have any children.
@@ -87,6 +88,8 @@ If a class has children, it cannot have text content.
8788

8889
Children can be renamed via the `rename` function. However, attempting to set a namespace is invalid, since the namespace is provided by the child type's XML dataclass. Also, unions of XML dataclasses must have the same namespace (you can use different fields with renaming if they have different namespaces, since the XML names will be resolved as a combination of namespace and name).
8990

91+
By default, unknown children raise an error. This can be disabled by passing an `Options` instance with `ignore_unknown_children=True` to `load`.
92+
9093
### Defining post-load validation
9194

9295
Simply implement an instance method called `xml_validate` with no parameters, and no return value (if you're using type hints):
@@ -216,6 +219,7 @@ This makes sense in many cases, but possibly not every case.
216219
### [0.0.7] - unreleased
217220

218221
* Warn if comments are found/don't treat comments as child elements in error messages
222+
* Allow lenient loading of undeclared attributes or children
219223

220224
### [0.0.6] - 2020-03-25
221225

src/xml_dataclasses/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
logging.getLogger(__name__).addHandler(logging.NullHandler())
44

5+
from .options import Options # isort:skip
56
from .modifiers import rename, text, ignored # isort:skip
67
from .resolve_types import ( # isort:skip
78
is_xml_dataclass,

src/xml_dataclasses/options.py

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from dataclasses import dataclass
2+
3+
4+
@dataclass
5+
class Options:
6+
ignore_unknown_attributes: bool = False
7+
ignore_unknown_children: bool = False

src/xml_dataclasses/serde.py

+18-7
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from lxml.etree import _Comment as Comment # type: ignore
88

99
from .lxml_utils import strip_ns
10+
from .options import Options
1011
from .resolve_types import (
1112
ChildInfo,
1213
TextInfo,
@@ -18,7 +19,9 @@
1819
_T = TypeVar("_T")
1920

2021

21-
def _load_attributes(cls: Type[XmlDataclass], el: Any) -> Mapping[str, str]:
22+
def _load_attributes(
23+
cls: Type[XmlDataclass], el: Any, options: Options
24+
) -> Mapping[str, str]:
2225
values = {}
2326
processed = set()
2427

@@ -37,7 +40,7 @@ def _load_attributes(cls: Type[XmlDataclass], el: Any) -> Mapping[str, str]:
3740
values[attr.dt_name] = attr_value
3841

3942
unprocessed = set(el.attrib.keys()) - processed
40-
if unprocessed:
43+
if unprocessed and not options.ignore_unknown_attributes:
4144
readable = ", ".join(f"'{v}'" for v in unprocessed)
4245
raise ValueError(f"Found undeclared attributes on '{el.tag}': {readable}")
4346

@@ -60,7 +63,9 @@ def _load_text(info: TextInfo, el: Any) -> Mapping[str, str]:
6063
return {info.dt_name: text}
6164

6265

63-
def _load_children(cls: Type[XmlDataclass], el: Any) -> Mapping[str, XmlDataclass]:
66+
def _load_children(
67+
cls: Type[XmlDataclass], el: Any, options: Options
68+
) -> Mapping[str, XmlDataclass]:
6469
if el.text and el.text.strip():
6570
raise ValueError(f"Element '{el.tag}' has text (expected child elements only)")
6671

@@ -126,7 +131,7 @@ def _get_one_child_value(child: ChildInfo) -> Any:
126131
values[child.dt_name] = child_value
127132

128133
unprocessed = el_children.keys() - processed
129-
if unprocessed:
134+
if unprocessed and not options.ignore_unknown_children:
130135
readable = ", ".join(f"'{v}'" for v in unprocessed)
131136
raise ValueError(f"Found undeclared child elements on '{el.tag}': {readable}")
132137

@@ -144,20 +149,26 @@ def _validate_name(cls: Type[XmlDataclass], el: Any, name: str) -> None:
144149

145150

146151
def load(
147-
cls: Type[XmlDataclassInstance], el: Any, name: Optional[str] = None
152+
cls: Type[XmlDataclassInstance],
153+
el: Any,
154+
name: Optional[str] = None,
155+
options: Optional[Options] = None,
148156
) -> XmlDataclassInstance:
149157
if not is_xml_dataclass(cls):
150158
raise ValueError(f"Class '{cls!r}' is not an XML dataclass")
151159

160+
if not options:
161+
options = Options()
162+
152163
if name:
153164
_validate_name(cls, el, name)
154165

155-
attr_values = _load_attributes(cls, el)
166+
attr_values = _load_attributes(cls, el, options)
156167
# are we just looking for text content?
157168
if cls.__text_field__:
158169
text_values = _load_text(cls.__text_field__, el)
159170
else:
160-
child_values = _load_children(cls, el)
171+
child_values = _load_children(cls, el, options)
161172

162173
if cls.__text_field__:
163174
child_values = {}

tests/load_test.py

+23-3
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from lxml import etree
66

77
# test public exports
8-
from xml_dataclasses import load, text, xml_dataclass
8+
from xml_dataclasses import Options, load, text, xml_dataclass
99

1010
NS = "https://tobywf.com"
1111

@@ -135,7 +135,7 @@ class Foo:
135135
assert foo.bar == "baz"
136136

137137

138-
def test_load_attributes_undeclared():
138+
def test_load_attributes_undeclared_strict():
139139
@xml_dataclass
140140
class Foo:
141141
__ns__ = None
@@ -150,6 +150,16 @@ class Foo:
150150
assert "'bar'" in msg
151151

152152

153+
def test_load_attributes_undeclared_lenient():
154+
@xml_dataclass
155+
class Foo:
156+
__ns__ = None
157+
158+
el = etree.fromstring('<foo bar="baz" />')
159+
options = Options(ignore_unknown_attributes=True)
160+
load(Foo, el, "foo", options)
161+
162+
153163
def test_load_text_present_required():
154164
@xml_dataclass
155165
class Foo:
@@ -290,7 +300,7 @@ class Foo:
290300
assert foo.bar is None
291301

292302

293-
def test_load_children_single_undeclared():
303+
def test_load_children_single_undeclared_strict():
294304
@xml_dataclass
295305
class Foo:
296306
__ns__ = None
@@ -305,6 +315,16 @@ class Foo:
305315
assert "'bar'" in msg
306316

307317

318+
def test_load_children_single_undeclared_lenient():
319+
@xml_dataclass
320+
class Foo:
321+
__ns__ = None
322+
323+
el = etree.fromstring("<foo><bar /></foo>")
324+
options = Options(ignore_unknown_children=True)
325+
load(Foo, el, "foo", options)
326+
327+
308328
def test_load_children_single_multiple_els():
309329
@xml_dataclass
310330
class Foo:

0 commit comments

Comments
 (0)