Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
- Speed up membership tests (`key in ...`) on out-of-order tables with a native `OutOfOrderTableProxy.__contains__`, completing [#483](https://github.com/python-poetry/tomlkit/issues/483) for the last mapping type that still inherited the slow `MutableMapping` mixin (which resolves the value and builds an exception on every absent key). ([#515](https://github.com/python-poetry/tomlkit/pull/515))
- Speed up parsing documents with many dotted keys or table headers sharing a prefix by validating out-of-order tables incrementally: each new fragment is merged into a cached validation container once, instead of re-merging (and deep-copying) every earlier fragment on each append, turning a super-cubic worst case into linear time (80 shared-prefix dotted keys: ~8 s → ~10 ms). ([#479](https://github.com/python-poetry/tomlkit/issues/479))
- Speed up parsing of arrays that close right after a value (e.g. the `files = [...]` blocks that dominate lock files): the parser no longer attempts to read a value while sitting on the closing `]`, which previously built an `UnexpectedCharError` just to discard it — and constructing that exception eagerly computes a line/column by scanning the whole document, making it O(document size) per such array. ([#517](https://github.com/python-poetry/tomlkit/pull/517))
- Speed up `unwrap()` (converting a parsed document to a plain `dict`) by resolving each key directly from the container's key map instead of iterating the inherited `MutableMapping` view, which rebuilt a `SingleKey` from the bare string for every key just to re-look-up the value. Out-of-order tables still resolve through their proxy, so their validation is unchanged. ([#521](https://github.com/python-poetry/tomlkit/pull/521))

### Fixed

Expand Down
23 changes: 23 additions & 0 deletions tests/test_toml_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from tomlkit import ws
from tomlkit._utils import _utc
from tomlkit.api import document
from tomlkit.exceptions import KeyAlreadyPresent
from tomlkit.exceptions import NonExistentKey
from tomlkit.toml_document import TOMLDocument

Expand Down Expand Up @@ -579,6 +580,28 @@ def test_out_of_order_table_can_add_multiple_tables() -> None:
assert doc["a"]["a"] == {"b": {"x": 1}, "c": {"y": 1}, "d": {"z": 1}}


def test_unwrap_out_of_order_tables() -> None:
    """unwrap() merges out-of-order table fragments into a single dict."""
    # [a.x] and [a.y] are separated by [foo]; unwrap() resolves them through
    # the same proxy as item access, so both end up under one "a" entry.
    content = "[a.x]\np = 1\n[foo]\nbar = 2\n[a.y]\nq = 3\n"
    expected = {"a": {"x": {"p": 1}, "y": {"q": 3}}, "foo": {"bar": 2}}

    assert parse(content).unwrap() == expected


def test_unwrap_preserves_raise_on_invalid_out_of_order_fragment() -> None:
    """unwrap() must still raise on a conflicting out-of-order fragment."""
    # Regression guard for the unwrap() fast path. The document below is
    # *invalid* TOML: `b` is first a plain value under [a] and is then
    # reopened as a table by the out-of-order [a.b] header. Ideally tomlkit
    # would reject this while parsing (it does for the in-order form, as does
    # the stdlib tomllib). Today the conflict is only noticed lazily, when the
    # out-of-order proxy is built, so it surfaces at access/unwrap time.
    #
    # This test does NOT endorse that deferred timing. It only pins that the
    # faster unwrap() keeps going through the proxy and still raises, instead
    # of silently merging the conflict into a corrupted dict.
    invalid_toml = "[a]\nb = true\n[zz]\nq = 9\n[a.b]\narr = [1, 2]\n"

    # Parse outside the `raises` block: only unwrap() may satisfy the check.
    doc = parse(invalid_toml)

    with pytest.raises(KeyAlreadyPresent):
        doc.unwrap()


def test_out_of_order_tables_are_still_dicts() -> None:
content = """
[a.a]
Expand Down
23 changes: 13 additions & 10 deletions tomlkit/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,17 +61,20 @@ def body(self) -> list[tuple[Key | None, Item]]:
def unwrap(self) -> dict[str, Any]:
    """Return the container's contents as a pure Python object (a ``dict``).

    Each entry of ``self._map`` is resolved to its item; items that expose
    an ``unwrap()`` method are converted recursively, anything else is
    stored unchanged.

    Returns:
        A new ``dict`` keyed by each key's string form (``key.key``), in
        the iteration order of ``self._map``.

    Raises:
        Any error raised while building or unwrapping an
        ``OutOfOrderTableProxy`` propagates to the caller (the test suite
        expects ``KeyAlreadyPresent`` for conflicting out-of-order
        fragments).
    """
    unwrapped: dict[str, Any] = {}
    # Resolve each key straight from _map, which already holds the parsed
    # Key objects and their body index, instead of via self.items(): the
    # inherited MutableMapping iteration goes through __getitem__, which
    # rebuilds a SingleKey from the bare string on every key only to throw
    # it away. _map iterates in the same insertion order as self.items().
    for key, idx in self._map.items():
        if isinstance(idx, tuple):
            # A tuple index marks an out-of-order table split across several
            # body entries. Keep resolving it through the proxy so that its
            # validation and fragment merge still run.
            value: Any = OutOfOrderTableProxy(self, idx)
        else:
            # Body entries are (key, item) pairs; only the item is needed.
            value = self._body[idx][1]
        unwrapped[key.key] = value.unwrap() if hasattr(value, "unwrap") else value

    return unwrapped

Expand Down
14 changes: 5 additions & 9 deletions tomlkit/items.py
Original file line number Diff line number Diff line change
Expand Up @@ -1747,15 +1747,11 @@ def __init__(self, value: container.Container, trivia: Trivia):
dict.__setitem__(self, k.key, v)

def unwrap(self) -> dict[str, Any]:
    """Return this table's contents as a plain ``dict``.

    Returns:
        Whatever the wrapped container's ``unwrap()`` returns.
    """
    # Delegate to the inner container's unwrap() rather than iterating
    # self.items() here: going through items()/__getitem__ rebuilds a
    # SingleKey from the bare string for every key just to throw it away.
    # The container resolves keys from its own key map; out-of-order keys
    # are still resolved through OutOfOrderTableProxy there.
    return self._value.unwrap()

@property
def value(self) -> container.Container:
Expand Down
Loading