Skip to content

Thread-safe Buffer.__items__; proper KeysView, ItemsView, ValuesView #93

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
[run]
source =
zict
omit =

[report]
show_missing = True
exclude_lines =
# re-enable the standard pragma
pragma: nocover
pragma: no cover
# always ignore type checking blocks
TYPE_CHECKING
@overload

[html]
directory = coverage_html_report
4 changes: 3 additions & 1 deletion doc/source/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@ Changelog
- ``LMDB`` now uses memory-mapped I/O on MacOSX and is usable on Windows.
(:pr:`78`) `Guido Imperiale`_
- The library is now partially thread-safe.
(:pr:`82`, :pr:`90`) `Guido Imperiale`_
(:pr:`82`, :pr:`90`, :pr:`93`) `Guido Imperiale`_
- :class:`LRU` and :class:`Buffer` now support delayed eviction.
New objects :class:`Accumulator` and :class:`InsertionSortedSet`.
(:pr:`87`) `Guido Imperiale`_
- All mappings now return proper KeysView, ItemsView, and ValuesView objects from their
keys(), items(), and values() methods (:pr:`93`) `Guido Imperiale`_


2.2.0 - 2022-04-28
Expand Down
53 changes: 44 additions & 9 deletions zict/buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

from collections.abc import Callable, Iterator, MutableMapping
from itertools import chain
from typing import ( # TODO import from collections.abc (needs Python >=3.9)
ItemsView,
ValuesView,
)

from zict.common import KT, VT, ZictBase, close, flush
from zict.lru import LRU
Expand Down Expand Up @@ -136,21 +140,30 @@ def __delitem__(self, key: KT) -> None:
except KeyError:
del self.slow[key]

# FIXME dictionary views https://github.com/dask/zict/issues/61
def keys(self) -> Iterator[KT]: # type: ignore
return iter(self)
def values(self) -> ValuesView[VT]:
return BufferValuesView(self)

def values(self) -> Iterator[VT]: # type: ignore
return chain(self.fast.values(), self.slow.values())

def items(self) -> Iterator[tuple[KT, VT]]: # type: ignore
return chain(self.fast.items(), self.slow.items())
def items(self) -> ItemsView[KT, VT]:
return BufferItemsView(self)

def __len__(self) -> int:
return len(self.fast) + len(self.slow)

def __iter__(self) -> Iterator[KT]:
return chain(self.fast, self.slow)
"""Make sure that the iteration is not disrupted if you evict/restore a key in
the middle of it
"""
seen = set()
while True:
try:
for d in (self.fast, self.slow):
for key in d:
if key not in seen:
seen.add(key)
yield key
return
except RuntimeError:
pass

def __contains__(self, key: object) -> bool:
return key in self.fast or key in self.slow
Expand All @@ -165,3 +178,25 @@ def flush(self) -> None:

def close(self) -> None:
close(self.fast, self.slow)


class BufferItemsView(ItemsView[KT, VT]):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we also override __contains__ in BufferItemsView to avoid changing the LRU?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason why __iter__ avoids changing the LRU is because it would raise RuntimeError halfway through. If you're testing if your buffer contains a specific key/value pair, and that causes the value to be unevicted from slow, then I don't see why it should bypass the principle of temporal locality.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the explanation; now it makes sense to me. I had assumed we wanted this to be a general property of the views.

If you're testing if your buffer contains a specific key/value pair, and that causes the value to be unevicted from slow, then I don't see why it should bypass the principle of temporal locality.

Sounds good to me!

_mapping: Buffer # FIXME CPython implementation detail
__slots__ = ()

def __iter__(self) -> Iterator[tuple[KT, VT]]:
# Avoid changing the LRU
return chain(self._mapping.fast.items(), self._mapping.slow.items())


class BufferValuesView(ValuesView[VT]):
_mapping: Buffer # FIXME CPython implementation detail
__slots__ = ()

def __contains__(self, value: object) -> bool:
# Avoid changing the LRU
return any(value == v for v in self)

def __iter__(self) -> Iterator[VT]:
# Avoid changing the LRU
return chain(self._mapping.fast.values(), self._mapping.slow.values())
8 changes: 1 addition & 7 deletions zict/cache.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

import weakref
from collections.abc import Iterator, KeysView, MutableMapping
from collections.abc import Iterator, MutableMapping
from typing import TYPE_CHECKING

from zict.common import KT, VT, ZictBase, close, flush
Expand Down Expand Up @@ -67,7 +67,6 @@ def __getitem__(self, key: KT) -> VT:
def __setitem__(self, key: KT, value: VT) -> None:
# If the item was already in cache and data.__setitem__ fails, e.g. because it's
# a File and the disk is full, make sure that the cache is invalidated.
# FIXME https://github.com/python/mypy/issues/10152
try:
del self.cache[key]
except KeyError:
Expand All @@ -94,11 +93,6 @@ def __contains__(self, key: object) -> bool:
# Do not let MutableMapping call self.data[key]
return key in self.data

def keys(self) -> KeysView[KT]:
# Return a potentially optimized set-like, instead of letting MutableMapping
# build it from __iter__ on the fly
return self.data.keys()

def flush(self) -> None:
flush(self.cache, self.data)

Expand Down
5 changes: 1 addition & 4 deletions zict/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import mmap
import os
import pathlib
from collections.abc import Iterator, KeysView
from collections.abc import Iterator
from urllib.parse import quote, unquote

from zict.common import ZictBase
Expand Down Expand Up @@ -139,9 +139,6 @@ def __setitem__(
def __contains__(self, key: object) -> bool:
return key in self.filenames

def keys(self) -> KeysView[str]:
return self.filenames.keys()

def __iter__(self) -> Iterator[str]:
return iter(self.filenames)

Expand Down
12 changes: 1 addition & 11 deletions zict/func.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from collections.abc import Callable, Iterable, Iterator, KeysView, MutableMapping
from collections.abc import Callable, Iterable, Iterator, MutableMapping
from typing import Generic, TypeVar

from zict.common import KT, VT, ZictBase, close, flush
Expand Down Expand Up @@ -71,16 +71,6 @@ def __contains__(self, key: object) -> bool:
def __delitem__(self, key: KT) -> None:
del self.d[key]

def keys(self) -> KeysView[KT]:
return self.d.keys()

# FIXME dictionary views https://github.com/dask/zict/issues/61
def values(self) -> Iterator[VT]: # type: ignore
return (self.load(v) for v in self.d.values())

def items(self) -> Iterator[tuple[KT, VT]]: # type: ignore
return ((k, self.load(v)) for k, v in self.d.items())

def _do_update(self, items: Iterable[tuple[KT, VT]]) -> None:
it = ((k, self.dump(v)) for k, v in items)
self.d.update(it)
Expand Down
54 changes: 42 additions & 12 deletions zict/lmdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
import pathlib
import sys
from collections.abc import Iterable, Iterator
from typing import ( # TODO import from collections.abc (needs Python >=3.9)
ItemsView,
ValuesView,
)

from zict.common import ZictBase

Expand Down Expand Up @@ -77,18 +81,15 @@ def __contains__(self, key: object) -> bool:
with self.db.begin() as txn:
return txn.cursor().set_key(_encode_key(key))

# FIXME dictionary views https://github.com/dask/zict/issues/61
def items(self) -> Iterator[tuple[str, bytes]]: # type: ignore
cursor = self.db.begin().cursor()
return ((_decode_key(k), v) for k, v in cursor.iternext(keys=True, values=True))

def keys(self) -> Iterator[str]: # type: ignore
def __iter__(self) -> Iterator[str]:
cursor = self.db.begin().cursor()
return (_decode_key(k) for k in cursor.iternext(keys=True, values=False))

def values(self) -> Iterator[bytes]: # type: ignore
cursor = self.db.begin().cursor()
return cursor.iternext(keys=False, values=True)
def items(self) -> ItemsView[str, bytes]:
return LMDBItemsView(self)

def values(self) -> ValuesView[bytes]:
return LMDBValuesView(self)

def _do_update(self, items: Iterable[tuple[str, bytes]]) -> None:
# Optimized version of update() using a single putmulti() call.
Expand All @@ -97,9 +98,6 @@ def _do_update(self, items: Iterable[tuple[str, bytes]]) -> None:
consumed, added = txn.cursor().putmulti(items_enc)
assert consumed == added == len(items_enc)

def __iter__(self) -> Iterator[str]:
return self.keys()

def __delitem__(self, key: str) -> None:
with self.db.begin(write=True) as txn:
if not txn.delete(_encode_key(key)):
Expand All @@ -110,3 +108,35 @@ def __len__(self) -> int:

def close(self) -> None:
self.db.close()


class LMDBItemsView(ItemsView[str, bytes]):
_mapping: LMDB # FIXME CPython implementation detail
__slots__ = ()

def __contains__(self, item: object) -> bool:
key: str
value: object
key, value = item # type: ignore
try:
v = self._mapping[key]
except (KeyError, AttributeError):
return False
else:
return v == value

def __iter__(self) -> Iterator[tuple[str, bytes]]:
cursor = self._mapping.db.begin().cursor()
return ((_decode_key(k), v) for k, v in cursor.iternext(keys=True, values=True))


class LMDBValuesView(ValuesView[bytes]):
_mapping: LMDB # FIXME CPython implementation detail
__slots__ = ()

def __contains__(self, value: object) -> bool:
return any(value == v for v in self)

def __iter__(self) -> Iterator[bytes]:
cursor = self._mapping.db.begin().cursor()
return cursor.iternext(keys=False, values=True)
13 changes: 1 addition & 12 deletions zict/sieve.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from collections import defaultdict
from collections.abc import Callable, Iterable, Iterator, Mapping, MutableMapping
from itertools import chain
from typing import Generic, TypeVar

from zict.common import KT, VT, ZictBase, close, flush
Expand Down Expand Up @@ -92,21 +91,11 @@ def _do_update(self, items: Iterable[tuple[KT, VT]]) -> None:
for key, _ in mitems:
self.key_to_mapping[key] = mapping

# FIXME dictionary views https://github.com/dask/zict/issues/61
def keys(self) -> Iterator[KT]: # type: ignore
return chain.from_iterable(self.mappings.values())

def values(self) -> Iterator[VT]: # type: ignore
return chain.from_iterable(m.values() for m in self.mappings.values())

def items(self) -> Iterator[tuple[KT, VT]]: # type: ignore
return chain.from_iterable(m.items() for m in self.mappings.values())

def __len__(self) -> int:
return sum(map(len, self.mappings.values()))

def __iter__(self) -> Iterator[KT]:
return self.keys()
return iter(self.key_to_mapping)

def __contains__(self, key: object) -> bool:
return key in self.key_to_mapping
Expand Down
14 changes: 14 additions & 0 deletions zict/tests/test_buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,3 +228,17 @@ def test_set_noevict():
assert a == {"y": 3, "x": 1}
assert b == {"z": 6}
assert f2s == s2f == []


def test_evict_restore_during_iter():
"""Test that __iter__ won't be disrupted if another thread evicts or restores a key"""
buff = Buffer({"x": 1, "y": 2}, {"z": 3}, n=5)
assert list(buff) == ["x", "y", "z"]
it = iter(buff)
assert next(it) == "x"
buff.fast.evict("x")
assert next(it) == "y"
assert buff["x"] == 1
assert next(it) == "z"
with pytest.raises(StopIteration):
next(it)
10 changes: 10 additions & 0 deletions zict/tests/test_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pytest

from zict import Cache, WeakValueMapping
from zict.tests import utils_test


def test_cache_get_set_del():
Expand Down Expand Up @@ -129,3 +130,12 @@ class C:
b = "bbb"
d["b"] = b
assert "b" not in d


def test_mapping():
"""
Test mapping interface for Cache().
"""
buff = Cache({}, {})
utils_test.check_mapping(buff)
utils_test.check_closing(buff)
2 changes: 1 addition & 1 deletion zict/tests/test_lru.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def test_simple():
assert "y" not in lru

lru["a"] = 5
assert set(lru.keys()) == {"z", "a"}
assert set(lru) == {"z", "a"}


def test_str():
Expand Down
Loading