Skip to content

Commit 2b1ea6f

Browse files
committed
initial commit
0 parents  commit 2b1ea6f

File tree

8 files changed

+2257
-0
lines changed

8 files changed

+2257
-0
lines changed

README.md

+91
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
# ZnSlice
2+
3+
A lightweight library (without external dependencies) for:
4+
- advanced slicing.
5+
- caching of `__getitem__(self, item)`
6+
- lazy loading of `__getitem__(self, item)`
7+
8+
# Installation
9+
10+
```bash
11+
pip install znslice
12+
```
13+
14+
# Usage
15+
16+
## Advanced Slicing and Cache
17+
```python
18+
import znslice
19+
import collections.abc
20+
21+
class MapList(collections.abc.Sequence):
    """Sequence that applies ``func`` to an element of ``data`` on access.

    ``__getitem__`` itself only handles a single integer index; it is
    wrapped with ``znslice.znslice`` so the usage below can also index
    with lists and slices.
    """

    def __init__(self, data, func):
        self.func = func
        self.data = data

    @znslice.znslice
    def __getitem__(self, item: int):
        # The print makes visible which indices are actually computed.
        print(f"Loading item = {item}")
        raw = self.data[item]
        return self.func(raw)

    def __len__(self):
        return len(self.data)
33+
34+
data = MapList([0, 1, 2, 3, 4], lambda x: x ** 2)
35+
36+
assert data[0] == 0
37+
assert data[[1, 2, 3]] == [1, 4, 9]
38+
# calling data[:] will now only compute data[4] and load the remaining data from cache
39+
assert data[:] == [0, 1, 4, 9, 16]
40+
```
41+
42+
## Lazy Database Loading
43+
44+
You can use `znslice` to lazily load data from a database. This is useful when the database is large and you only need a small subset of its data.
45+
46+
In the following example, we use the `ase` package to generate `Atoms` objects, store them in a database, and load them lazily.
47+
48+
```python
49+
import ase.io
50+
import ase.db
51+
import znslice
52+
import tqdm
53+
import random
54+
55+
# create a database
56+
with ase.db.connect("data.db", append=False) as db:
57+
for _ in range(10):
58+
atoms = ase.Atoms('CO', positions=[(0, 0, 0), (0, 0, random.random())])
59+
db.write(atoms, group="data")
60+
61+
# load the database lazily
62+
class ReadASEDB:
    """Read ``Atoms`` objects from the ASE database stored at ``file``."""

    def __init__(self, file):
        self.file = file

    @znslice.znslice(
        advanced_slicing=True,  # this getitem supports advanced slicing
        lazy=True,  # we want to lazy load the data
    )
    def __getitem__(self, item):
        with ase.db.connect(self.file) as database:
            if isinstance(item, int):
                print(f"get {item = }")
                # Row ``item + 1`` is read for index ``item``
                # (ASE row ids appear to be 1-based -- confirm in the ASE docs).
                return database[item + 1].toatoms()
            # Any non-int item is iterated as a collection of indices.
            return [database[idx + 1].toatoms() for idx in tqdm.tqdm(item)]

    def __len__(self):
        with ase.db.connect(self.file) as db:
            return len(db)
83+
84+
db = ReadASEDB("data.db")
85+
86+
data = db[::2] # LazySequence([<__main__.ReadASEDB>], [[0, 2, 4, 6, 8]])
87+
data.tolist() # list[ase.Atoms]
88+
89+
# supports addition, advanced slicing, etc.
90+
data = db[::2] + db[1::2]
91+
```

poetry.lock

+1,567
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
[tool.poetry]
2+
name = "ZnSlice"
3+
version = "0.1.0"
4+
description = "Cache, advanced slicing and lazy loading for __getitem__"
5+
license = "Apache-2.0"
6+
authors = ["zincwarecode <[email protected]>"]
7+
readme = "README.md"
8+
9+
[tool.poetry.urls]
10+
repository = "https://github.com/zincware/ZnSlice"
11+
12+
[tool.poetry.dependencies]
13+
python = "^3.8"
14+
15+
[tool.poetry.group.dev.dependencies]
16+
black = "^22.12.0"
17+
isort = "^5.11.4"
18+
ruff = "^0.0.224"
19+
pytest = "^7.2.1"
20+
ipykernel = "^6.20.2"
21+
ase = "^3.22.1"
22+
tqdm = "^4.64.1"
23+
coverage = "^7.0.5"
24+
25+
[build-system]
26+
requires = ["poetry-core"]
27+
build-backend = "poetry.core.masonry.api"

tests/test_LazyList.py

+55
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import collections.abc
2+
3+
import znslice
4+
5+
6+
class DatabaseMock:
    """Minimal database stand-in that only supports integer lookups.

    Looking up index ``i`` returns ``i`` itself, and ``len()`` reports
    ``maximum`` entries.

    Parameters
    ----------
    maximum:
        Number of entries the mock pretends to hold.
    """

    def __init__(self, maximum: int = 100):
        self.maximum = maximum

    def __getitem__(self, item):
        """Return ``item`` unchanged.

        Raises
        ------
        TypeError
            If ``item`` is not an ``int``.
        IndexError
            If ``item`` is at or beyond ``maximum``.
        """
        if not isinstance(item, int):
            raise TypeError(
                f"Index of type {type(item)} not supported. Only int supported."
            )
        # ``>=`` rather than ``>``: ``__len__`` reports ``maximum`` entries, so
        # index ``maximum`` is already out of range. With ``>`` iterating the
        # mock would yield one element more than its length.
        if item >= self.maximum:
            raise IndexError
        return item

    def __len__(self):
        return self.maximum
21+
22+
23+
class LazyList(collections.abc.Sequence):
    """Lazy view onto ``obj``, optionally restricted to ``indices``.

    Parameters
    ----------
    obj:
        The wrapped object; it is indexed with integers and must support
        ``len()`` whenever ``indices`` is given.
    indices:
        Selection of positions in ``obj`` to expose. It is passed to
        ``znslice.utils.item_to_indices`` for resolution. ``None`` (the
        default) exposes all of ``obj``.
    """

    def __init__(self, obj=None, indices=None):
        self._obj = obj
        # Compare against ``None`` explicitly. A truthiness check would treat
        # an empty selection (e.g. ``indices=[]``) like "no restriction" and
        # silently expose the whole object.
        self._indices = (
            None
            if indices is None
            else znslice.utils.item_to_indices(indices, range(len(obj)))
        )

    @znslice.znslice(lazy=True)
    def __getitem__(self, item: int):
        if self._indices is None:
            return self._obj[item]
        # Translate the position in this view to the position in ``obj``.
        return self._obj[self._indices[item]]

    def __len__(self):
        if self._indices is None:
            return len(self._obj)
        return len(self._indices)
42+
43+
44+
def test_LazyList():
    """Check int, slice and list access on index-restricted ``LazyList`` objects."""
    # Restriction given as an explicit list of positions.
    first = LazyList(DatabaseMock(), indices=[1, 4, 7])
    assert first[0] == 1
    assert isinstance(first[:2], znslice.LazySequence)

    assert first[[0, 1, 2]].tolist() == [1, 4, 7]

    second = LazyList(DatabaseMock(), indices=[9, 11, 26])
    assert second[[0, 1, 2]].tolist() == [9, 11, 26]

    # Restriction given as a slice over the wrapped object.
    strided = LazyList(DatabaseMock(), indices=slice(None, None, 3))
    assert strided[:3].tolist() == [0, 3, 6]

tests/test_utils.py

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import pytest
2+
3+
import znslice
4+
5+
6+
def test_item_to_indices():
    """Check ``item_to_indices`` for int, slice, list, tuple and invalid input."""
    lst = list(range(10))
    to_indices = znslice.utils.item_to_indices

    # (item, expected result) pairs for the supported index types.
    expected_by_item = [
        (1, 1),
        (slice(4, 6), [4, 5]),
        ([1, 2, 3], [1, 2, 3]),
        ((1, 2, 3), [1, 2, 3]),
    ]
    for item, expected in expected_by_item:
        assert to_indices(item, lst) == expected

    # A string is rejected.
    with pytest.raises(ValueError):
        to_indices("data", lst)

0 commit comments

Comments
 (0)