Skip to content

Commit c5d4a5e

Browse files
committed
Modify getsize to return total size, not just the top level
1 parent 9d046ea commit c5d4a5e

File tree

3 files changed

+44
-45
lines changed

3 files changed

+44
-45
lines changed

docs/release.rst

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,13 @@ Enhancements
2424
By :user:`Deepak Cherian <dcherian>`.
2525

2626

27+
Bug fixes
28+
~~~~~~~~~
29+
30+
* ``getsize`` now returns the total size of all nested arrays.
31+
By :user:`Ben Jeffery <benjeffery>` :issue:`253`.
32+
33+
2734
Docs
2835
~~~~
2936

zarr/storage.py

Lines changed: 34 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,6 @@
3030
from collections import OrderedDict
3131
from collections.abc import MutableMapping
3232
from functools import lru_cache
33-
from os import scandir
3433
from pickle import PicklingError
3534
from threading import Lock, RLock
3635
from typing import Sequence, Mapping, Optional, Union, List, Tuple, Dict, Any
@@ -269,9 +268,15 @@ def _getsize(store: BaseStore, path: Path = None) -> int:
269268
# also include zarr.json?
270269
# members += ['zarr.json']
271270
else:
272-
members = listdir(store, path)
273-
prefix = _path_to_prefix(path)
274-
members = [prefix + k for k in members]
271+
to_visit = [path]
272+
members = []
273+
while to_visit:
274+
print(to_visit)
275+
current_path = to_visit.pop()
276+
current_members = listdir(store, current_path)
277+
prefix = _path_to_prefix(current_path)
278+
members.extend([prefix + k for k in current_members])
279+
to_visit.extend([prefix + k for k in current_members])
275280
for k in members:
276281
try:
277282
v = store[k]
@@ -971,8 +976,12 @@ def getsize(self, path: Path = None):
971976
elif isinstance(value, self.cls):
972977
# total size for directory
973978
size = 0
974-
for v in value.values():
975-
if not isinstance(v, self.cls):
979+
to_visit = list(value.values())
980+
while to_visit:
981+
v = to_visit.pop()
982+
if isinstance(v, self.cls):
983+
to_visit.extend(v.values())
984+
else:
976985
size += buffer_size(v)
977986
return size
978987

@@ -1269,9 +1278,10 @@ def getsize(self, path=None):
12691278
return os.path.getsize(fs_path)
12701279
elif os.path.isdir(fs_path):
12711280
size = 0
1272-
for child in scandir(fs_path):
1273-
if child.is_file():
1274-
size += child.stat().st_size
1281+
for root, dirs, files in os.walk(fs_path):
1282+
for file in files:
1283+
file_path = os.path.join(root, file)
1284+
size += os.path.getsize(file_path)
12751285
return size
12761286
else:
12771287
return 0
@@ -1903,29 +1913,19 @@ def listdir(self, path=None):
19031913
def getsize(self, path=None):
19041914
path = normalize_storage_path(path)
19051915
with self.mutex:
1906-
children = self.listdir(path)
1907-
if children:
1908-
size = 0
1909-
for child in children:
1910-
if path:
1911-
name = path + "/" + child
1912-
else:
1913-
name = child
1914-
try:
1915-
info = self.zf.getinfo(name)
1916-
except KeyError:
1917-
pass
1918-
else:
1919-
size += info.compress_size
1920-
return size
1921-
elif path:
1916+
to_visit = [path] if path else self.listdir(path)
1917+
total_size = 0
1918+
while to_visit:
1919+
current_path = to_visit.pop()
19221920
try:
1923-
info = self.zf.getinfo(path)
1924-
return info.compress_size
1921+
info = self.zf.getinfo(current_path)
1922+
total_size += info.compress_size
19251923
except KeyError:
1926-
return 0
1927-
else:
1928-
return 0
1924+
children = self.listdir(current_path)
1925+
for child in children:
1926+
full_path = current_path + "/" + child if current_path else child
1927+
to_visit.append(full_path)
1928+
return total_size
19291929

19301930
def clear(self):
19311931
if self.mode == "r":
@@ -2488,6 +2488,8 @@ def listdir(self, path: Path = None):
24882488
return listing
24892489

24902490
def getsize(self, path=None) -> int:
2491+
print("WYF")
2492+
print(self._store, path)
24912493
return getsize(self._store, path=path)
24922494

24932495
def _pop_value(self):
@@ -2745,10 +2747,9 @@ def getsize(self, path=None):
27452747
size = self.cursor.execute(
27462748
"""
27472749
SELECT COALESCE(SUM(LENGTH(v)), 0) FROM zarr
2748-
WHERE k LIKE (? || "%") AND
2749-
0 == INSTR(LTRIM(SUBSTR(k, LENGTH(?) + 1), "/"), "/")
2750+
WHERE k LIKE (? || "%")
27502751
""",
2751-
(path, path),
2752+
(path,),
27522753
)
27532754
for (s,) in size:
27542755
return s

zarr/tests/test_storage.py

Lines changed: 3 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -366,19 +366,10 @@ def test_hierarchy(self):
366366

367367
# test getsize (optional)
368368
if hasattr(store, "getsize"):
369-
# TODO: proper behavior of getsize?
370-
# v3 returns size of all nested arrays, not just the
371-
# size of the arrays in the current folder.
372-
if self.version == 2:
373-
assert 6 == store.getsize()
374-
else:
375-
assert 15 == store.getsize()
369+
assert 15 == store.getsize()
376370
assert 3 == store.getsize("a")
377371
assert 3 == store.getsize("b")
378-
if self.version == 2:
379-
assert 3 == store.getsize("c")
380-
else:
381-
assert 9 == store.getsize("c")
372+
assert 9 == store.getsize("c")
382373
assert 3 == store.getsize("c/d")
383374
assert 6 == store.getsize("c/e")
384375
assert 3 == store.getsize("c/e/f")
@@ -2256,7 +2247,7 @@ def test_getsize():
22562247
store["foo"] = b"aaa"
22572248
store["bar"] = b"bbbb"
22582249
store["baz/quux"] = b"ccccc"
2259-
assert 7 == getsize(store)
2250+
assert 12 == getsize(store)
22602251
assert 5 == getsize(store, "baz")
22612252

22622253
store = KVStore(dict())

0 commit comments

Comments
 (0)