Skip to content

Commit b9a1984

Browse files
adamjstewart and pmeier authored
Simpler file chunking (#7673)
Co-authored-by: Philip Meier <[email protected]>
1 parent 3d70e4b commit b9a1984

File tree

4 files changed

+4
-14
lines changed

4 files changed

+4
-14
lines changed

packaging/wheel/relocate.py

+1 -11
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22

33
import glob
44
import hashlib
5-
import io
65

76
# Standard library imports
87
import os
@@ -65,21 +64,12 @@
6564
PYTHON_VERSION = sys.version_info
6665

6766

68-
def read_chunks(file, size=io.DEFAULT_BUFFER_SIZE):
69-
"""Yield pieces of data from a file-like object until EOF."""
70-
while True:
71-
chunk = file.read(size)
72-
if not chunk:
73-
break
74-
yield chunk
75-
76-
7767
def rehash(path, blocksize=1 << 20):
7868
"""Return (hash, length) for path using hashlib.sha256()"""
7969
h = hashlib.sha256()
8070
length = 0
8171
with open(path, "rb") as f:
82-
for block in read_chunks(f, size=blocksize):
72+
while block := f.read(blocksize):
8373
length += len(block)
8474
h.update(block)
8575
digest = "sha256=" + urlsafe_b64encode(h.digest()).decode("latin1").rstrip("=")

torchvision/datasets/utils.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,7 @@ def calculate_md5(fpath: str, chunk_size: int = 1024 * 1024) -> str:
5757
else:
5858
md5 = hashlib.md5()
5959
with open(fpath, "rb") as f:
60-
for chunk in iter(lambda: f.read(chunk_size), b""):
60+
while chunk := f.read(chunk_size):
6161
md5.update(chunk)
6262
return md5.hexdigest()
6363

torchvision/prototype/datasets/_builtin/README.md

+1 -1
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,7 @@ import hashlib
9191
def sha256sum(path, chunk_size=1024 * 1024):
9292
checksum = hashlib.sha256()
9393
with open(path, "rb") as f:
94-
for chunk in iter(lambda: f.read(chunk_size), b""):
94+
while chunk := f.read(chunk_size):
9595
checksum.update(chunk)
9696
print(checksum.hexdigest())
9797
```

torchvision/prototype/datasets/utils/_resource.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -136,7 +136,7 @@ def download(self, root: Union[str, pathlib.Path], *, skip_integrity_check: bool
136136
def _check_sha256(self, path: pathlib.Path, *, chunk_size: int = 1024 * 1024) -> None:
137137
hash = hashlib.sha256()
138138
with open(path, "rb") as file:
139-
for chunk in iter(lambda: file.read(chunk_size), b""):
139+
while chunk := file.read(chunk_size):
140140
hash.update(chunk)
141141
sha256 = hash.hexdigest()
142142
if sha256 != self.sha256:

0 commit comments

Comments
 (0)