-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathhash.py
95 lines (74 loc) · 2.57 KB
/
hash.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
from builtins import object
import hashlib
import os
import struct
import sys
from zlib import crc32
from typing import Union
try:
    if os.environ.get('CFV_NOMMAP'):
        # A set CFV_NOMMAP is handled exactly like a missing mmap module.
        raise ImportError
    import mmap
except ImportError:
    _nommap = 1
else:
    def dommap(fileno, len) -> Union[bytes, mmap.mmap]:
        """Return a read-only mapping of the first ``len`` bytes of ``fileno``.

        Generic mmap: the ACCESS_* arguments work on both nix and win.
        A length of zero yields ``b''`` because mmap doesn't like length=0.
        """
        if len == 0:
            return b''
        return mmap.mmap(fileno, len, access=mmap.ACCESS_READ)

    _nommap = 0
# Files larger than this many bytes are not mmapped in one piece by
# _getfilechecksum.
_MAX_MMAP = (1 << 32) - 1
# Upper bound on the mapped prefix when _getfilechecksum falls back to a
# partial mmap.
_FALLBACK_MMAP = (1 << 31) - 1

# Module-level aliases for the hashlib constructors.
md5, sha1 = hashlib.md5, hashlib.sha1
def _getfilechecksum(filename, hasher, callback) -> tuple:
    """Hash a file (or stdin) and return ``(digest, size)``.

    Args:
        filename: Path of the file to read. The empty string means "read
            from ``sys.stdin.buffer``".
        hasher: Factory for hash objects. It is called with no arguments
            for chunked reads, or with the mapped file contents when mmap
            is used; the result must provide ``update()`` and ``digest()``.
        callback: Optional progress hook, called with the running byte
            count after every chunk. Passing one forces chunked reads.

    Returns:
        A tuple of the hasher's ``digest()`` and the number of bytes hashed.
    """
    from_stdin = filename == ''
    f = sys.stdin.buffer if from_stdin else open(filename, 'rb')

    def finish(m, s):
        # Feed the remainder of ``f`` into ``m`` in 64 KiB chunks, counting
        # bytes on top of the ``s`` already hashed.
        while True:
            x = f.read(65536)
            if not x:
                return m.digest(), s
            s += len(x)
            m.update(x)
            if callback:
                callback(s)

    try:
        if from_stdin or _nommap or callback:
            return finish(hasher(), 0)

        s = os.path.getsize(filename)
        try:
            if s > _MAX_MMAP:
                # Work around python 2.[56] problem with md5 of large mmap objects
                raise OverflowError
            m = hasher(dommap(f.fileno(), s))
        except OverflowError:
            # mmap size is limited by C's int type, which even on 64 bit
            # arches is often 32 bits, so we can't use sys.maxint
            # either.  If we get the error, just assume 32 bits.
            mmapsize = min(s, _FALLBACK_MMAP)
            m = hasher(dommap(f.fileno(), mmapsize))
            f.seek(mmapsize)
            # Only the first ``mmapsize`` bytes were mapped, so the rest of
            # the file is hashed the old fashioned way.
            # NOTE(review): mmap.mmap accepts an ``offset`` argument on
            # current Pythons, so the tail could be mapped in windows
            # instead -- confirm before changing.
            return finish(m, mmapsize)
        return m.digest(), s
    finally:
        # Close only the handle opened here; stdin is left open for the
        # rest of the process.
        if not from_stdin:
            f.close()
def getfilechecksumgeneric(algo: str) -> tuple:
    """Build a file-checksum function for the hashlib algorithm ``algo``.

    Args:
        algo: Algorithm name. If ``hashlib`` has an attribute of that name
            it is used as the hash constructor; otherwise the name is
            passed to ``hashlib.new``.

    Returns:
        A tuple ``(checksum, digest_size)``. ``checksum(filename, callback)``
        forwards to ``_getfilechecksum`` with the selected hasher, and
        ``digest_size`` is read from a freshly created hash object.
    """
    if hasattr(hashlib, algo):
        hasher = getattr(hashlib, algo)
    else:
        def hasher(data=b''):
            # Accept optional initial data like the named hashlib
            # constructors do: _getfilechecksum calls hasher(<mapped data>)
            # on its mmap path, which a zero-argument factory would reject
            # with TypeError.
            return hashlib.new(algo, data)

    def checksum(filename, callback):
        return _getfilechecksum(filename, hasher, callback)

    return checksum, hasher().digest_size
class CRC32(object):
    """Small hashlib-style wrapper around ``zlib.crc32``."""

    # Length in bytes of the value returned by digest().
    digest_size = 4

    def __init__(self, s=b'') -> None:
        """Start a checksum, optionally seeded with the bytes ``s``."""
        self.value = crc32(s)

    def update(self, s) -> None:
        """Fold the bytes ``s`` into the running checksum."""
        running = self.value
        self.value = crc32(s, running)

    def digest(self) -> bytes:
        """Return the checksum as four big-endian bytes."""
        masked = self.value & 0xFFFFFFFF
        return masked.to_bytes(4, 'big')
def getfilecrc(filename, callback) -> tuple:
    """Checksum ``filename`` with CRC32.

    Delegates to ``_getfilechecksum`` using ``CRC32`` as the hasher and
    returns its result unchanged; ``callback`` is passed straight through.
    """
    return _getfilechecksum(filename, hasher=CRC32, callback=callback)