Skip to content

Commit eaf13b1

Browse files
committed
fix(win): FIX and HIDE 2 win-errors remaining
+ File-in-use errors were fixed with `gitdb.util.mman.collect()`! + This call is disabled `gitdb.util.HIDE_WINDOWS_KNOWN_ERRORS == False`. + Depend on latest smmp `leaks` branch
1 parent 08b1f5f commit eaf13b1

15 files changed

+119
-90
lines changed

Diff for: .appveyor.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ install:
3838
git config --global user.name "Travis Runner"
3939
4040
- pip install -e .
41-
- pip install -I git+https://github.com/ankostis/smmap.git@v2.1.0.dev0
41+
- pip install -I git+https://github.com/ankostis/smmap.git@leaks
4242

4343
build: false
4444

Diff for: .travis.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ git:
1212
depth: 1000
1313
install:
1414
- pip install coveralls
15-
- pip install -I git+https://github.com/ankostis/smmap.git@v2.1.0.dev0
15+
- pip install -I git+https://github.com/ankostis/smmap.git@leaks
1616
script:
1717
- ulimit -n 48
1818
- ulimit -n

Diff for: gitdb/db/loose.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@
4040
rename,
4141
dirname,
4242
basename,
43-
join
43+
join,
44+
is_win,
4445
)
4546

4647
from gitdb.fun import (
@@ -71,7 +72,7 @@ class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
7172
# On windows we need to keep it writable, otherwise it cannot be removed
7273
# either
7374
new_objects_mode = int("444", 8)
74-
if os.name == 'nt':
75+
if is_win:
7576
new_objects_mode = int("644", 8)
7677

7778
def __init__(self, root_path):
@@ -226,7 +227,7 @@ def store(self, istream):
226227
mkdir(obj_dir)
227228
# END handle destination directory
228229
# rename onto existing doesn't work on windows
229-
if os.name == 'nt':
230+
if is_win:
230231
if isfile(obj_path):
231232
remove(tmp_path)
232233
else:

Diff for: gitdb/db/pack.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,13 @@ def __init__(self, root_path):
4343
# * hits - number of times the pack was hit with a request
4444
# * entity - Pack entity instance
4545
# * sha_to_index - PackIndexFile.sha_to_index method for direct cache query
46-
# self._entities = list() # lazy loaded list
46+
# self._entities = [] # lazy loaded list
4747
self._hit_count = 0 # amount of hits
4848
self._st_mtime = 0 # last modification data of our root path
4949

5050
def _set_cache_(self, attr):
5151
if attr == '_entities':
52-
self._entities = list()
52+
self._entities = []
5353
self.update_cache(force=True)
5454
# END handle entities initialization
5555

Diff for: gitdb/pack.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
pass
4141
# END try c module
4242

43-
from gitdb.base import ( # Amazing !
43+
from gitdb.base import (
4444
OInfo,
4545
OStream,
4646
OPackInfo,

Diff for: gitdb/stream.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
write,
2626
close,
2727
suppress,
28+
is_darwin,
2829
)
2930

3031
from gitdb.const import NULL_BYTE, BYTE_SPACE
@@ -318,7 +319,7 @@ def read(self, size=-1):
318319
# However, the zlib VERSIONs as well as the platform check is used to further match the entries in the
319320
# table in the github issue. This is it ... it was the only way I could make this work everywhere.
320321
# IT's CERTAINLY GOING TO BITE US IN THE FUTURE ... .
321-
if PY26 or ((zlib.ZLIB_VERSION == '1.2.7' or zlib.ZLIB_VERSION == '1.2.5') and not sys.platform == 'darwin'):
322+
if PY26 or ((zlib.ZLIB_VERSION == '1.2.7' or zlib.ZLIB_VERSION == '1.2.5') and not is_darwin):
322323
unused_datalen = len(self._zip.unconsumed_tail)
323324
else:
324325
unused_datalen = len(self._zip.unconsumed_tail) + len(self._zip.unused_data)
@@ -447,7 +448,7 @@ def _set_cache_brute_(self, attr):
447448
# TODO: There should be a special case if there is only one stream
448449
# Then the default-git algorithm should perform a tad faster, as the
449450
# delta is not peaked into, causing less overhead.
450-
buffer_info_list = list()
451+
buffer_info_list = []
451452
max_target_size = 0
452453
for dstream in self._dstreams:
453454
buf = dstream.read(512) # read the header information + X

Diff for: gitdb/test/db/lib.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ class TestDBBase(TestBase):
3939

4040
# data
4141
two_lines = b'1234\nhello world'
42-
all_data = (two_lines, )
42+
all_data = (two_lines,)
4343

4444
def _assert_object_writing_simple(self, db):
4545
# write a bunch of objects and query their streams and info
@@ -56,10 +56,10 @@ def _assert_object_writing_simple(self, db):
5656
assert isinstance(info, OInfo)
5757
assert info.type == istream.type and info.size == istream.size
5858

59-
stream = db.stream(istream.binsha)
60-
assert isinstance(stream, OStream)
61-
assert stream.binsha == info.binsha and stream.type == info.type
62-
assert stream.read() == data
59+
with db.stream(istream.binsha) as stream:
60+
assert isinstance(stream, OStream)
61+
assert stream.binsha == info.binsha and stream.type == info.type
62+
assert stream.read() == data
6363
# END for each item
6464

6565
assert db.size() == null_objs + ni

Diff for: gitdb/test/db/test_git.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ def test_reading(self):
2424
# access should be possible
2525
gitdb_sha = next(gdb.sha_iter())
2626
assert isinstance(gdb.info(gitdb_sha), OInfo)
27-
assert isinstance(gdb.stream(gitdb_sha), OStream)
27+
with gdb.stream(gitdb_sha) as stream:
28+
assert isinstance(gdb.stream(gitdb_sha), OStream)
2829
ni = 50
2930
assert gdb.size() >= ni
3031
sha_list = list(gdb.sha_iter())

Diff for: gitdb/test/db/test_pack.py

+10
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,16 @@
1313

1414
import os
1515
import random
16+
from gitdb.util import mman, HIDE_WINDOWS_KNOWN_ERRORS
1617

1718

1819
class TestPackDB(TestDBBase):
1920

21+
## Unless HIDE_WINDOWS_KNOWN_ERRORS, on Windows fails with:
22+
# File "D:\Work\gitdb.git\gitdb\test\db\test_pack.py", line 41, in test_writing
23+
# os.rename(pack_path, new_pack_path)
24+
# PermissionError: [WinError 32] The process cannot access the file
25+
# because it is being used by another process: 'pack-c0438c19fb16422b6bbcce24387b3264416d485b.packrenamed'
2026
@with_rw_directory
2127
@with_packs_rw
2228
def test_writing(self, path):
@@ -30,6 +36,10 @@ def test_writing(self, path):
3036
# packs removed - rename a file, should affect the glob
3137
pack_path = pdb.entities()[0].pack().path()
3238
new_pack_path = pack_path + "renamed"
39+
## FIXME: Had to manually collect leaked files!!
40+
if HIDE_WINDOWS_KNOWN_ERRORS:
41+
leaked_mmaps = mman.collect()
42+
self.assertEqual(leaked_mmaps, 6)
3343
os.rename(pack_path, new_pack_path)
3444

3545
pdb.update_cache(force=True)

Diff for: gitdb/test/lib.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
import unittest
1818

1919
from gitdb import OStream
20-
from gitdb.util import rmtree
20+
from gitdb.util import rmtree, mman, HIDE_WINDOWS_KNOWN_ERRORS
2121
from gitdb.utils.compat import xrange
2222

2323

@@ -96,6 +96,13 @@ def wrapper(self):
9696
# memory maps closed, once objects go out of scope. For some reason
9797
# though this is not the case here unless we collect explicitly.
9898
if not keep:
99+
if HIDE_WINDOWS_KNOWN_ERRORS:
100+
## Or else 2 Windows TCs fail with:
101+
# File "D:\Work\gitdb.git\gitdb\util.py", line 141, in onerror
102+
# func(path) # Will scream if still not possible to delete.
103+
# PermissionError: [WinError 32] The process cannot access the file
104+
# because it is being used by another process: 'sss\\index_cc_wll5'
105+
mman.collect()
99106
gc.collect()
100107
rmtree(path)
101108
# END handle exception

Diff for: gitdb/test/performance/test_pack_streaming.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ def test_pack_writing(self):
4646
st = time()
4747
for sha in pdb.sha_iter():
4848
count += 1
49-
pdb.stream(sha)
49+
with pdb.stream(sha):
50+
pass
5051
if count == ni:
5152
break
5253
# END gather objects for pack-writing
@@ -55,6 +56,8 @@ def test_pack_writing(self):
5556
(ni, elapsed, ni / (elapsed or 1)), file=sys.stderr)
5657

5758
st = time()
59+
## We are leaking files here, but we don't care...
60+
# and we need a `contextlib.ExitStack` to safely close them.
5861
PackEntity.write_pack((pdb.stream(sha) for sha in pdb.sha_iter()), ostream.write, object_count=ni)
5962
elapsed = time() - st
6063
total_kb = ostream.bytes_written() / 1000

Diff for: gitdb/test/test_example.py

+7-14
Original file line numberDiff line numberDiff line change
@@ -18,26 +18,19 @@ def test_base(self):
1818

1919
for sha1 in ldb.sha_iter():
2020
oinfo = ldb.info(sha1)
21-
ostream = ldb.stream(sha1)
22-
assert oinfo[:3] == ostream[:3]
21+
with ldb.stream(sha1) as ostream:
22+
assert oinfo[:3] == ostream[:3]
2323

24-
assert len(ostream.read()) == ostream.size
24+
assert len(ostream.read()) == ostream.size
2525
assert ldb.has_object(oinfo.binsha)
2626
# END for each sha in database
27-
# assure we close all files
28-
try:
29-
del(ostream)
30-
del(oinfo)
31-
except UnboundLocalError:
32-
pass
33-
# END ignore exception if there are no loose objects
3427

3528
data = "my data".encode("ascii")
3629
istream = IStream("blob", len(data), BytesIO(data))
3730

3831
# the object does not yet have a sha
3932
assert istream.binsha is None
40-
ldb.store(istream)
41-
# now the sha is set
42-
assert len(istream.binsha) == 20
43-
assert ldb.has_object(istream.binsha)
33+
with ldb.store(istream):
34+
# now the sha is set
35+
assert len(istream.binsha) == 20
36+
assert ldb.has_object(istream.binsha)

Diff for: gitdb/test/test_pack.py

+41-36
Original file line numberDiff line numberDiff line change
@@ -88,42 +88,42 @@ def _assert_pack_file(self, pack, version, size):
8888

8989
num_obj = 0
9090
for obj in pack.stream_iter():
91-
num_obj += 1
92-
info = pack.info(obj.pack_offset)
93-
stream = pack.stream(obj.pack_offset)
94-
95-
assert info.pack_offset == stream.pack_offset
96-
assert info.type_id == stream.type_id
97-
assert hasattr(stream, 'read')
98-
99-
# it should be possible to read from both streams
100-
assert obj.read() == stream.read()
101-
102-
streams = pack.collect_streams(obj.pack_offset)
103-
assert streams
104-
105-
# read the stream
106-
try:
107-
dstream = DeltaApplyReader.new(streams)
108-
except ValueError:
109-
# ignore these, old git versions use only ref deltas,
110-
# which we havent resolved ( as we are without an index )
111-
# Also ignore non-delta streams
112-
continue
113-
# END get deltastream
114-
115-
with dstream:
116-
# read all
117-
data = dstream.read()
118-
assert len(data) == dstream.size
119-
120-
# test seek
121-
dstream.seek(0)
122-
assert dstream.read() == data
123-
124-
# read chunks
125-
# NOTE: the current implementation is safe, it basically transfers
126-
# all calls to the underlying memory map
91+
with obj:
92+
num_obj += 1
93+
info = pack.info(obj.pack_offset)
94+
with pack.stream(obj.pack_offset) as stream:
95+
assert info.pack_offset == stream.pack_offset
96+
assert info.type_id == stream.type_id
97+
assert hasattr(stream, 'read')
98+
99+
# it should be possible to read from both streams
100+
assert obj.read() == stream.read()
101+
102+
streams = pack.collect_streams(obj.pack_offset)
103+
assert streams
104+
105+
# read the stream
106+
try:
107+
dstream = DeltaApplyReader.new(streams)
108+
except ValueError:
109+
# ignore these, old git versions use only ref deltas,
110+
# which we havent resolved ( as we are without an index )
111+
# Also ignore non-delta streams
112+
continue
113+
# END get deltastream
114+
115+
with dstream:
116+
# read all
117+
data = dstream.read()
118+
assert len(data) == dstream.size
119+
120+
# test seek
121+
dstream.seek(0)
122+
assert dstream.read() == data
123+
124+
# read chunks
125+
# NOTE: the current implementation is safe, it basically transfers
126+
# all calls to the underlying memory map
127127

128128
# END for each object
129129
assert num_obj == size
@@ -142,6 +142,11 @@ def test_pack(self):
142142
self._assert_pack_file(pack, version, size)
143143
# END for each pack to test
144144

145+
## Unless HIDE_WINDOWS_KNOWN_ERRORS, on Windows fails with:
146+
# File "D:\Work\gitdb.git\gitdb\util.py", line 141, in onerror
147+
# func(path) # Will scream if still not possible to delete.
148+
# PermissionError: [WinError 32] The process cannot access the file
149+
# because it is being used by another process: 'sss\\index_cc_wll5'
145150
@with_rw_directory
146151
def test_pack_entity(self, rw_dir):
147152
pack_objs = list()

Diff for: gitdb/test/test_stream.py

+8-9
Original file line numberDiff line numberDiff line change
@@ -154,13 +154,12 @@ def test_decompress_reader_special_case(self):
154154
mdb = MemoryDB()
155155
for sha in (b'888401851f15db0eed60eb1bc29dec5ddcace911',
156156
b'7bb839852ed5e3a069966281bb08d50012fb309b',):
157-
ostream = odb.stream(hex_to_bin(sha))
158-
159-
# if there is a bug, we will be missing one byte exactly !
160-
data = ostream.read()
161-
assert len(data) == ostream.size
162-
163-
# Putting it back in should yield nothing new - after all, we have
164-
dump = mdb.store(IStream(ostream.type, ostream.size, BytesIO(data)))
165-
assert dump.hexsha == sha
157+
with odb.stream(hex_to_bin(sha)) as ostream:
158+
# if there is a bug, we will be missing one byte exactly !
159+
data = ostream.read()
160+
assert len(data) == ostream.size
161+
162+
# Putting it back in should yield nothing new - after all, we have
163+
dump = mdb.store(IStream(ostream.type, ostream.size, BytesIO(data)))
164+
assert dump.hexsha == sha
166165
# end for each loose object sha to test

0 commit comments

Comments
 (0)