Skip to content

Commit 538cc1b

Browse files
authored
Merge pull request #161 from pycompression/large-piped
Fix reading from a large compressed file using external process
2 parents 3e0c4b0 + 74642c5 commit 538cc1b

File tree

3 files changed

+15
-10
lines changed

3 files changed

+15
-10
lines changed

src/xopen/__init__.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,7 @@ def _open_process(self):
267267
# data continuously to the process stdin on another thread.
268268
self.in_thread = threading.Thread(target=self._feed_pipe)
269269
self.in_thread.start()
270+
self._process_explicitly_terminated = False
270271
self._file: BinaryIO = self.process.stdout # type: ignore
271272
self._wait_for_output_or_process_exit()
272273
self._raise_if_error()
@@ -290,7 +291,11 @@ def _feed_pipe(self):
290291
if chunk == b"":
291292
self.in_pipe.close()
292293
return
293-
self.in_pipe.write(chunk)
294+
try:
295+
self.in_pipe.write(chunk)
296+
except BrokenPipeError:
297+
if not self._process_explicitly_terminated:
298+
raise
294299
finally:
295300
self.in_pipe.close()
296301

@@ -329,14 +334,15 @@ def close(self) -> None:
329334
return
330335
check_allowed_code_and_message = False
331336
if "r" in self._mode:
332-
self._feeding = False
333-
self._file.read()
334337
retcode = self.process.poll()
335338
if retcode is None:
336339
# still running
340+
self._process_explicitly_terminated = True
337341
self.process.terminate()
338342
check_allowed_code_and_message = True
339343
self.process.wait()
344+
self._feeding = False
345+
self._file.read()
340346
if self.in_thread:
341347
self.in_thread.join()
342348
self._file.close()

tests/conftest.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,12 @@
1010
def create_large_file(tmp_path):
1111
def _create_large_file(extension):
1212
path = tmp_path / f"large{extension}"
13-
random_text = "".join(random.choices(string.ascii_lowercase, k=1024))
14-
# Make the text a lot bigger in order to ensure that it is larger than the
15-
# pipe buffer size.
16-
random_text *= 2048
13+
random.seed(0)
14+
chars = string.ascii_lowercase + "\n"
15+
# Do not decrease this length. The generated file needs to have
16+
# a certain length after compression to trigger some bugs
17+
# (in particular, 512 kB is not sufficient).
18+
random_text = "".join(random.choices(chars, k=1024 * 1024))
1719
with xopen(path, "w") as f:
1820
f.write(random_text)
1921
return path

tests/test_piped.py

-3
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
import os
77
import shutil
88
import sys
9-
import time
109
import pytest
1110
from pathlib import Path
1211
from itertools import cycle
@@ -189,8 +188,6 @@ def test_reader_close(reader, create_large_file):
189188
large_file, "rb", program_settings=program_settings
190189
) as f:
191190
f.readline()
192-
time.sleep(0.2)
193-
# The subprocess should be properly terminated now
194191

195192

196193
def test_invalid_gzip_compression_level(gzip_writer, tmp_path):

0 commit comments

Comments
 (0)