Skip to content

Commit ff50b8b

Browse files
authored
Merge pull request #476 from Azure/dev
1.3.1 release for blob and file
2 parents 4b8dabf + 3103336 commit ff50b8b

File tree

592 files changed

+143010
-6110
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

592 files changed

+143010
-6110
lines changed

.github/ISSUE_TEMPLATE.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
### Which service (blob, file, queue) does this issue concern?
22

33

4+
### Which version of the SDK was used? Please provide the output of `pip freeze`.
5+
6+
47
### What problem was encountered?
58

69

azure-storage-blob/ChangeLog.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22

33
> See [BreakingChanges](BreakingChanges.md) for a detailed list of API breaks.
44
5+
## Version 1.3.1:
6+
- Fixed a design flaw where the get_blob_to_* methods buffered the entire blob when max_connections was set to 1.
7+
- Added support for access conditions on append_blob_from_* methods.
8+
59
## Version 1.3.0:
610

711
- Added support for the 2018-03-28 REST version. Please see our REST API documentation and blog for information about the related added features.

azure-storage-blob/azure/storage/blob/_constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# --------------------------------------------------------------------------
66

77
__author__ = 'Microsoft Corp. <[email protected]>'
8-
__version__ = '1.3.0'
8+
__version__ = '1.3.1'
99

1010
# x-ms-version for storage service.
1111
X_MS_VERSION = '2018-03-28'

azure-storage-blob/azure/storage/blob/_download_chunking.py

Lines changed: 72 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,16 @@
55
# --------------------------------------------------------------------------
66
import threading
77

8-
from azure.storage.common._error import _ERROR_NO_SINGLE_THREAD_CHUNKING
9-
108

119
def _download_blob_chunks(blob_service, container_name, blob_name, snapshot,
1210
download_size, block_size, progress, start_range, end_range,
1311
stream, max_connections, progress_callback, validate_content,
1412
lease_id, if_modified_since, if_unmodified_since, if_match,
1513
if_none_match, timeout, operation_context):
16-
if max_connections <= 1:
17-
raise ValueError(_ERROR_NO_SINGLE_THREAD_CHUNKING.format('blob'))
1814

19-
downloader = _BlobChunkDownloader(
15+
downloader_class = _ParallelBlobChunkDownloader if max_connections > 1 else _SequentialBlobChunkDownloader
16+
17+
downloader = downloader_class(
2018
blob_service,
2119
container_name,
2220
blob_name,
@@ -38,35 +36,42 @@ def _download_blob_chunks(blob_service, container_name, blob_name, snapshot,
3836
operation_context,
3937
)
4038

41-
import concurrent.futures
42-
executor = concurrent.futures.ThreadPoolExecutor(max_connections)
43-
result = list(executor.map(downloader.process_chunk, downloader.get_chunk_offsets()))
39+
if max_connections > 1:
40+
import concurrent.futures
41+
executor = concurrent.futures.ThreadPoolExecutor(max_connections)
42+
list(executor.map(downloader.process_chunk, downloader.get_chunk_offsets()))
43+
else:
44+
for chunk in downloader.get_chunk_offsets():
45+
downloader.process_chunk(chunk)
4446

4547

4648
class _BlobChunkDownloader(object):
4749
def __init__(self, blob_service, container_name, blob_name, snapshot, download_size,
4850
chunk_size, progress, start_range, end_range, stream,
4951
progress_callback, validate_content, lease_id, if_modified_since,
5052
if_unmodified_since, if_match, if_none_match, timeout, operation_context):
53+
# identifiers for the blob
5154
self.blob_service = blob_service
5255
self.container_name = container_name
5356
self.blob_name = blob_name
5457
self.snapshot = snapshot
55-
self.chunk_size = chunk_size
5658

59+
# information on the download range/chunk size
60+
self.chunk_size = chunk_size
5761
self.download_size = download_size
5862
self.start_index = start_range
5963
self.blob_end = end_range
6064

65+
# the destination that we will write to
6166
self.stream = stream
62-
self.stream_start = stream.tell()
63-
self.stream_lock = threading.Lock()
67+
68+
# progress related
6469
self.progress_callback = progress_callback
6570
self.progress_total = progress
66-
self.progress_lock = threading.Lock()
71+
72+
# parameters for each get blob operation
6773
self.timeout = timeout
6874
self.operation_context = operation_context
69-
7075
self.validate_content = validate_content
7176
self.lease_id = lease_id
7277
self.if_modified_since = if_modified_since
@@ -92,17 +97,13 @@ def process_chunk(self, chunk_start):
9297
self._write_to_stream(chunk_data, chunk_start)
9398
self._update_progress(length)
9499

100+
# no-op in the base class; subclasses override this to report download progress
95101
def _update_progress(self, length):
96-
if self.progress_callback is not None:
97-
with self.progress_lock:
98-
self.progress_total += length
99-
total = self.progress_total
100-
self.progress_callback(total, self.download_size)
102+
pass
101103

104+
# no-op in the base class; subclasses override this to write the chunk to the destination stream
102105
def _write_to_stream(self, chunk_data, chunk_start):
103-
with self.stream_lock:
104-
self.stream.seek(self.stream_start + (chunk_start - self.start_index))
105-
self.stream.write(chunk_data)
106+
pass
106107

107108
def _download_chunk(self, chunk_start, chunk_end):
108109
response = self.blob_service._get_blob(
@@ -125,3 +126,53 @@ def _download_chunk(self, chunk_start, chunk_end):
125126
# that subsequent downloads are to an unmodified blob
126127
self.if_match = response.properties.etag
127128
return response
129+
130+
131+
class _ParallelBlobChunkDownloader(_BlobChunkDownloader):
132+
def __init__(self, blob_service, container_name, blob_name, snapshot, download_size,
133+
chunk_size, progress, start_range, end_range, stream,
134+
progress_callback, validate_content, lease_id, if_modified_since,
135+
if_unmodified_since, if_match, if_none_match, timeout, operation_context):
136+
137+
super(_ParallelBlobChunkDownloader, self).__init__(blob_service, container_name, blob_name, snapshot,
138+
download_size,
139+
chunk_size, progress, start_range, end_range, stream,
140+
progress_callback, validate_content, lease_id,
141+
if_modified_since,
142+
if_unmodified_since, if_match, if_none_match, timeout,
143+
operation_context)
144+
145+
# for a parallel download, the stream is always seekable, so we note down the current position
146+
# in order to seek to the right place when out-of-order chunks come in
147+
self.stream_start = stream.tell()
148+
149+
# since parallel operations are going on
150+
# it is essential to protect the writing and progress reporting operations
151+
self.stream_lock = threading.Lock()
152+
self.progress_lock = threading.Lock()
153+
154+
def _update_progress(self, length):
155+
if self.progress_callback is not None:
156+
with self.progress_lock:
157+
self.progress_total += length
158+
total_so_far = self.progress_total
159+
self.progress_callback(total_so_far, self.download_size)
160+
161+
def _write_to_stream(self, chunk_data, chunk_start):
162+
with self.stream_lock:
163+
self.stream.seek(self.stream_start + (chunk_start - self.start_index))
164+
self.stream.write(chunk_data)
165+
166+
167+
class _SequentialBlobChunkDownloader(_BlobChunkDownloader):
168+
def __init__(self, *args):
169+
super(_SequentialBlobChunkDownloader, self).__init__(*args)
170+
171+
def _update_progress(self, length):
172+
if self.progress_callback is not None:
173+
self.progress_total += length
174+
self.progress_callback(self.progress_total, self.download_size)
175+
176+
def _write_to_stream(self, chunk_data, chunk_start):
177+
# chunk_start is ignored in the case of sequential download since we cannot seek the destination stream
178+
self.stream.write(chunk_data)

azure-storage-blob/azure/storage/blob/_upload_chunking.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,29 +4,31 @@
44
# license information.
55
# --------------------------------------------------------------------------
66
from io import (BytesIO, IOBase, SEEK_CUR, SEEK_END, SEEK_SET, UnsupportedOperation)
7-
from math import ceil
87
from threading import Lock
98

9+
from math import ceil
10+
1011
from azure.storage.common._common_conversion import _encode_base64
1112
from azure.storage.common._error import _ERROR_VALUE_SHOULD_BE_SEEKABLE_STREAM
1213
from azure.storage.common._serialization import (
1314
url_quote,
1415
_get_data_bytes_only,
1516
_len_plus
1617
)
18+
from ._constants import (
19+
_LARGE_BLOB_UPLOAD_MAX_READ_BUFFER_SIZE
20+
)
1721
from ._encryption import (
1822
_get_blob_encryptor_and_padder,
1923
)
2024
from .models import BlobBlock
21-
from ._constants import (
22-
_LARGE_BLOB_UPLOAD_MAX_READ_BUFFER_SIZE
23-
)
2425

2526

2627
def _upload_blob_chunks(blob_service, container_name, blob_name,
2728
blob_size, block_size, stream, max_connections,
2829
progress_callback, validate_content, lease_id, uploader_class,
29-
maxsize_condition=None, if_match=None, timeout=None,
30+
maxsize_condition=None, if_modified_since=None, if_unmodified_since=None, if_match=None,
31+
if_none_match=None, timeout=None,
3032
content_encryption_key=None, initialization_vector=None, resource_properties=None):
3133
encryptor, padder = _get_blob_encryptor_and_padder(content_encryption_key, initialization_vector,
3234
uploader_class is not _PageBlobChunkUploader)
@@ -49,9 +51,14 @@ def _upload_blob_chunks(blob_service, container_name, blob_name,
4951

5052
uploader.maxsize_condition = maxsize_condition
5153

52-
# ETag matching does not work with parallelism as a ranged upload may start
53-
# before the previous finishes and provides an etag
54-
uploader.if_match = if_match if not max_connections > 1 else None
54+
# Access conditions do not work with parallel uploads, so they are all cleared when max_connections > 1
55+
if max_connections > 1:
56+
uploader.if_match = uploader.if_none_match = uploader.if_modified_since = uploader.if_unmodified_since = None
57+
else:
58+
uploader.if_match = if_match
59+
uploader.if_none_match = if_none_match
60+
uploader.if_modified_since = if_modified_since
61+
uploader.if_unmodified_since = if_unmodified_since
5562

5663
if progress_callback is not None:
5764
progress_callback(0, blob_size)
@@ -322,6 +329,10 @@ def _upload_chunk(self, chunk_offset, chunk_data):
322329
lease_id=self.lease_id,
323330
maxsize_condition=self.maxsize_condition,
324331
timeout=self.timeout,
332+
if_modified_since=self.if_modified_since,
333+
if_unmodified_since=self.if_unmodified_since,
334+
if_match=self.if_match,
335+
if_none_match=self.if_none_match
325336
)
326337

327338
self.current_length = resp.append_offset

0 commit comments

Comments
 (0)