16
16
from openeo .internal .warnings import deprecated , legacy_alias
17
17
from openeo .rest import (
18
18
DEFAULT_DOWNLOAD_CHUNK_SIZE ,
19
+ DEFAULT_DOWNLOAD_RANGE_SIZE ,
19
20
DEFAULT_JOB_STATUS_POLL_CONNECTION_RETRY_INTERVAL ,
20
21
DEFAULT_JOB_STATUS_POLL_INTERVAL_MAX ,
21
22
DEFAULT_JOB_STATUS_POLL_SOFT_ERROR_MAX ,
36
37
37
38
38
39
DEFAULT_JOB_RESULTS_FILENAME = "job-results.json"
39
- MAX_RETRIES_PER_CHUNK = 3
40
+ MAX_RETRIES_PER_RANGE = 3
40
41
RETRIABLE_STATUSCODES = [408 , 429 , 500 , 501 , 502 , 503 , 504 ]
41
42
42
43
class BatchJob :
@@ -384,7 +385,7 @@ def __repr__(self):
384
385
)
385
386
386
387
def download (
387
- self , target : Optional [Union [Path , str ]] = None , * , chunk_size : int = DEFAULT_DOWNLOAD_CHUNK_SIZE
388
+ self , target : Optional [Union [Path , str ]] = None , * , chunk_size : int = DEFAULT_DOWNLOAD_CHUNK_SIZE , range_size : int = DEFAULT_DOWNLOAD_RANGE_SIZE
388
389
) -> Path :
389
390
"""
390
391
Download asset to given location
@@ -399,7 +400,7 @@ def download(
399
400
target = target / self .name
400
401
ensure_dir (target .parent )
401
402
logger .info ("Downloading Job result asset {n!r} from {h!s} to {t!s}" .format (n = self .name , h = self .href , t = target ))
402
- self ._download_to_file (url = self .href , target = target , chunk_size = chunk_size )
403
+ self ._download_to_file (url = self .href , target = target , chunk_size = chunk_size , range_size = range_size )
403
404
return target
404
405
405
406
def _get_response (self , stream = True ) -> requests .Response :
@@ -418,26 +419,26 @@ def load_bytes(self) -> bytes:
418
419
# TODO: more `load` methods e.g.: load GTiff asset directly as numpy array
419
420
420
421
421
- def _download_to_file (self , url : str , target : Path , chunk_size : int ):
422
+ def _download_to_file (self , url : str , target : Path , * , chunk_size : int = DEFAULT_DOWNLOAD_CHUNK_SIZE , range_size : int = DEFAULT_DOWNLOAD_RANGE_SIZE ):
422
423
head = self .job .connection .head (url , stream = True )
423
- if head .ok and head .headers .get ("Accept-Ranges" ) == "bytes" :
424
+ if head .ok and head .headers .get ("Accept-Ranges" ) == "bytes" and 'Content-Length' in head . headers :
424
425
file_size = int (head .headers ['Content-Length' ])
425
- self ._download_chunked (url = url , target = target , file_size = file_size , chunk_size = chunk_size )
426
+ self ._download_ranged (url = url , target = target , file_size = file_size , chunk_size = chunk_size , range_size = range_size )
426
427
else :
427
- self ._download_unchunked (url = url , target = target )
428
+ self ._download_all_at_once (url = url , target = target , chunk_size = chunk_size )
428
429
429
430
430
- def _download_chunked (self , url : str , target : Path , file_size : int , chunk_size : int ):
431
+ def _download_ranged (self , url : str , target : Path , file_size : int , * , chunk_size : int = DEFAULT_DOWNLOAD_CHUNK_SIZE , range_size : int = DEFAULT_DOWNLOAD_RANGE_SIZE ):
431
432
with target .open ('wb' ) as f :
432
- for from_byte_index in range (0 , file_size , chunk_size ):
433
- to_byte_index = min (from_byte_index + chunk_size - 1 , file_size - 1 )
434
- tries_left = MAX_RETRIES_PER_CHUNK
433
+ for from_byte_index in range (0 , file_size , range_size ):
434
+ to_byte_index = min (from_byte_index + range_size - 1 , file_size - 1 )
435
+ tries_left = MAX_RETRIES_PER_RANGE
435
436
while tries_left > 0 :
436
437
try :
437
438
range_headers = {"Range" : f"bytes={ from_byte_index } -{ to_byte_index } " }
438
439
with self .job .connection .get (path = url , headers = range_headers , stream = True ) as r :
439
440
r .raise_for_status ()
440
- shutil .copyfileobj (r .raw , f )
441
+ shutil .copyfileobj (fsrc = r .raw , fdst = f , length = chunk_size )
441
442
break
442
443
except OpenEoApiPlainError as error :
443
444
tries_left -= 1
@@ -448,11 +449,11 @@ def _download_chunked(self, url: str, target: Path, file_size: int, chunk_size:
448
449
raise error
449
450
450
451
451
- def _download_unchunked (self , url : str , target : Path ):
452
+ def _download_all_at_once (self , url : str , target : Path , * , chunk_size : int = DEFAULT_DOWNLOAD_CHUNK_SIZE ):
452
453
with self .job .connection .get (path = url , stream = True ) as r :
453
454
r .raise_for_status ()
454
455
with target .open ("wb" ) as f :
455
- shutil .copyfileobj (r .raw , f )
456
+ shutil .copyfileobj (fsrc = r .raw , fdst = f , length = chunk_size )
456
457
457
458
458
459
class MultipleAssetException (OpenEoClientException ):
@@ -532,7 +533,7 @@ def get_asset(self, name: str = None) -> ResultAsset:
532
533
"No asset {n!r} in: {a}" .format (n = name , a = [a .name for a in assets ])
533
534
)
534
535
535
- def download_file (self , target : Union [Path , str ] = None , name : str = None , chunk_size = DEFAULT_DOWNLOAD_CHUNK_SIZE ) -> Path :
536
+ def download_file (self , target : Union [Path , str ] = None , name : str = None , * , chunk_size = DEFAULT_DOWNLOAD_CHUNK_SIZE , range_size : int = DEFAULT_DOWNLOAD_RANGE_SIZE ) -> Path :
536
537
"""
537
538
Download single asset. Can be used when there is only one asset in the
538
539
:py:class:`JobResults`, or when the desired asset name is given explicitly.
@@ -544,7 +545,7 @@ def download_file(self, target: Union[Path, str] = None, name: str = None, chunk
544
545
:return: path of downloaded asset
545
546
"""
546
547
try :
547
- return self .get_asset (name = name ).download (target = target , chunk_size = chunk_size )
548
+ return self .get_asset (name = name ).download (target = target , chunk_size = chunk_size , range_size = range_size )
548
549
except MultipleAssetException :
549
550
raise OpenEoClientException (
550
551
"Can not use `download_file` with multiple assets. Use `download_files` instead." )
0 commit comments