1
+ from inventory import FileState
1
2
from treehash import TreeHash
2
3
import botocore
3
4
import cli
7
8
8
9
class FileUpload :
9
10
10
def __init__(self, vaultName, inventory_entry):
    """Prepare an upload of one inventory entry to the given vault.

    Args:
        vaultName: name of the vault, passed through to the client calls.
        inventory_entry: object describing the file; this constructor reads
            its get_filePath() and get_state(), and, when the state is
            FileState.IN_PROGRESS, its get_upload_id(), get_part_size()
            and get_parts_uploaded().

    Raises:
        OSError: if the entry's file cannot be stat'ed by os.path.getsize.
    """
    self._startTime = 0
    self._vaultName = vaultName
    self._inventory_entry = inventory_entry
    self._fileSizeBytes = os.path.getsize(
        self._inventory_entry.get_filePath())

    # Values that only become known while or after uploading. They are
    # initialised here so the corresponding getters return None instead of
    # raising AttributeError when called on a fresh object.
    self._upload_id = None
    self._endTime = None
    self._checksum = None
    self._http_status = None
    self._archive_id = None
    self._upload_location = None

    if inventory_entry.get_state() == FileState.IN_PROGRESS:
        # Resume: reuse the upload id, part size and progress stored on
        # the inventory entry.
        self._upload_id = inventory_entry.get_upload_id()
        self._partSize = inventory_entry.get_part_size()
        self._partNumUploading = inventory_entry.get_parts_uploaded()
    else:
        # Fresh upload: choose a part size from the file size and start
        # at part 0.
        self._partSize = self._get_best_part_size(self._fileSizeBytes)
        self._partNumUploading = 0
25
+
26
+ def get_state (self ):
27
+ return self ._inventory_entry .get_state ()
28
+
29
+ def get_parts_uploaded (self ):
30
+ return self ._partNumUploading
31
+
32
+ def get_part_size (self ):
33
+ return self ._partSize
34
+
35
+ def get_upload_id (self ):
36
+ return self ._upload_id
37
+
38
+ def get_end_time (self ):
39
+ return self ._endTime
40
+
41
+ def get_checksum (self ):
42
+ return self ._checksum
43
+
44
+ def get_http_status (self ):
45
+ return self ._http_status
46
+
47
+ def get_archive_id (self ):
48
+ return self ._archive_id
49
+
50
+ def get_upload_location (self ):
51
+ return self ._upload_location
17
52
18
53
def formattedFileSize (self ):
19
54
if not hasattr (self , '_formattedFileSize' ):
@@ -27,15 +62,25 @@ def formattedPartSize(self):
27
62
28
63
def upload (self , client ):
29
64
30
- self ._upload = client .initiate_multipart_upload (
31
- vaultName = self ._vaultName ,
32
- archiveDescription = self ._fileName ,
33
- partSize = str (self ._partSize ))
34
-
35
- treehash = TreeHash ()
36
- partBegin = 0
37
- self ._partNumUploading = 0
38
- with open (self ._filePath , "rb" ) as f :
65
+ if (self ._inventory_entry .get_state () == FileState .IN_PROGRESS ):
66
+ self ._upload_id = self ._inventory_entry .get_upload_id ()
67
+ else :
68
+ tmp_upload = client .initiate_multipart_upload (
69
+ vaultName = self ._vaultName ,
70
+ archiveDescription = self ._inventory_entry .get_fileName (),
71
+ partSize = str (self ._partSize ))
72
+ self ._upload_id = tmp_upload ['uploadId' ]
73
+
74
+ if self ._partSize < self ._fileSizeBytes :
75
+ self ._inventory_entry .set_state_from_upload (
76
+ self , FileState .IN_PROGRESS )
77
+
78
+ partBegin = self ._partNumUploading * self ._partSize
79
+ data = b""
80
+ with open (self ._inventory_entry .get_filePath (), "rb" ) as f :
81
+ if partBegin :
82
+ data = f .read (partBegin )
83
+ treehash = TreeHash (data = data , block_size = self ._partSize )
39
84
while partBegin < self ._fileSizeBytes :
40
85
partEnd = partBegin + self ._partSize - 1
41
86
if partEnd > self ._fileSizeBytes :
@@ -44,65 +89,84 @@ def upload(self, client):
44
89
part = f .read (self ._partSize )
45
90
treehash .update (part )
46
91
47
- if partBegin == 0 :
92
+ if not self . _startTime :
48
93
self ._startTime = time .time ()
94
+
49
95
self ._upload_part (client , part , partBegin , partEnd )
50
96
partBegin = partEnd + 1
51
97
self ._partNumUploading += 1
52
98
99
+ if partEnd < self ._fileSizeBytes :
100
+ self ._inventory_entry .set_state_from_upload (
101
+ self , FileState .IN_PROGRESS )
102
+
103
+ completed_treehash = treehash .hexdigest ()
53
104
response = client .complete_multipart_upload (
54
105
vaultName = self ._vaultName ,
55
- uploadId = self ._upload [ 'uploadId' ] ,
106
+ uploadId = self ._upload_id ,
56
107
archiveSize = str (self ._fileSizeBytes ),
57
- checksum = treehash .hexdigest ())
108
+ checksum = completed_treehash )
109
+
110
+ self ._endTime = time .time ()
58
111
59
- cli .cli_progress (self ._fileName ,
112
+ cli .cli_progress (self ._inventory_entry . get_fileName () ,
60
113
self .formattedFileSize (),
61
114
self .formattedPartSize (),
62
115
self ._startTime ,
63
116
self ._fileSizeBytes - 1 ,
64
117
self ._fileSizeBytes - 1 )
65
118
66
- return response
119
+ # Sanity check that's probably unnecessary.
120
+ if treehash .hexdigest () != response ['checksum' ]:
121
+ raise Exception ('checksum mismatch' )
122
+
123
+ self ._checksum = response ['checksum' ]
124
+ self ._http_status = response ['ResponseMetadata' ]['HTTPStatusCode' ]
125
+ self ._archive_id = response ['archiveId' ]
126
+ self ._upload_location = response ['location' ]
127
+ # cli.pp(json.dumps(self, default=lambda o: o.__dict__))
128
+
129
+ self ._inventory_entry .set_state_from_upload (self , FileState .UPLOADED )
67
130
68
131
def _upload_part(self,
                 client,
                 part,
                 partBegin,
                 partEnd):
    """Upload one part of the file, trying at most twice.

    Args:
        client: object exposing upload_multipart_part(vaultName, uploadId,
            range, body).
        part: the bytes to send as the request body.
        partBegin: offset of the first byte of this part within the file.
        partEnd: offset of the last byte of this part (inclusive), as used
            in the 'bytes begin-end/*' range string.

    Returns:
        The response of client.upload_multipart_part on success, or None
        when both attempts raised botocore.exceptions.ClientError.
    """
    cli.cli_progress(self._inventory_entry.get_fileName(),
                     self.formattedFileSize(),
                     self.formattedPartSize(),
                     self._startTime,
                     partBegin,
                     self._fileSizeBytes - 1)

    # The percentage is relative to the index of the last byte. For a
    # 1-byte file that index is 0, so guard against dividing by zero.
    lastByte = self._fileSizeBytes - 1
    if lastByte > 0:
        percentDone = float(partEnd) / lastByte * 100
    else:
        percentDone = 100.0

    for upload_attempt in range(0, 2):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('\n Uploading bytes %d through %d (%d%%)...' % (
            partBegin, partEnd, percentDone))
        try:
            response = client.upload_multipart_part(
                vaultName=self._vaultName,
                uploadId=self._upload_id,
                range='bytes %d-%d/*' % (partBegin, partEnd),
                body=part)
            return response

        except botocore.exceptions.ClientError as e:
            print("\n ")
            print(e)
            print("Retrying...")

    # NOTE(review): after two failures this only reports and returns None;
    # the caller is not told that the part is missing. Confirm whether an
    # exception should be raised here instead.
    print("\n FAILED")
97
162
98
-
99
- def get_best_part_size (fileSizeBytes ):
100
- # We want the smallest possible part size. Maximum parts is 10,000.
101
- # So we find the first part size larger than file_len/10,000.
102
- targetSize = fileSizeBytes / 10000
103
- partSize = 1048576 # min size 1 MB
104
- while partSize < targetSize :
105
- partSize *= 2
106
- if partSize > targetSize or partSize == 4294967296 : # max size 4GB
107
- break
108
- return partSize
163
+ def _get_best_part_size (self , fileSizeBytes ):
164
+ # We want the smallest possible part size. Maximum parts is 10,000.
165
+ # So we find the first part size larger than file_len/10,000.
166
+ targetSize = fileSizeBytes / 10000
167
+ partSize = 1048576 # min size 1 MB
168
+ while partSize < targetSize :
169
+ partSize *= 2
170
+ if partSize > targetSize or partSize == 4294967296 : # max size 4GB
171
+ break
172
+ return partSize
0 commit comments