34
34
# Download timeout read from the DOWNLOAD_TIMEOUT environment variable
# (default "1800"), converted to int. Presumably seconds -- TODO confirm
# against the code that performs the download.
DOWNLOAD_TIMEOUT = int (os .getenv ('DOWNLOAD_TIMEOUT' , '1800' ))
35
35
# Value of the REPO_SIZE_FILE environment variable; empty string when unset.
REPO_SIZE_FILE = os .getenv ('REPO_SIZE_FILE' , '' )
36
36
37
# Module-level table filled by flush_package_info(): maps a package filename
# to {'size': int, 'sha256': {'new': str, 'old': str or None}}.
+ package_info = {}
38
+
37
39
# Captures the text inside an "@{...}" placeholder.
pattern_os_template = re .compile (r"@\{(.+)\}" )
38
40
# Captures the value of a "Filename:" line inside a multi-line package stanza.
pattern_package_name = re .compile (r"^Filename: (.+)$" , re .MULTILINE )
39
41
# Captures the decimal value of a "Size:" line inside a multi-line package stanza.
pattern_package_size = re .compile (r"^Size: (\d+)$" , re .MULTILINE )
@@ -58,7 +60,7 @@ def replace_os_template(os_list: List[str]) -> List[str]:
58
60
ret .append (i )
59
61
return ret
60
62
61
- def check_and_download (url : str , dst_file : Path , caching = False )-> int :
63
+ def check_and_download (url : str , dst_file : Path , rename = True , caching = False )-> int :
62
64
try :
63
65
if caching :
64
66
if url in download_cache :
@@ -99,6 +101,47 @@ def mkdir_with_dot_tmp(folder: Path)->Tuple[Path, Path]:
99
101
tmpdir .mkdir (parents = True , exist_ok = True )
100
102
return (folder , tmpdir )
101
103
104
def CalcFileSha256(filname):
    """Return the hex-encoded SHA-256 digest of a file's contents.

    The file is read and hashed in 1 MiB chunks so that memory use stays
    bounded no matter how large the file is.

    Args:
        filname: Path of the file to hash (anything ``open`` accepts).

    Returns:
        The digest as a lowercase hexadecimal string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    sha256obj = hashlib.sha256()
    with open(filname, "rb") as f:
        # iter(callable, sentinel) keeps reading until read() returns b"" (EOF).
        for block in iter(lambda: f.read(1024 ** 2), b""):
            sha256obj.update(block)
    return sha256obj.hexdigest()
111
+
112
def flush_package_info(content):
    """Merge the package stanzas found in *content* into ``package_info``.

    Each stanza must provide the fields matched by ``pattern_package_name``,
    ``pattern_package_size`` and ``pattern_package_sha256``. For a filename
    seen for the first time an entry ``{'size', 'sha256': {'new', 'old'}}``
    is created with ``old`` set to ``None``. For a filename that is already
    present the size is refreshed, the previous ``new`` checksum is moved to
    ``old`` (only while ``old`` is still ``None``) and the freshly parsed
    checksum becomes ``new``.

    A stanza that cannot be parsed is reported and skipped; the remaining
    stanzas are still processed so one malformed entry does not truncate
    the table.

    Args:
        content: Decoded text of a package index.

    Returns:
        0 if every stanza was parsed, 1 if at least one stanza was skipped.
    """
    failed = False

    # NOTE(review): the separator is restored as a blank line ('\n\n'); the
    # listing this was recovered from had its whitespace garbled -- confirm.
    for pkg in content.split('\n\n'):
        if len(pkg) < 10:  # ignore blanks
            continue

        try:
            pkg_filename = pattern_package_name.search(pkg).group(1)
            pkg_size = int(pattern_package_size.search(pkg).group(1))
            pkg_checksum = pattern_package_sha256.search(pkg).group(1)
        except (AttributeError, IndexError, ValueError):
            # search() returned None (field missing) or a captured value is
            # unusable; report it and keep going with the next stanza.
            print("Failed to parse one package description", flush=True)
            traceback.print_exc()
            failed = True
            continue

        if pkg_filename not in package_info:
            package_info[pkg_filename] = {
                'size': pkg_size,
                'sha256': {
                    'new': pkg_checksum,
                    'old': None,
                },
            }
            continue

        pkg_info = package_info[pkg_filename]
        pkg_info['size'] = pkg_size
        checksums = pkg_info['sha256']
        if checksums['new'] is not None and checksums['old'] is None:
            checksums['old'] = checksums['new']
        # NOTE(review): assumed to run for every repeated filename, not only
        # when the shift above happened, so 'size' and 'new' always describe
        # the same stanza -- confirm against the original indentation.
        checksums['new'] = pkg_checksum

    return 1 if failed else 0
144
+
102
145
def move_files_in (src : Path , dst : Path ):
103
146
empty = True
104
147
for file in src .glob ('*' ):
@@ -110,6 +153,9 @@ def move_files_in(src: Path, dst: Path):
110
153
print (f"{ src } is empty" )
111
154
112
155
def apt_mirror (base_url : str , dist : str , repo : str , arch : str , dest_base_dir : Path , deb_set : Dict [str , int ])-> int :
156
+
157
+ global package_info
158
+
113
159
if not dest_base_dir .is_dir ():
114
160
print ("Destination directory is empty, cannot continue" )
115
161
return 1
@@ -134,6 +180,8 @@ def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Pa
134
180
pkgidx_dir ,pkgidx_tmp_dir = mkdir_with_dot_tmp (comp_dir / arch_dir )
135
181
with open (release_file , "r" ) as fd :
136
182
pkgidx_content = None
183
+ pkgidx_file_old = None
184
+ package_info = {}
137
185
cnt_start = False
138
186
for line in fd :
139
187
if cnt_start :
@@ -164,6 +212,8 @@ def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Pa
164
212
pkgidx_file .unlink ()
165
213
continue
166
214
if pkgidx_content is None and pkgidx_file .stem == 'Packages' :
215
+ pkgidx_file_old = Path (f'{ dist_dir } /{ filename } .old' )
216
+ shutil .copy (pkgidx_file , pkgidx_file_old )
167
217
print (f"getting packages index content from { pkgidx_file .name } " , flush = True )
168
218
suffix = pkgidx_file .suffix
169
219
if suffix == '.xz' :
@@ -176,12 +226,43 @@ def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Pa
176
226
pkgidx_content = content .decode ('utf-8' )
177
227
else :
178
228
print ("unsupported format" )
229
+ continue
230
+
231
+ flush_package_info (pkgidx_content )
232
+
233
+ with pkgidx_file_old .open ('rb' ) as t : content = t .read ()
234
+ if len (content ) != int (filesize ):
235
+ print (f"Invalid size of { pkgidx_file } , expected { filesize } , skipped" )
236
+ pkgidx_file .unlink ()
237
+ continue
238
+ if hashlib .sha256 (content ).hexdigest () != checksum :
239
+ print (f"Invalid checksum of { pkgidx_file } , expected { checksum } , skipped" )
240
+ pkgidx_file .unlink ()
241
+ continue
242
+ if pkgidx_file_old .stem == 'Packages' :
243
+ print (f"getting packages index content from { pkgidx_file_old .name } " , flush = True )
244
+ suffix = pkgidx_file_old .suffix
245
+ if suffix == '.xz.old' :
246
+ pkgidx_content_old = lzma .decompress (content ).decode ('utf-8' )
247
+ elif suffix == '.bz2.old' :
248
+ pkgidx_content_old = bz2 .decompress (content ).decode ('utf-8' )
249
+ elif suffix == '.gz.old' :
250
+ pkgidx_content_old = gzip .decompress (content ).decode ('utf-8' )
251
+ elif suffix == '.old' :
252
+ pkgidx_content_old = content .decode ('utf-8' )
253
+ else :
254
+ print ("unsupported format" )
255
+ continue
256
+
257
+ flush_package_info (pkgidx_content_old )
258
+
179
259
180
260
# Currently only support SHA-256 checksum, because
181
261
# "Clients may not use the MD5Sum and SHA1 fields for security purposes, and must require a SHA256 or a SHA512 field."
182
262
# from https://wiki.debian.org/DebianRepository/Format#A.22Release.22_files
183
263
if line .startswith ('SHA256:' ):
184
264
cnt_start = True
265
+
185
266
if not cnt_start :
186
267
print ("Cannot find SHA-256 checksum" )
187
268
return 1
@@ -216,18 +297,9 @@ def collect_tmp_dir():
216
297
err = 0
217
298
deb_count = 0
218
299
deb_size = 0
219
- for pkg in pkgidx_content .split ('\n \n ' ):
220
- if len (pkg ) < 10 : # ignore blanks
221
- continue
222
- try :
223
- pkg_filename = pattern_package_name .search (pkg ).group (1 )
224
- pkg_size = int (pattern_package_size .search (pkg ).group (1 ))
225
- pkg_checksum = pattern_package_sha256 .search (pkg ).group (1 )
226
- except :
227
- print ("Failed to parse one package description" , flush = True )
228
- traceback .print_exc ()
229
- err = 1
230
- continue
300
+ for pkg_filename , pkg_info in package_info .items ():
301
+ pkg_size = pkg_info ['size' ]
302
+ pkg_checksum = pkg_info ['sha256' ]
231
303
deb_count += 1
232
304
deb_size += pkg_size
233
305
@@ -237,8 +309,8 @@ def collect_tmp_dir():
237
309
dest_dir .mkdir (parents = True , exist_ok = True )
238
310
if dest_filename .suffix == '.deb' :
239
311
deb_set [str (dest_filename .relative_to (dest_base_dir ))] = pkg_size
240
- if dest_filename .is_file () and dest_filename .stat ().st_size == pkg_size :
241
- print (f"Skipping { pkg_filename } , size { pkg_size } " )
312
+ if dest_filename .is_file () and ( dest_filename .stat ().st_size == pkg_size and pkg_checksum [ 'old' ] == pkg_checksum [ 'new' ]) :
313
+ print (f"Skipping { pkg_filename } , size { pkg_size } , old sha256 { pkg_checksum [ 'old' ] } , new sha256 { pkg_checksum [ 'new' ] } " )
242
314
continue
243
315
244
316
pkg_url = f"{ base_url } /{ pkg_filename } "
@@ -253,8 +325,8 @@ def collect_tmp_dir():
253
325
with dest_tmp_filename .open ("rb" ) as f :
254
326
for block in iter (lambda : f .read (1024 ** 2 ), b"" ):
255
327
sha .update (block )
256
- if sha .hexdigest () != pkg_checksum :
257
- print (f"Invalid checksum of { dest_filename } , expected { pkg_checksum } " )
328
+ if sha .hexdigest () != pkg_checksum [ 'new' ] :
329
+ print (f"Invalid checksum of { dest_filename } , expected { pkg_checksum [ 'new' ] } " )
258
330
dest_tmp_filename .unlink ()
259
331
continue
260
332
dest_tmp_filename .rename (dest_filename )
0 commit comments