Skip to content

Commit 5d7a383

Browse files
committed
Add sha256 validation
1 parent 3fd629e commit 5d7a383

File tree

1 file changed

+89
-17
lines changed

1 file changed

+89
-17
lines changed

apt-sync.py

+89-17
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,8 @@
3434
DOWNLOAD_TIMEOUT=int(os.getenv('DOWNLOAD_TIMEOUT', '1800'))
3535
REPO_SIZE_FILE = os.getenv('REPO_SIZE_FILE', '')
3636

37+
package_info = {}
38+
3739
pattern_os_template = re.compile(r"@\{(.+)\}")
3840
pattern_package_name = re.compile(r"^Filename: (.+)$", re.MULTILINE)
3941
pattern_package_size = re.compile(r"^Size: (\d+)$", re.MULTILINE)
@@ -58,7 +60,7 @@ def replace_os_template(os_list: List[str]) -> List[str]:
5860
ret.append(i)
5961
return ret
6062

61-
def check_and_download(url: str, dst_file: Path, caching = False)->int:
63+
def check_and_download(url: str, dst_file: Path, rename = True, caching = False)->int:
6264
try:
6365
if caching:
6466
if url in download_cache:
@@ -99,6 +101,47 @@ def mkdir_with_dot_tmp(folder: Path)->Tuple[Path, Path]:
99101
tmpdir.mkdir(parents=True, exist_ok=True)
100102
return (folder, tmpdir)
101103

104+
def CalcFileSha256(filname):
    """Return the SHA-256 digest of the file at ``filname`` as a hex string.

    The file is read in 1 MiB blocks, so large files are hashed without
    loading their whole content into memory.

    Args:
        filname: Path of the file to hash (anything ``open`` accepts).

    Returns:
        The SHA-256 digest as a hexadecimal string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    sha256obj = hashlib.sha256()
    with open(filname, "rb") as f:
        # iter() with a sentinel stops at the first empty read (EOF).
        for block in iter(lambda: f.read(1024 ** 2), b""):
            sha256obj.update(block)
    return sha256obj.hexdigest()
111+
112+
def flush_package_info(content):
    """Merge the package stanzas found in ``content`` into ``package_info``.

    ``content`` is split on blank lines; chunks shorter than 10 characters
    are ignored. For every remaining chunk the file name, size and checksum
    are extracted with ``pattern_package_name``, ``pattern_package_size``
    and ``pattern_package_sha256`` and recorded in the module-level
    ``package_info`` dict, keyed by file name, as::

        {'size': <int>, 'sha256': {'new': <str>, 'old': <str or None>}}

    When the file name is already recorded, its size is refreshed, the
    stored ``'new'`` checksum is moved to ``'old'`` (only while ``'old'``
    is still unset) and ``'new'`` is set to the checksum just parsed.

    Args:
        content: Text of a packages index.

    Returns:
        1 as soon as one chunk cannot be parsed (the remaining chunks are
        not processed); ``None`` when every chunk was processed.
    """
    # ``package_info`` is only mutated in place here, never rebound, so no
    # ``global`` declaration is required.
    for pkg in content.split('\n\n'):
        if len(pkg) < 10:  # ignore blanks
            continue
        try:
            pkg_filename = pattern_package_name.search(pkg).group(1)
            pkg_size = int(pattern_package_size.search(pkg).group(1))
            pkg_checksum = pattern_package_sha256.search(pkg).group(1)
        except Exception:
            # A missing field makes ``search`` return None, so ``.group``
            # raises. ``Exception`` (not a bare ``except``) keeps
            # KeyboardInterrupt/SystemExit propagating.
            print("Failed to parse one package description", flush=True)
            traceback.print_exc()
            return 1

        if pkg_filename not in package_info:
            package_info[pkg_filename] = {
                'size': pkg_size,
                'sha256': {
                    'new': pkg_checksum,
                    'old': None,
                },
            }
            continue

        pkg_info = package_info[pkg_filename]
        pkg_info['size'] = pkg_size
        checksums = pkg_info['sha256']
        if checksums['new'] is not None and checksums['old'] is None:
            # Keep the first checksum seen for this file as the 'old' one.
            checksums['old'] = checksums['new']
        # NOTE(review): assumes 'new' is refreshed on every repeat sighting,
        # not only when 'old' was just filled in -- confirm against the
        # original indentation.
        checksums['new'] = pkg_checksum
144+
102145
def move_files_in(src: Path, dst: Path):
103146
empty = True
104147
for file in src.glob('*'):
@@ -110,6 +153,9 @@ def move_files_in(src: Path, dst: Path):
110153
print(f"{src} is empty")
111154

112155
def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Path, deb_set: Dict[str, int])->int:
156+
157+
global package_info
158+
113159
if not dest_base_dir.is_dir():
114160
print("Destination directory is empty, cannot continue")
115161
return 1
@@ -134,6 +180,8 @@ def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Pa
134180
pkgidx_dir,pkgidx_tmp_dir = mkdir_with_dot_tmp(comp_dir / arch_dir)
135181
with open(release_file, "r") as fd:
136182
pkgidx_content=None
183+
pkgidx_file_old = None
184+
package_info = {}
137185
cnt_start=False
138186
for line in fd:
139187
if cnt_start:
@@ -164,6 +212,8 @@ def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Pa
164212
pkgidx_file.unlink()
165213
continue
166214
if pkgidx_content is None and pkgidx_file.stem == 'Packages':
215+
pkgidx_file_old = Path(f'{dist_dir}/{filename}.old')
216+
shutil.copy(pkgidx_file, pkgidx_file_old)
167217
print(f"getting packages index content from {pkgidx_file.name}", flush=True)
168218
suffix = pkgidx_file.suffix
169219
if suffix == '.xz':
@@ -176,12 +226,43 @@ def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Pa
176226
pkgidx_content = content.decode('utf-8')
177227
else:
178228
print("unsupported format")
229+
continue
230+
231+
flush_package_info(pkgidx_content)
232+
233+
with pkgidx_file_old.open('rb') as t: content = t.read()
234+
if len(content) != int(filesize):
235+
print(f"Invalid size of {pkgidx_file}, expected {filesize}, skipped")
236+
pkgidx_file.unlink()
237+
continue
238+
if hashlib.sha256(content).hexdigest() != checksum:
239+
print(f"Invalid checksum of {pkgidx_file}, expected {checksum}, skipped")
240+
pkgidx_file.unlink()
241+
continue
242+
if pkgidx_file_old.stem == 'Packages':
243+
print(f"getting packages index content from {pkgidx_file_old.name}", flush=True)
244+
suffix = pkgidx_file_old.suffix
245+
if suffix == '.xz.old':
246+
pkgidx_content_old = lzma.decompress(content).decode('utf-8')
247+
elif suffix == '.bz2.old':
248+
pkgidx_content_old = bz2.decompress(content).decode('utf-8')
249+
elif suffix == '.gz.old':
250+
pkgidx_content_old = gzip.decompress(content).decode('utf-8')
251+
elif suffix == '.old':
252+
pkgidx_content_old = content.decode('utf-8')
253+
else:
254+
print("unsupported format")
255+
continue
256+
257+
flush_package_info(pkgidx_content_old)
258+
179259

180260
# Currently only support SHA-256 checksum, because
181261
# "Clients may not use the MD5Sum and SHA1 fields for security purposes, and must require a SHA256 or a SHA512 field."
182262
# from https://wiki.debian.org/DebianRepository/Format#A.22Release.22_files
183263
if line.startswith('SHA256:'):
184264
cnt_start = True
265+
185266
if not cnt_start:
186267
print("Cannot find SHA-256 checksum")
187268
return 1
@@ -216,18 +297,9 @@ def collect_tmp_dir():
216297
err = 0
217298
deb_count = 0
218299
deb_size = 0
219-
for pkg in pkgidx_content.split('\n\n'):
220-
if len(pkg) < 10: # ignore blanks
221-
continue
222-
try:
223-
pkg_filename = pattern_package_name.search(pkg).group(1)
224-
pkg_size = int(pattern_package_size.search(pkg).group(1))
225-
pkg_checksum = pattern_package_sha256.search(pkg).group(1)
226-
except:
227-
print("Failed to parse one package description", flush=True)
228-
traceback.print_exc()
229-
err = 1
230-
continue
300+
for pkg_filename, pkg_info in package_info.items():
301+
pkg_size = pkg_info['size']
302+
pkg_checksum = pkg_info['sha256']
231303
deb_count += 1
232304
deb_size += pkg_size
233305

@@ -237,8 +309,8 @@ def collect_tmp_dir():
237309
dest_dir.mkdir(parents=True, exist_ok=True)
238310
if dest_filename.suffix == '.deb':
239311
deb_set[str(dest_filename.relative_to(dest_base_dir))] = pkg_size
240-
if dest_filename.is_file() and dest_filename.stat().st_size == pkg_size:
241-
print(f"Skipping {pkg_filename}, size {pkg_size}")
312+
if dest_filename.is_file() and ( dest_filename.stat().st_size == pkg_size and pkg_checksum['old'] == pkg_checksum['new']):
313+
print(f"Skipping {pkg_filename}, size {pkg_size}, old sha256 {pkg_checksum['old']}, new sha256 {pkg_checksum['new']}")
242314
continue
243315

244316
pkg_url=f"{base_url}/{pkg_filename}"
@@ -253,8 +325,8 @@ def collect_tmp_dir():
253325
with dest_tmp_filename.open("rb") as f:
254326
for block in iter(lambda: f.read(1024**2), b""):
255327
sha.update(block)
256-
if sha.hexdigest() != pkg_checksum:
257-
print(f"Invalid checksum of {dest_filename}, expected {pkg_checksum}")
328+
if sha.hexdigest() != pkg_checksum['new']:
329+
print(f"Invalid checksum of {dest_filename}, expected {pkg_checksum['new']}")
258330
dest_tmp_filename.unlink()
259331
continue
260332
dest_tmp_filename.rename(dest_filename)

0 commit comments

Comments
 (0)