This repository was archived by the owner on Aug 6, 2025. It is now read-only.

Commits (45):
bbc1387  ?  (omesser, Mar 1, 2021)
0ba55cd  M  (omesser, Mar 2, 2021)
9e3271c  m  (omesser, Mar 2, 2021)
f00c096  manifest_layer_info list  (omesser, Mar 2, 2021)
e469174  Not gzipping  (omesser, Mar 2, 2021)
72c6506  No  (omesser, Mar 2, 2021)
80ea0ae  compress  (omesser, Mar 2, 2021)
01142fc  shlex  (omesser, Mar 2, 2021)
8199eff  try  (omesser, Mar 2, 2021)
c49458f  headers  (omesser, Mar 2, 2021)
2ea506c  reorder  (omesser, Mar 2, 2021)
802f0dc  log  (omesser, Mar 2, 2021)
9462df9  nicer  (omesser, Mar 2, 2021)
3224407  Moving up  (omesser, Mar 2, 2021)
91b1aa3  M  (omesser, Mar 2, 2021)
6fff684  Handle some cases nicely  (omesser, Mar 2, 2021)
d655e3f  bug fix layer_info access  (omesser, Mar 2, 2021)
497adbf  force  (omesser, Mar 2, 2021)
91c328c  log message change  (omesser, Mar 2, 2021)
61da504  handle symlinkx  (omesser, Mar 2, 2021)
2d06530  Add log, unlink old symlink  (omesser, Mar 3, 2021)
cc7800b  another log  (omesser, Mar 3, 2021)
eb96741  No skipping  (omesser, Mar 3, 2021)
c0a9ec4  log change  (omesser, Mar 3, 2021)
de4e3b5  fmt  (omesser, Mar 3, 2021)
0d75b91  Do all before  (omesser, Mar 3, 2021)
f452bff  Write manifest  (omesser, Mar 3, 2021)
8050a6e  Log harder  (omesser, Mar 3, 2021)
dc34284  M  (omesser, Mar 3, 2021)
5982f8d  M  (omesser, Mar 3, 2021)
a18f8f6  Resolved existing layer skip issue  (omesser, Mar 3, 2021)
c0956d9  compress  (omesser, Mar 3, 2021)
f39ee3b  M  (omesser, Mar 3, 2021)
21611b7  fmting  (omesser, Mar 3, 2021)
0b733f8  M  (omesser, Mar 4, 2021)
2d369da  M  (omesser, Mar 4, 2021)
86564dd  Formatting  (omesser, Mar 4, 2021)
9b1aa08  Leave the discrepency  (omesser, Mar 4, 2021)
e9eec6c  symlinks first  (omesser, Mar 4, 2021)
ad4e7c2  strings  (omesser, Mar 4, 2021)
6b5bf5e  to strings  (omesser, Mar 4, 2021)
7f6cecd  symlink bug  (omesser, Mar 4, 2021)
b61a0aa  M  (omesser, Mar 5, 2021)
86ca822  remove verification and parallelize gzip  (omesser, Mar 5, 2021)
e94f892  M  (omesser, Mar 13, 2021)
3 changes: 2 additions & 1 deletion .gitignore
@@ -10,5 +10,6 @@ ENV/
 # pycharm proj
 .idea
 
-# stray tar.gz files used for manual testing
+# stray archive files used for manual testing
+*.tar
 *.tar.gz
2 changes: 1 addition & 1 deletion core/__init__.py
@@ -1,4 +1,4 @@
-from .manifest_creator import ImageManifestCreator
+from .image_manifest_creator import ImageManifestCreator
 from .registry import Registry
 from .extractor import Extractor
 from .processor import Processor
37 changes: 37 additions & 0 deletions core/image_manifest_creator.py
@@ -0,0 +1,37 @@
import os
import json

import utils.helpers


class ImageManifestCreator(object):
    def __init__(self, name, tag, config_path, layers_info, config_info):
        self._name = name
        self._tag = tag
        self._config_path = config_path
        self._layers_info = layers_info
        self._config_info = config_info

    def create(self):
        manifest = dict()
        manifest["schemaVersion"] = 2
        manifest["mediaType"] = "application/vnd.docker.distribution.manifest.v2+json"
        manifest["config"] = {
            "mediaType": "application/vnd.docker.container.image.v1+json",
            "size": self._config_info['size'],
            "digest": self._config_info['digest'],
        }
        manifest["layers"] = []
        for layer_info in self._layers_info:
            if layer_info['ext'].endswith('gz') or layer_info['ext'].endswith('gzip'):
                media_type = "application/vnd.docker.image.rootfs.diff.tar.gzip"
            else:
                media_type = "application/vnd.docker.image.rootfs.diff.tar"
            layer_data = {
                "mediaType": media_type,
                "size": layer_info['size'],
                "digest": layer_info['digest'],
            }
            manifest["layers"].append(layer_data)

        return json.dumps(manifest)
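
For orientation, here is a minimal, hypothetical usage sketch (not part of the PR). The dict shapes for `layers_info` and `config_info` are inferred from the accesses in `create()` above; every name, path, and digest below is an illustrative placeholder.

```python
# Hypothetical driver code; field shapes inferred from create() above
creator = ImageManifestCreator(
    name='myorg/myimage',                       # placeholder image name
    tag='latest',
    config_path='/tmp/work/abc123.json',        # placeholder config path
    layers_info=[
        {
            'size': 31415926,                   # layer size in bytes
            'digest': 'sha256:<layer-digest>',  # placeholder digest
            'ext': '.tar.gz',                   # drives the mediaType choice
        },
    ],
    config_info={'size': 7048, 'digest': 'sha256:<config-digest>'},
)
manifest_json = creator.create()  # JSON string, ready to send to a registry
```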
43 changes: 0 additions & 43 deletions core/manifest_creator.py

This file was deleted.

122 changes: 115 additions & 7 deletions core/processor.py
@@ -1,7 +1,10 @@
 import tempfile
+import subprocess
 import multiprocessing.pool
 import time
 import os.path
+import pathlib
+import shutil
 import json
 
 import humanfriendly
@@ -14,10 +17,13 @@ class Processor(object):
     def __init__(
         self,
         logger,
+        tmp_dir,
+        tmp_dir_override,
         parallel,
         registry_url,
         archive_path,
         stream=False,
+        gzip_layers=False,
         login=None,
         password=None,
         ssl_verify=True,
@@ -26,6 +32,9 @@
     ):
         self._logger = logger
         self._parallel = parallel
+        self._tmp_dir = tmp_dir
+        self._tmp_dir_override = tmp_dir_override
+        self._gzip_layers = gzip_layers
 
         if parallel > 1 and stream:
             self._logger.info(
@@ -55,8 +64,14 @@ def process(self):
"""
start_time = time.time()
results = []
with tempfile.TemporaryDirectory() as tmp_dir_name:

if self._tmp_dir_override:
tmp_dir_name = self._tmp_dir_override
os.mkdir(tmp_dir_name, 0o700)
else:
tmp_dir_name = tempfile.mkdtemp(dir=self._tmp_dir)

# since we're not always using TemporaryDirectory we're making and cleaning up ourselves
try:
self._logger.info(
'Processing archive',
archive_path=self._extractor.archive_path,
Expand All @@ -67,6 +82,9 @@ def process(self):
             # extract the whole thing
             self._extractor.extract_all(tmp_dir_name)
 
+            # pre-process layers in place - for kaniko
+            self._pre_process_contents(tmp_dir_name)
+
             manifest = self._get_manifest(tmp_dir_name)
             self._logger.debug('Extracted archive manifest', manifest=manifest)
 
@@ -83,9 +101,12 @@
                 pool.close()
                 pool.join()
 
-            # this will throw if any pool worker caught an exception
-            for res in results:
-                res.get()
+                # this will throw if any pool worker caught an exception
+                for res in results:
+                    res.get()
+        finally:
+            shutil.rmtree(tmp_dir_name)
+            self._logger.verbose('Removed workdir', tmp_dir_name=tmp_dir_name)
 
         elapsed = time.time() - start_time
         self._logger.info(
@@ -94,12 +115,99 @@
             elapsed=humanfriendly.format_timespan(elapsed),
         )
 
+    def _pre_process_contents(self, root_dir):
+        start_time = time.time()
+        if not self._gzip_layers:
+            return
+
+        self._logger.debug('Preprocessing extracted contents')
+
+        # for Kaniko compatibility - must be real tar.gzip and not just tar
+        # self._correct_symlinks(root_dir)
+        gzipped_layers = self._compress_layers(root_dir)
+        self._update_manifests(root_dir, gzipped_layers)
+
+        elapsed = time.time() - start_time
+        self._logger.info(
+            'Finished preprocessing archive contents',
+            elapsed=humanfriendly.format_timespan(elapsed),
+        )
+
+    def _compress_layers(self, root_dir):
+        """
+        Done in two passes: some layer directories are symlinks that were re-pointed to .tar.gz
+        paths earlier, so they are broken links until their targets exist and must be skipped at
+        first. We gzip all non-symlinked layers, then make a second pass over the symlinked ones.
+        """
+        self._logger.debug(
+            'Compressing all layer files (pre-processing)', processes=self._parallel
+        )
+        gzipped_paths = []
+        results = []
+        with multiprocessing.pool.ThreadPool(processes=self._parallel) as pool:
+            for element in pathlib.Path(root_dir).iterdir():
+                if not element.is_dir():
+                    continue
+
+                res = pool.apply_async(
+                    self._compress_layer,
+                    (element,),
+                )
+                results.append(res)
+
+            pool.close()
+            pool.join()
+
+            # this will throw if any pool worker caught an exception
+            for res in results:
+                gzipped_paths.append(res.get())
+
+        self._logger.debug('Finished compressing all layer files')
+        return gzipped_paths

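The docstring above describes a second pass over symlinked layers, while the `_correct_symlinks` call is commented out in this revision. Purely as a hedged sketch of what such a pass might look like; the helper name and the symlink-detection details are assumptions, not code from the PR:

```python
import pathlib

# Hypothetical second pass: compress layer directories reached via symlinks,
# after the first pass has materialized their targets. compress_layer stands
# in for Processor._compress_layer.
def compress_symlinked_layers(root_dir, compress_layer):
    gzipped_paths = []
    for element in pathlib.Path(root_dir).iterdir():
        # is_symlink() is true for a link even while its target is missing
        if element.is_symlink() and element.is_dir():
            gzipped_paths.append(compress_layer(element))
    return gzipped_paths
```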
+    def _compress_layer(self, layer_dir_path):
+        gzipped_layer_path = str(layer_dir_path.absolute()) + '.tar.gz'
+
+        # gzip and keep original (to control the output name)
+        tar_cmd = f'tar -czf {gzipped_layer_path} -C {layer_dir_path.parents[0].absolute()} {layer_dir_path.name}'
+        self._logger.info('Compressing layer dir', tar_cmd=tar_cmd)
+        subprocess.check_call(tar_cmd, shell=True)
+
+        self._logger.debug(
+            'Successfully gzipped layer',
+            gzipped_layer_path=gzipped_layer_path,
+            file_path=layer_dir_path,
+        )
+
+        # remove original
+        shutil.rmtree(layer_dir_path.absolute())
+
+        return gzipped_layer_path

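Note that `tar` runs through a shell here, so the layer path is interpolated into a command line (the `shlex` commit in this PR's history hints the quoting concern came up). As an alternative sketch, the standard library's `tarfile` module can produce the same `<dir>.tar.gz` layout without a shell; exact gzip framing and performance may differ from GNU tar:

```python
import tarfile

# Sketch only (not the PR's code): gzip a layer directory without a shell
def compress_layer_stdlib(layer_dir_path):
    gzipped_layer_path = str(layer_dir_path.absolute()) + '.tar.gz'
    with tarfile.open(gzipped_layer_path, 'w:gz') as tar:
        # arcname keeps just the directory name as the top-level entry,
        # mirroring `tar -C <parent> <name>`
        tar.add(str(layer_dir_path), arcname=layer_dir_path.name)
    return gzipped_layer_path
```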
+    def _update_manifests(self, root_dir, gzipped_layers):
+        self._logger.debug('Correcting image manifests')
+        manifest = self._get_manifest(root_dir)
+
+        for manifest_image_section in manifest:
+            for idx, layer in enumerate(manifest_image_section["Layers"]):
+                if layer.endswith('.tar'):
+                    manifest_image_section["Layers"][idx] = gzipped_layers[idx]
+
+        # write modified image config
+        self._write_manifest(root_dir, manifest)
+        self._logger.debug('Updated image manifests', manifest=manifest)

     @staticmethod
-    def _get_manifest(tmp_dir_name):
-        with open(os.path.join(tmp_dir_name, 'manifest.json'), 'r') as fh:
+    def _get_manifest(archive_dir):
+        with open(os.path.join(archive_dir, 'manifest.json'), 'r') as fh:
             manifest = json.loads(fh.read())
             return manifest

+    @staticmethod
+    def _write_manifest(archive_dir, contents):
+        with open(os.path.join(archive_dir, 'manifest.json'), 'w') as fh:
+            json.dump(contents, fh)
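
For context: `manifest.json` inside a `docker save` archive is a JSON list with one entry per image, and `_update_manifests` rewrites each entry's `Layers` list to point at the gzipped files. Roughly, with placeholder values:

```python
# Illustrative docker-save manifest.json contents (values are placeholders)
before = [{
    'Config': 'abc123.json',
    'RepoTags': ['myorg/myimage:latest'],
    'Layers': ['deadbeef/layer.tar', 'cafebabe/layer.tar'],
}]

# after _update_manifests, assuming _compress_layer returned absolute paths
after = [{
    'Config': 'abc123.json',
    'RepoTags': ['myorg/myimage:latest'],
    'Layers': ['/tmp/work/deadbeef.tar.gz', '/tmp/work/cafebabe.tar.gz'],
}]
```

Note that the index-based replacement assumes `gzipped_layers` comes back in the same order as the manifest's `Layers` list.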


#
# Global wrappers to use with multiprocessing.pool.Pool which can't pickle instance methods