Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prune update #839

Merged
merged 2 commits into from
Nov 1, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions .cci.jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,40 @@ coreos.pod([image: 'registry.fedoraproject.org/fedora:30', runAsUser: 0, kvm: tr
cosa_cmd("buildupload --dry-run s3 --acl=public-read my-nonexistent-bucket/my/prefix")
}
}

// Exercises pruning through `build` and through the standalone `prune`
// command. After every step builds.json is dumped to the log and checked
// with `jq -e`, which exits non-zero when the expression is false.
stage("Pruning test") {
// Test that first build has been pruned
// Three forced image builds; builds.json must then list exactly three
// builds, and the entry at index 2 must have an ID ending in "0-1".
// NOTE(review): presumably the pruned "first build" comes from an earlier
// stage of this pipeline — confirm.
cosa_cmd("build ostree --force-image")
cosa_cmd("build ostree --force-image")
cosa_cmd("build ostree --force-image")
coreos.shwrap("cat /srv/builds/builds.json")
coreos.shwrap('jq -e ".builds|length == 3" /srv/builds/builds.json')
coreos.shwrap('jq -e ".builds[2].id | endswith(\\"0-1\\")" /srv/builds/builds.json')

// Test --skip-prune
// A fourth build with --skip-prune must leave four entries, with the
// "0-1" build still present at index 3.
cosa_cmd("build ostree --force-image --skip-prune")
coreos.shwrap("cat /srv/builds/builds.json")
coreos.shwrap('jq -e ".builds|length == 4" /srv/builds/builds.json')
coreos.shwrap('jq -e ".builds[3].id | endswith(\\"0-1\\")" /srv/builds/builds.json')

// Test prune --dry-run
// A dry run must leave builds.json unchanged (same checks as above).
cosa_cmd("prune --workdir /srv --dry-run")
coreos.shwrap("cat /srv/builds/builds.json")
coreos.shwrap('jq -e ".builds|length == 4" /srv/builds/builds.json')
coreos.shwrap('jq -e ".builds[3].id | endswith(\\"0-1\\")" /srv/builds/builds.json')

// Test --keep-last-n=0 skips pruning
// builds.json must again be unchanged (same checks as above).
cosa_cmd("prune --workdir /srv --keep-last-n=0")
coreos.shwrap("cat /srv/builds/builds.json")
coreos.shwrap('jq -e ".builds|length == 4" /srv/builds/builds.json')
coreos.shwrap('jq -e ".builds[3].id | endswith(\\"0-1\\")" /srv/builds/builds.json')

// Test prune --keep-last-n=1
// Only one build may remain, and its ID must end in "0-4".
cosa_cmd("prune --workdir /srv --keep-last-n=1")
coreos.shwrap("cat /srv/builds/builds.json")
coreos.shwrap('jq -e ".builds|length == 1" /srv/builds/builds.json')
coreos.shwrap('jq -e ".builds[0].id | endswith(\\"0-4\\")" /srv/builds/builds.json')
}
}

def cosa_cmd(args) {
Expand Down
9 changes: 3 additions & 6 deletions src/cmd-build
Original file line number Diff line number Diff line change
Expand Up @@ -409,14 +409,11 @@ mkdir -p "${builddir}"
mv -T "${tmp_builddir}" "${builddir}"
# Replace the latest link
ln -Tsf "${buildid}" builds/latest
# Update builds.json
# the variables passed to `prune_builds` end up single quoted and
# python treats them as literals, so we workaround this by duplicating
# the command ¯\_(ツ)_/¯

if [ "${SKIP_PRUNE}" == 1 ]; then
"${dn}"/prune_builds --workdir "${workdir}" --insert-only "${buildid}"
insert_build "${buildid}" "${workdir}"
else
"${dn}"/prune_builds --workdir "${workdir}"
"${dn}"/cmd-prune --workdir "${workdir}"
fi
rm builds/.build-commit

Expand Down
192 changes: 114 additions & 78 deletions src/cmd-prune
Original file line number Diff line number Diff line change
@@ -1,78 +1,114 @@
#!/usr/bin/env bash
set -euo pipefail

# This is just a thin wrapper around prune_builds. That way we still get the
# preflight checks to make sure the workdir looks sane.

dn=$(dirname "$0")
# shellcheck source=src/cmdlib.sh
. "${dn}"/cmdlib.sh

# Print the usage text for `coreos-assembler prune` to stderr.
# The heredoc delimiter is quoted ('EOF'), so the text is emitted verbatim
# with no variable or command expansion.
print_help() {
cat 1>&2 <<'EOF'
Usage: coreos-assembler prune --help
coreos-assembler prune [--keep=N] [--keep-last-days=N]

Delete older untagged build artifacts. By default, only the last 3 untagged
builds are kept. This can be overridden with the `--keep` option.
EOF
}

# Parse options
# Both values start empty; "empty" means the option was not given.
KEEP_LAST_N=
KEEP_LAST_DAYS=
rc=0
# Capture getopt's exit status in rc instead of letting `set -e` abort, so
# the help text can be printed on a parse error.
options=$(getopt --options h --longoptions help,keep:,keep-last-days: -- "$@") || rc=$?
[ $rc -eq 0 ] || {
print_help
exit 1
}
# Replace the positional parameters with getopt's normalized output.
eval set -- "$options"
while true; do
case "$1" in
-h | --help)
print_help
exit 0
;;
--keep)
shift
KEEP_LAST_N="$1"
;;
--keep-last-days)
shift
KEEP_LAST_DAYS="$1"
;;
--)
# End-of-options marker: whatever follows is positional.
shift
break
;;
*)
# NOTE(review): `fatal` is not defined in this file (presumably it comes
# from cmdlib.sh); if it exits, the `exit 1` below is unreachable — confirm.
fatal "$0: unrecognized option: $1"
exit 1
;;
esac
shift
done

# This command accepts no positional arguments.
if [ $# -ne 0 ]; then
print_help
fatal "ERROR: Too many arguments"
exit 1
fi

# just support one of the two for now
# When neither option is given, fall back to keeping the last 3 builds.
if [ -n "${KEEP_LAST_N:-}" ] && [ -n "${KEEP_LAST_DAYS:-}" ]; then
fatal "ERROR: Only one of --keep or --keep-last-days allowed"
elif [ -z "${KEEP_LAST_N:-}" ] && [ -z "${KEEP_LAST_DAYS:-}" ]; then
KEEP_LAST_N=3
fi

# Rebuild the positional parameters as the arguments to pass on to
# prune_builds (note that --keep is forwarded as --keep-last-n).
if [ -n "${KEEP_LAST_DAYS:-}" ]; then
set -- --keep-last-days "${KEEP_LAST_DAYS}"
else
set -- --keep-last-n "${KEEP_LAST_N}"
fi

# NOTE(review): presumably prepare_build (not defined in this file) performs
# the preflight checks and sets `workdir` — confirm against cmdlib.sh.
prepare_build

# ${workdir:?} aborts with an error if workdir is unset or empty.
"${dn}"/prune_builds --workdir "${workdir:?}" "$@"
#!/usr/bin/python3 -u

'''
This script removes previous builds. DO NOT USE on production pipelines
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mmmm. I think it's a bit more nuanced than that. For RHCOS we can prune bootimages not pinned by the installer, for example.

But...this gets to an interesting topic. I've been thinking we should explicitly copy/promote bootimages we want to pin to a separate "stream". But anyways, just noting this.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Prune won't touch "tagged" builds - seems this field should be updated with pinned images used in installer

'''

import argparse
import os
import sys


from datetime import timedelta, datetime, timezone
from shutil import rmtree

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from cosalib.builds import Builds, get_local_builds

# Number of untagged builds kept when neither --keep-last-n nor
# --keep-last-days is given on the command line.
DEFAULT_KEEP_LAST_N = 3


parser = argparse.ArgumentParser(prog="coreos-assembler prune")
parser.add_argument("--workdir", default='.', help="Path to workdir")
parser.add_argument("--dry-run", help="Don't actually delete anything",
                    action='store_true')
keep_options = parser.add_mutually_exclusive_group()
keep_options.add_argument("--keep-last-n", type=int, metavar="N",
                          default=DEFAULT_KEEP_LAST_N,
                          help="Number of untagged builds to keep (0 for all)")
# No default here: None means "option not given". Using a numeric default as
# the sentinel would make an explicit request for that same value a no-op.
keep_options.add_argument("--keep-last-days", metavar="N", type=int,
                          default=None,
                          help="Keep untagged builds within number of days")
args = parser.parse_args()

# Report invalid values through argparse so the user gets a usage message
# instead of a traceback.
if args.keep_last_n < 0:
    parser.error("--keep-last-n must not be negative: %d" % args.keep_last_n)

keep_younger_than = None
if args.keep_last_days is not None:
    if args.keep_last_days <= 0:
        parser.error("--keep-last-days must be positive: %d" %
                     args.keep_last_days)
    keep_younger_than = (datetime.now(timezone.utc) -
                         timedelta(days=args.keep_last_days))

# --keep-last-n=0 means "keep everything", unless an age cut-off was requested
skip_pruning = (keep_younger_than is None and args.keep_last_n == 0)

builds = Builds(args.workdir)
# collect all builds being pointed to by tags
tagged_builds = {tag['target'] for tag in builds.raw().get('tags', [])}

builds_dir = os.path.join(args.workdir, "builds")

# sort by timestamp, newest first
scanned_builds = sorted(get_local_builds(builds_dir),
                        key=lambda x: x.timestamp,
                        reverse=True)

new_builds = []
builds_to_delete = []

# Don't prune known builds
if skip_pruning:
    new_builds = scanned_builds
else:
    remaining = args.keep_last_n
    for build in scanned_builds:
        # Tagged builds are never pruned and do not count towards the
        # --keep-last-n limit, whichever retention option is in effect.
        if build.id in tagged_builds:
            print(f"Skipping tagged build {build.id}")
            new_builds.append(build)
            continue

        if keep_younger_than is not None:
            keep = build.timestamp >= keep_younger_than
        else:
            keep = remaining > 0
            if keep:
                remaining -= 1

        if keep:
            new_builds.append(build)
        else:
            builds_to_delete.append(build)

print(f"prune: new builds: {new_builds}")

if args.dry_run:
    print("prune: not removing any builds")
    sys.exit(0)

# create a new builds list; new_builds is newest-first, so insert in reverse
builds.raw()['builds'] = []
for build in reversed(new_builds):
    for arch in build.basearches:
        builds.insert_build(build.id, arch)

builds.bump_timestamp()

buildids = [x.id for x in builds_to_delete]
print(f"prune: removing {' '.join(buildids)}")

# now delete other build dirs not in the manifest; keep going after a failure
# so one bad directory doesn't block the rest, but report it via exit status
error_during_pruning = False
for build in builds_to_delete:
    print(f"Pruning {build}")
    try:
        rmtree(os.path.join(builds_dir, build.id))
    except OSError as e:
        error_during_pruning = True
        print(f"{e}")

if error_during_pruning:
    sys.exit(1)
55 changes: 55 additions & 0 deletions src/cmd-remote-prune
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/usr/bin/python3 -u

'''
This script removes unreferenced builds from s3 bucket
'''

import argparse
import sys

from cosalib.builds import Builds
from cosalib.prune import fetch_build_meta, get_unreferenced_s3_builds, delete_build

parser = argparse.ArgumentParser(prog="coreos-assembler remote-prune")
parser.add_argument("--workdir", default='.', help="Path to workdir")
parser.add_argument("--dry-run", help="Don't actually delete anything",
                    action='store_true')

subparsers = parser.add_subparsers(dest='cmd', title='subcommands')
subparsers.required = True

s3 = subparsers.add_parser('s3', help='Prune s3 buckets')
s3.add_argument("--bucket", help="Bucket name")
s3.add_argument("--prefix", help="Key prefix")

args = parser.parse_args()

builds = Builds(args.workdir)

# Fetch the metadata of every (build, arch) pair listed in builds.json.
# The result gets its own name: rebinding the outer loop variable would make
# the next arch iteration read 'id' from the fetched metadata (or from a
# falsy result) instead of from the builds.json entry.
scanned_builds = []
for build in builds.raw()["builds"]:
    for arch in build['arches']:
        build_meta = fetch_build_meta(build['id'], arch)
        if build_meta:
            scanned_builds.append(build_meta)

# Find unreferenced builds in the bucket and remove them
buildids = [x['id'] for x in scanned_builds]
unreferenced_s3_builds = get_unreferenced_s3_builds(buildids, args.bucket, args.prefix)

error_during_pruning = False
for unmatched_build_id in unreferenced_s3_builds:
    # TODO: fetch arches from s3
    build_meta = fetch_build_meta(unmatched_build_id, 'x86_64')
    if not build_meta:
        continue
    if args.dry_run:
        # Report what a real run would remove, so --dry-run is informative.
        print(f"Would delete build {unmatched_build_id}")
        continue
    # Keep going after a failure so one bad build doesn't block the rest;
    # the failure is reported through the exit status below.
    try:
        delete_build(build_meta)
    except Exception as e:
        error_during_pruning = True
        print(f"{e}")

if error_during_pruning:
    sys.exit(1)
13 changes: 13 additions & 0 deletions src/cmdlib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -685,3 +685,16 @@ get_latest_qemu() {
ls ${builddir}/*-qemu.qcow2*
fi
}

# Insert a build into builds.json and bump its timestamp.
#
#   $1  build ID to insert
#   $2  workdir containing the builds (optional; falls back to the global
#       `workdir`, then to the current directory)
#
# All values are passed to python through sys.argv, not spliced into the
# program text, so quotes or other special characters in them cannot break
# (or inject into) the python source.
insert_build() {
    local buildid=$1; shift
    local dir=${1:-${workdir:-$(pwd)}}
    python3 -c '
import sys
sys.path.insert(0, sys.argv[1])
from cosalib.builds import Builds
builds = Builds(sys.argv[2])
builds.insert_build(sys.argv[3])
builds.bump_timestamp()
print(f"Build {sys.argv[3]} was inserted")
' "${DIR}" "${dir}" "${buildid}"
}
2 changes: 1 addition & 1 deletion src/coreos-assembler
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ build_commands="init fetch build run prune clean"
# commands more likely to be used in a prod pipeline only
advanced_build_commands="buildprep buildupload oscontainer"
buildextend_commands="qemu aws azure gcp openstack installer live vmware metal"
utility_commands="tag sign compress koji-upload kola aws-replicate"
utility_commands="tag sign compress koji-upload kola aws-replicate remote-prune"
other_commands="shell"
if [ -z "${cmd}" ]; then
echo Usage: "coreos-assembler CMD ..."
Expand Down
39 changes: 39 additions & 0 deletions src/cosalib/builds.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,20 @@
import os
import semver
import gi
import collections

gi.require_version('OSTree', '1.0')
from gi.repository import Gio, OSTree

from cosalib.cmdlib import (
get_basearch,
rfc3339_time,
get_timestamp,
load_json,
write_json)

Build = collections.namedtuple('Build', ['id', 'timestamp', 'basearches'])

BUILDFILES = {
# The list of builds.
'list': 'builds/builds.json',
Expand Down Expand Up @@ -139,3 +143,38 @@ def raw(self):

def flush(self):
    """Write the in-memory data (``self._data``) to ``self._fn`` via ``write_json``."""
    write_json(self._fn, self._data)


def get_local_builds(builds_dir):
    """Scan *builds_dir* and return one ``Build`` per build directory found.

    Every sub-directory of *builds_dir* is treated as a build, and each of
    its own sub-directories as one architecture of that build. An
    architecture directory is counted only when ``get_timestamp`` returns a
    truthy value for it; a build with no such directory is omitted.

    Args:
        builds_dir: path of the directory holding the per-build directories.

    Returns:
        A list of ``Build`` tuples in directory-scan order (unsorted). ``id``
        is the build directory name, ``basearches`` lists the architecture
        directory names, and ``timestamp`` is the most recent timestamp among
        those architectures.
    """
    scanned_builds = []
    with os.scandir(builds_dir) as it:
        for entry in it:
            # ignore non-dirs
            if not entry.is_dir(follow_symlinks=False):
                # those are really the only two non-dir things we expect there
                if entry.name not in ['builds.json', 'latest']:
                    print(f"Ignoring non-directory {entry.path}")
                continue

            # scan all per-arch builds, pick up the most recent build of those
            # as the overall "build" timestamp for pruning purposes
            multiarch_build = None
            with os.scandir(entry.path) as basearch_it:
                for basearch_entry in basearch_it:
                    # ignore non-dirs
                    if not basearch_entry.is_dir(follow_symlinks=False):
                        print(f"Ignoring non-directory {basearch_entry.path}")
                        continue
                    ts = get_timestamp(basearch_entry)
                    if not ts:
                        continue
                    if multiarch_build is None:
                        multiarch_build = Build(
                            id=entry.name, timestamp=ts,
                            basearches=[basearch_entry.name])
                    else:
                        # Build is a namedtuple and therefore immutable:
                        # assigning to its fields raises AttributeError, so
                        # derive an updated copy instead.
                        multiarch_build = multiarch_build._replace(
                            timestamp=max(multiarch_build.timestamp, ts),
                            basearches=(multiarch_build.basearches +
                                        [basearch_entry.name]))
            if multiarch_build is not None:
                scanned_builds.append(multiarch_build)
    return scanned_builds
Loading