Skip to content

Commit aff4b07

Browse files
committed
Rework cmd-prune
* Move prune functions into cosalib.prune * Run bump-timestamp command during build * Create a new subcommand to prune local builds * Create different subcommand for s3 bucket pruning
1 parent f99e113 commit aff4b07

8 files changed

+369
-278
lines changed

src/bump-timestamp

+74
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
#!/usr/bin/python3 -u
2+
3+
'''
4+
This script is called by `build` command and updates builds timestamp.
5+
'''
6+
7+
import argparse
8+
import collections
9+
import os
10+
import sys
11+
12+
from cosalib.builds import Builds
13+
from cosalib.cmdlib import get_timestamp
14+
15+
# One record per build id: the id itself, a single timestamp standing in
# for the whole build, and the names of the per-architecture directories
# that contributed to it.
Build = collections.namedtuple('Build', 'id timestamp basearches')

parser = argparse.ArgumentParser()
parser.add_argument("--workdir", help="Path to workdir", default='.')
args = parser.parse_args()

# Handle on the workdir's build manifest.
builds = Builds(args.workdir)

# Directory that is walked for builds, and the list the walk fills in.
builds_dir = os.path.join(args.workdir, "builds")
scanned_builds = []
25+
26+
# First, pick up all the builds from the builds directory itself.  Every
# sub-directory of `builds_dir` is treated as one build id, and every
# sub-directory below that as one architecture of that build.
with os.scandir(builds_dir) as it:
    for entry in it:
        # ignore non-dirs
        if not entry.is_dir(follow_symlinks=False):
            # those are really the only two non-dir things we expect there
            if entry.name not in ['builds.json', 'latest']:
                print(f"Ignoring non-directory {entry.path}")
            continue

        # Scan all per-arch builds and use the most recent timestamp among
        # them as the overall "build" timestamp for pruning purposes.
        #
        # The values are accumulated in plain locals and the `Build` record
        # is created once at the end: `Build` is a namedtuple, so assigning
        # to its fields after construction raises AttributeError as soon as
        # a build has a second architecture.
        newest_ts = None
        basearches = []
        with os.scandir(entry.path) as basearch_it:
            for basearch_entry in basearch_it:
                # ignore non-dirs
                if not basearch_entry.is_dir(follow_symlinks=False):
                    print(f"Ignoring non-directory {basearch_entry.path}")
                    continue
                ts = get_timestamp(basearch_entry)
                # architectures without a usable timestamp are left out
                if not ts:
                    continue
                basearches.append(basearch_entry.name)
                newest_ts = ts if newest_ts is None else max(newest_ts, ts)

        # Only record the build if at least one architecture was usable.
        if basearches:
            scanned_builds.append(Build(id=entry.name,
                                        timestamp=newest_ts,
                                        basearches=basearches))
57+
58+
# Nothing was found on disk, so there is nothing to record.
if not scanned_builds:
    print("No builds found!")
    sys.exit(0)

# Order the scan results newest-first by their timestamp.
scanned_builds.sort(key=lambda scanned: scanned.timestamp, reverse=True)

# Start from an empty build list and re-insert every scanned build, walking
# from the oldest entry to the newest one.
builds.raw()['builds'] = []
for scanned in reversed(scanned_builds):
    for arch_name in scanned.basearches:
        builds.insert_build(scanned.id, arch_name)

builds.bump_timestamp()
print("Build timestamp was updated")

src/cmd-build

+1-13
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ EOF
2525
# Parse options
2626
FORCE=
2727
FORCE_IMAGE=
28-
SKIP_PRUNE=0
2928
VERSION=
3029
PARENT=
3130
rc=0
@@ -47,9 +46,6 @@ while true; do
4746
--force-image)
4847
FORCE_IMAGE=1
4948
;;
50-
--skip-prune)
51-
SKIP_PRUNE=1
52-
;;
5349
--version)
5450
shift
5551
VERSION=$1
@@ -403,15 +399,7 @@ mkdir -p "${builddir}"
403399
mv -T "${tmp_builddir}" "${builddir}"
404400
# Replace the latest link
405401
ln -Tsf "${buildid}" builds/latest
406-
# Update builds.json
407-
# the variables passed to `prune_builds` end up single quoted and
408-
# python treats them as literals, so we workaround this by duplicating
409-
# the command ¯\_(ツ)_/¯
410-
if [ "${SKIP_PRUNE}" == 1 ]; then
411-
"${dn}"/prune_builds --workdir "${workdir}" --insert-only "${buildid}"
412-
else
413-
"${dn}"/prune_builds --workdir "${workdir}"
414-
fi
402+
"${dn}"/bump-timestamp --workdir "${workdir}"
415403
rm builds/.build-commit
416404

417405
# and finally, build the specified targets

src/cmd-cleanup-bucket

+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#!/usr/bin/python3 -u
2+
3+
'''
4+
This script removes unreferenced builds from s3 bucket
5+
'''
6+
7+
import argparse
8+
import sys
9+
10+
from cosalib.builds import Builds
11+
from cosalib.prune import fetch_build_meta, get_unreferenced_s3_builds, delete_build
12+
13+
parser = argparse.ArgumentParser(prog="coreos-assembler cleanup-bucket")
parser.add_argument("--workdir", default='.', help="Path to workdir")
parser.add_argument("--dry-run", help="Don't actually delete anything",
                    action='store_true')
parser.add_argument("--bucket", help="S3 bucket")
parser.add_argument("--prefix", help="S3 prefix")
args = parser.parse_args()

builds = Builds(args.workdir)

# Collect the ids of the builds that builds.json references and whose
# metadata can be fetched; one entry is added per (build, arch) pair.
#
# The loop variable must not be rebound to the fetched metadata: doing so
# made the next architecture's lookup read `['id']` from the previous
# result, which fails as soon as one fetch returns nothing.  The id that
# was passed to fetch_build_meta is recorded instead of being read back
# from the result.
buildids = []
for build_entry in builds.raw()["builds"]:
    for arch in build_entry['arches']:
        if fetch_build_meta(build_entry['id'], arch):
            buildids.append(build_entry['id'])

# Find unreferenced builds in the bucket and remove them
unreferenced_s3_builds = get_unreferenced_s3_builds(buildids, args.bucket, args.prefix)

error_during_pruning = False
for unmatched_build_id in unreferenced_s3_builds:
    # TODO: fetch arches from s3
    build = fetch_build_meta(unmatched_build_id, 'x86_64')
    if not build:
        continue
    if args.dry_run:
        # Report what a real run would remove; nothing is deleted.
        print(f"Would remove unreferenced build {unmatched_build_id}")
        continue
    try:
        delete_build(build)
    except Exception as e:
        # Keep going so one failure does not block the remaining
        # deletions; the failure is reported through the exit status.
        error_during_pruning = True
        print(f"{e}")

if error_during_pruning:
    sys.exit(1)

src/cmd-prune

+123-78
Original file line numberDiff line numberDiff line change
@@ -1,78 +1,123 @@
1-
#!/usr/bin/env bash
2-
set -euo pipefail
3-
4-
# This is just a thin wrapper around prune_builds. That way we still get the
5-
# preflight checks to make sure the workdir looks sane.
6-
7-
dn=$(dirname "$0")
8-
# shellcheck source=src/cmdlib.sh
9-
. "${dn}"/cmdlib.sh
10-
11-
print_help() {
12-
cat 1>&2 <<'EOF'
13-
Usage: coreos-assembler prune --help
14-
coreos-assembler prune [--keep=N] [--keep-last-days=N]
15-
16-
Delete older untagged build artifacts. By default, only the last 3 untagged
17-
builds are kept. This can be overridden with the `--keep` option.
18-
EOF
19-
}
20-
21-
# Parse options
22-
KEEP_LAST_N=
23-
KEEP_LAST_DAYS=
24-
rc=0
25-
options=$(getopt --options h --longoptions help,keep:,keep-last-days: -- "$@") || rc=$?
26-
[ $rc -eq 0 ] || {
27-
print_help
28-
exit 1
29-
}
30-
eval set -- "$options"
31-
while true; do
32-
case "$1" in
33-
-h | --help)
34-
print_help
35-
exit 0
36-
;;
37-
--keep)
38-
shift
39-
KEEP_LAST_N="$1"
40-
;;
41-
--keep-last-days)
42-
shift
43-
KEEP_LAST_DAYS="$1"
44-
;;
45-
--)
46-
shift
47-
break
48-
;;
49-
*)
50-
fatal "$0: unrecognized option: $1"
51-
exit 1
52-
;;
53-
esac
54-
shift
55-
done
56-
57-
if [ $# -ne 0 ]; then
58-
print_help
59-
fatal "ERROR: Too many arguments"
60-
exit 1
61-
fi
62-
63-
# just support one of the two for now
64-
if [ -n "${KEEP_LAST_N:-}" ] && [ -n "${KEEP_LAST_DAYS:-}" ]; then
65-
fatal "ERROR: Only one of --keep or --keep-last-days allowed"
66-
elif [ -z "${KEEP_LAST_N:-}" ] && [ -z "${KEEP_LAST_DAYS:-}" ]; then
67-
KEEP_LAST_N=3
68-
fi
69-
70-
if [ -n "${KEEP_LAST_DAYS:-}" ]; then
71-
set -- --keep-last-days "${KEEP_LAST_DAYS}"
72-
else
73-
set -- --keep-last-n "${KEEP_LAST_N}"
74-
fi
75-
76-
prepare_build
77-
78-
"${dn}"/prune_builds --workdir "${workdir:?}" "$@"
1+
#!/usr/bin/python3 -u
2+
3+
'''
4+
This script removes previous builds. DO NOT USE on production pipelines
5+
'''
6+
7+
import os
8+
import sys
9+
import argparse
10+
11+
12+
from datetime import timedelta, datetime, timezone
13+
14+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
15+
from cosalib.builds import Builds
16+
from cosalib.prune import fetch_build_meta, delete_build
17+
18+
# Retention window applied when neither --keep-last-n nor --keep-last-days
# is given on the command line.
DEFAULT_KEEP_LAST_DAYS = 7

parser = argparse.ArgumentParser(prog="coreos-assembler prune")
parser.add_argument("--workdir", default='.', help="Path to workdir")
parser.add_argument("--dry-run", help="Don't actually delete anything",
                    action='store_true')
parser.add_argument("--insert-only", metavar="BUILDID", action='store',
                    help="Append a new latest build, do not prune")
# Neither option carries an argparse default.  With defaults on both, the
# two are indistinguishable from explicit values after parsing, so
# --keep-last-n could never take effect.
keep_options = parser.add_mutually_exclusive_group()
keep_options.add_argument("--keep-last-n", type=int, metavar="N",
                          default=None,
                          help="Number of untagged builds to keep (0 for all)")
keep_options.add_argument("--keep-last-days", metavar="N", type=int,
                          default=None,
                          help="Keep untagged builds within number of days")
args = parser.parse_args()

# No retention option at all: fall back to the age-based default.
if args.keep_last_n is None and args.keep_last_days is None:
    args.keep_last_days = DEFAULT_KEEP_LAST_DAYS

# Report bad values as usage errors (message plus exit status 2) rather
# than as an uncaught exception.
if args.keep_last_n is not None and args.keep_last_n < 0:
    parser.error("--keep-last-n: value must not be negative: %d" %
                 args.keep_last_n)

# Cut-off instant: builds older than this are candidates for removal.
# Stays None when pruning is count-based.
keep_younger_than = None
if args.keep_last_days is not None:
    if args.keep_last_days <= 0:
        parser.error("--keep-last-days: value must be positive: %d" %
                     args.keep_last_days)
    keep_younger_than = (datetime.now(timezone.utc) -
                         timedelta(days=args.keep_last_days))

# `--keep-last-n 0` means "keep everything".
skip_pruning = (not keep_younger_than and args.keep_last_n == 0)
print(f"prune: skip_pruning: {skip_pruning}")
48+
49+
builds = Builds(args.workdir)
# collect all builds being pointed to by tags
tagged_builds = {tag['target'] for tag in builds.raw().get('tags', [])}

# Handle --insert-only
if args.insert_only:
    builds.insert_build(args.insert_only)
    builds.flush()
    print("prune: --insert-only completed")
    sys.exit(0)

# One (build id, arch, metadata) record per build/arch pair listed in
# builds.json whose metadata could be fetched.
#
# The id and arch are taken from the builds.json entry, and the loop
# variable is never rebound to the fetched metadata, so one failed fetch
# cannot break the lookup for the next architecture.
# NOTE(review): the metadata object is only read through `.timestamp` and
# handed to delete_build(); confirm that matches what fetch_build_meta
# returns.
scanned_builds = []
for build_entry in builds.raw()["builds"]:
    for arch in build_entry['arches']:
        meta = fetch_build_meta(build_entry['id'], arch)
        if meta:
            scanned_builds.append((build_entry['id'], arch, meta))

new_builds = []
builds_to_delete = []

if skip_pruning:
    # Don't prune known builds
    new_builds = scanned_builds
elif keep_younger_than:
    # Age-based retention: drop untagged records older than the cut-off.
    for record in scanned_builds:
        build_id, _, meta = record
        if build_id in tagged_builds:
            print(f"Skipping tagged build {build_id}")
            new_builds.append(record)
        elif meta.timestamp < keep_younger_than:
            builds_to_delete.append(record)
        else:
            new_builds.append(record)
else:
    # Count-based retention: keep the first N distinct untagged builds in
    # manifest order.  Counting build ids rather than records keeps all
    # architectures of a retained build together.
    n = args.keep_last_n
    if n <= 0:
        sys.exit(f"prune: --keep-last-n must be positive here: {n}")
    kept_untagged = set()
    for record in scanned_builds:
        build_id = record[0]
        if build_id in tagged_builds:
            # Tagged builds are never pruned and do not use up a slot.
            print(f"Skipping tagged build {build_id}")
            new_builds.append(record)
        elif build_id in kept_untagged or len(kept_untagged) < n:
            kept_untagged.add(build_id)
            new_builds.append(record)
        else:
            builds_to_delete.append(record)

print(f"prune: new builds: {[meta for _, _, meta in new_builds]}")

# Create a new builds list.  A dry run must not drop entries from the
# manifest either, so the rewrite is skipped in that mode.
if not args.dry_run:
    # Group arches per build id, preserving manifest order.
    arches_by_build = {}
    for build_id, arch, _ in new_builds:
        arches_by_build.setdefault(build_id, []).append(arch)

    builds.raw()['builds'] = []
    # Re-insert starting from the last build in the manifest.
    for build_id in reversed(list(arches_by_build)):
        for arch in arches_by_build[build_id]:
            builds.insert_build(build_id, arch)

    builds.bump_timestamp()

if not builds_to_delete:
    print("prune: not removing any builds")
else:
    # Each build id is listed once even when several arches are removed.
    buildids = list(dict.fromkeys(build_id for build_id, _, _ in builds_to_delete))
    print(f"prune: removing {' '.join(buildids)}")

# now delete other build dirs not in the manifest
error_during_pruning = False
for _, _, meta in builds_to_delete:
    print(f"Pruning {meta}")
    if not args.dry_run:
        try:
            delete_build(meta)
        except Exception as e:
            # Keep going so one failure does not block the remaining
            # deletions; the failure is reported through the exit status.
            error_during_pruning = True
            print(f"{e}")

if error_during_pruning:
    sys.exit(1)

src/coreos-assembler

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ build_commands="init fetch build run prune clean"
3939
# commands more likely to be used in a prod pipeline only
4040
advanced_build_commands="buildprep buildupload oscontainer"
4141
buildextend_commands="qemu aws azure gcp openstack installer live vmware metal"
42-
utility_commands="tag sign compress koji-upload kola aws-replicate"
42+
utility_commands="tag sign compress koji-upload kola aws-replicate cleanup-bucket"
4343
other_commands="shell"
4444
if [ -z "${cmd}" ]; then
4545
echo Usage: "coreos-assembler CMD ..."

0 commit comments

Comments
 (0)