Skip to content

Commit 8a5e488

Browse files
committed
Merge pull request #145 from github/dbussink/failed-backup-alert
Move in progress detection to separate file with pid
2 parents d4d0c2c + d301fd3 commit 8a5e488

File tree

2 files changed

+47
-16
lines changed

2 files changed

+47
-16
lines changed

bin/ghe-backup

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -28,37 +28,56 @@ touch "incomplete"
2828
GHE_MAINTENANCE_MODE_ENABLED=false
2929

3030
# To prevent multiple backup runs from happening at the same time, we create an
31-
# in-progress symlink pointing to the snapshot directory. This will fail if
32-
# another backup is already in progress, giving us a form of locking.
31+
# in-progress file with the timestamp and pid of the backup process,
32+
# giving us a form of locking.
3333
#
34-
# Set up a trap to remove the in-progress symlink if we exit for any reason but
35-
# verify that we own the in-progress symlink before doing so.
34+
# Set up a trap to remove the in-progress file if we exit for any reason but
35+
# verify that we are the same process before doing so.
3636
#
3737
# The cleanup trap also handles disabling maintenance mode on the appliance if
3838
# it was automatically enabled.
3939
cleanup () {
40-
if [ "$(readlink ../in-progress)" = "$GHE_SNAPSHOT_TIMESTAMP" ]; then
41-
unlink ../in-progress
40+
if [ -f ../in-progress ]; then
41+
progress=$(cat ../in-progress)
42+
snapshot=$(echo "$progress" | cut -d ' ' -f 1)
43+
pid=$(echo "$progress" | cut -d ' ' -f 2)
44+
if [ "$snapshot" = "$GHE_SNAPSHOT_TIMESTAMP" -a "$$" = $pid ]; then
45+
unlink ../in-progress
4246
fi
47+
fi
4348

44-
if $GHE_MAINTENANCE_MODE_ENABLED; then
45-
ghe-maintenance-mode-disable "$GHE_HOSTNAME"
46-
fi
49+
if $GHE_MAINTENANCE_MODE_ENABLED; then
50+
ghe-maintenance-mode-disable "$GHE_HOSTNAME"
51+
fi
4752
}
4853

4954
# Set up exit traps
5055
trap 'cleanup' EXIT
5156
trap 'exit $?' INT # ^C always terminate
5257

53-
# Mark the snapshot as in-progress by creating the symlink. If this fails, it
54-
# means another ghe-backup run is already in progress and we should exit.
55-
# NOTE: The -n argument to ln is non-POSIX but widely supported.
56-
if ! ln -sn "$GHE_SNAPSHOT_TIMESTAMP" ../in-progress 2>/dev/null; then
57-
snapshot="$(readlink ../in-progress)"
58-
echo "Error: backup of $GHE_HOSTNAME already in progress in snapshot $snapshot. Aborting." 1>&2
58+
if [ -h ../in-progress ]; then
59+
echo "Error: detected a backup already in progress from a previous version of ghe-backup." 1>&2
60+
echo "If there is no backup in progress anymore, please remove" 1>&2
61+
echo "the $GHE_DATA_DIR/in-progress symlink." 1>&2
62+
exit 1
63+
fi
64+
65+
if [ -f ../in-progress ]; then
66+
progress=$(cat ../in-progress)
67+
snapshot=$(echo "$progress" | cut -d ' ' -f 1)
68+
pid=$(echo "$progress" | cut -d ' ' -f 2)
69+
if ! ps -p $pid -o command= | grep ghe-backup; then
70+
# We can safely remove in-progress; ghe-prune-snapshots
71+
# will clean up the failed backup.
72+
unlink ../in-progress
73+
else
74+
echo "Error: backup process $pid of $GHE_HOSTNAME already in progress in snapshot $snapshot. Aborting." 1>&2
5975
exit 1
76+
fi
6077
fi
6178

79+
echo "$GHE_SNAPSHOT_TIMESTAMP $$" > ../in-progress
80+
6281
echo "Starting backup of $GHE_HOSTNAME in snapshot $GHE_SNAPSHOT_TIMESTAMP"
6382

6483
# Perform a host connection check and establish the remote appliance version.

test/test-ghe-backup.sh

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ begin_test "ghe-backup tarball strategy"
230230
)
231231
end_test
232232

233-
begin_test "ghe-backup fails fast when other run in progress"
233+
begin_test "ghe-backup fails fast when old style run in progress"
234234
(
235235
set -e
236236

@@ -241,6 +241,18 @@ begin_test "ghe-backup fails fast when other run in progress"
241241
)
242242
end_test
243243

244+
begin_test "ghe-backup cleans up stale in-progress file"
245+
(
246+
set -e
247+
248+
echo "20150928T153353 99999" > "$GHE_DATA_DIR/in-progress"
249+
ghe-backup
250+
251+
[ ! -f "$GHE_DATA_DIR/in-progress" ]
252+
)
253+
end_test
254+
255+
244256
begin_test "ghe-backup without manage-password file"
245257
(
246258
set -e

0 commit comments

Comments
 (0)