Skip to content

Commit

Permalink
Remove rqd restart feature (#1435)
Browse files Browse the repository at this point in the history
The feature was never really functional. Its implementation relied on a
hardcoded path only valid for the init.d deployment of rqd, and even for
this use case it didn't work as expected.

For the three existing rqd deployment modes (docker, init.d and systemd)
restarting should be accomplished by calling stop and start
sequentially. Out of the three modes, only Docker is not handled by this
PR, as it requires external control of the rqd service on docker, and
service definitions are out of the scope of this repo.
  • Loading branch information
DiegoTavares authored Jul 30, 2024
1 parent 162ba42 commit 85050da
Show file tree
Hide file tree
Showing 9 changed files with 11 additions and 116 deletions.
2 changes: 1 addition & 1 deletion VERSION.in
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.31
0.32
4 changes: 2 additions & 2 deletions proto/rqd.proto
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,10 @@ service RqdInterface {
// Return the HostReport
rpc ReportStatus(RqdStaticReportStatusRequest) returns (RqdStaticReportStatusResponse);

// Restart the rqd process when it becomes idle
// [Deprecated] Restart the rqd process when it becomes idle
rpc RestartRqdIdle(RqdStaticRestartIdleRequest) returns (RqdStaticRestartIdleResponse);

// Restart rqd process now
// [Deprecated] Restart rqd process now
rpc RestartRqdNow(RqdStaticRestartNowRequest) returns (RqdStaticRestartNowResponse);

// Turn off rqd when it becomes idle
Expand Down
1 change: 1 addition & 0 deletions rqd/deploy/opencue-rqd.service
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Environment=OPTIONS=
Environment=BIN=/usr/bin
EnvironmentFile=-/etc/sysconfig/opencue-rqd
ExecStart=/bin/bash -c "${BIN}/rqd ${OPTIONS}"
ExecStop=/bin/bash -c "${BIN}/rqd --exit_now
LimitNOFILE=500000
LimitNPROC=500000
StandardOutput=syslog+console
Expand Down
11 changes: 0 additions & 11 deletions rqd/deploy/rqd3_init.d
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,6 @@ start()
echo ""
}

idle_restart()
{
echo -n "Requesting idle restart of openrqd services:"
cd ${RQD_PATH}
daemon "rqd/cuerqd.py --restart &>/dev/null || :"
echo ""
}

stop()
{
echo -n "Stopping openrqd services:"
Expand All @@ -52,9 +44,6 @@ case "$1" in
sleep 3
start
;;
idle-restart)
idle_restart
;;
*)
echo $"Usage: $0 {start|stop|restart|idle_restart}"
exit 1
Expand Down
12 changes: 1 addition & 11 deletions rqd/rqd/cuerqd.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,6 @@ def restartRqdIdle(self):
print(self.rqdHost, "Sending restartRqdIdle command")
self.stub.RestartRqdIdle(rqd.compiled_proto.rqd_pb2.RqdStaticRestartIdleRequest())

def restartRqdNow(self):
"""Restarts RQD on the host now."""
print(self.rqdHost, "Sending restartRqdNow command")
self.stub.RestartRqdNow(rqd.compiled_proto.rqd_pb2.RqdStaticRestartNowRequest())

def rebootIdle(self):
"""Reboots the host when idle."""
print(self.rqdHost, "Sending rebootIdle command")
Expand Down Expand Up @@ -174,8 +169,6 @@ def main():
'--restart', action='store_true',
help='Lock host, wait until machine is idle, and then restart RQD. Any unlock '
'command cancels this request')
parser.add_argument(
'--restart_now', action='store_true', help='KILL ALL running frames and restart RQD')
parser.add_argument(
'--reboot', action='store_true',
help='Lock host, wait until machine is idle, and then REBOOT machine. Any unlock '
Expand Down Expand Up @@ -237,13 +230,10 @@ def main():
elif args.exit:
rqdHost.shutdownRqdIdle()

if args.restart_now:
rqdHost.restartRqdNow()

elif args.restart:
rqdHost.restartRqdIdle()

if args.reboot_now:
elif args.reboot_now:
rqdHost.rebootNow()

elif args.reboot:
Expand Down
36 changes: 4 additions & 32 deletions rqd/rqd/rqcore.py
Original file line number Diff line number Diff line change
Expand Up @@ -612,7 +612,6 @@ class RqCore(object):
def __init__(self, optNimbyoff=False):
"""RqCore class initialization"""
self.__whenIdle = False
self.__respawn = False
self.__reboot = False

self.__optNimbyoff = optNimbyoff
Expand Down Expand Up @@ -826,22 +825,13 @@ def releaseCores(self, reqRelease, releaseHT=None, releaseGpus=None):
traceback.extract_tb(sys.exc_info()[2]))
# pylint: enable=no-member

@staticmethod
def respawn_rqd():
"""Restarts RQD"""
os.system("/etc/init.d/rqd3 restart")

def shutdown(self):
"""Shuts down all rqd systems,
will call respawn or reboot if requested"""
"""Shuts down all rqd systems"""
self.nimbyOff()
if self.onIntervalThread is not None:
self.onIntervalThread.cancel()
if self.updateRssThread is not None:
self.updateRssThread.cancel()
if self.__respawn:
log.warning("Respawning RQD by request")
self.respawn_rqd()
elif self.__reboot:
log.warning("Rebooting machine by request")
self.machine.reboot()
Expand Down Expand Up @@ -972,22 +962,6 @@ def shutdownRqdIdle(self):
if not self.__cache:
self.shutdownRqdNow()

def restartRqdNow(self):
"""Kill all running frames and restart RQD"""
log.info("RestartRqdNow")
self.__respawn = True
self.shutdownRqdNow()

def restartRqdIdle(self):
"""When machine is idle, restart RQD"""
log.info("RestartRqdIdle")
self.lockAll()
self.__whenIdle = True
self.__respawn = True
self.sendStatusReport()
if not self.__cache:
self.shutdownRqdNow()

def rebootNow(self):
"""Kill all running frames and reboot machine.
This is not available when a user is logged in"""
Expand Down Expand Up @@ -1091,13 +1065,12 @@ def unlock(self, reqUnlock):

sendUpdate = False

if (self.__whenIdle or self.__reboot or self.__respawn
or self.machine.state != rqd.compiled_proto.host_pb2.UP):
if (self.__whenIdle or self.__reboot or
self.machine.state != rqd.compiled_proto.host_pb2.UP):
sendUpdate = True

self.__whenIdle = False
self.__reboot = False
self.__respawn = False
self.machine.state = rqd.compiled_proto.host_pb2.UP

with self.__threadLock:
Expand All @@ -1121,13 +1094,12 @@ def unlockAll(self):

sendUpdate = False

if (self.__whenIdle or self.__reboot or self.__respawn
if (self.__whenIdle or self.__reboot
or self.machine.state != rqd.compiled_proto.host_pb2.UP):
sendUpdate = True

self.__whenIdle = False
self.__reboot = False
self.__respawn = False
self.machine.state = rqd.compiled_proto.host_pb2.UP

with self.__threadLock:
Expand Down
6 changes: 2 additions & 4 deletions rqd/rqd/rqdservicers.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,15 +86,13 @@ def ShutdownRqdIdle(self, request, context):

def RestartRqdNow(self, request, context):
"""RPC call that kills all running frames and restarts rqd"""
log.info("Request received: restartRqdNow")
self.rqCore.restartRqdNow()
log.warning("Deprecated Request received: restartRqdNow. This request has no effect.")
return rqd.compiled_proto.rqd_pb2.RqdStaticRestartNowResponse()

def RestartRqdIdle(self, request, context):
"""RPC call that that locks all cores and restarts rqd when idle.
unlockAll will abort the request."""
log.info("Request received: restartRqdIdle")
self.rqCore.restartRqdIdle()
log.warning("Deprecated Request received: restartRqdIdle. This request has no effect.")
return rqd.compiled_proto.rqd_pb2.RqdStaticRestartIdleResponse()

def RebootNow(self, request, context):
Expand Down
16 changes: 0 additions & 16 deletions rqd/tests/cuerqd_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,22 +140,6 @@ def test_shutdownRqdNow(self, stubMock, frameStubMock):

stubMock.return_value.ShutdownRqdNow.assert_called_with(shutdownNowRequest)

def test_restartRqdIdle(self, stubMock, frameStubMock):
sys.argv = [SCRIPT_NAME, RQD_HOSTNAME, '--restart']
restartIdleRequest = rqd.compiled_proto.rqd_pb2.RqdStaticRestartIdleRequest()

rqd.cuerqd.main()

stubMock.return_value.RestartRqdIdle.assert_called_with(restartIdleRequest)

def test_restartRqdNow(self, stubMock, frameStubMock):
sys.argv = [SCRIPT_NAME, RQD_HOSTNAME, '--restart_now']
restartNowRequest = rqd.compiled_proto.rqd_pb2.RqdStaticRestartNowRequest()

rqd.cuerqd.main()

stubMock.return_value.RestartRqdNow.assert_called_with(restartNowRequest)

def test_rebootIdle(self, stubMock, frameStubMock):
sys.argv = [SCRIPT_NAME, RQD_HOSTNAME, '--reboot']
rebootIdleRequest = rqd.compiled_proto.rqd_pb2.RqdStaticRebootIdleRequest()
Expand Down
39 changes: 0 additions & 39 deletions rqd/tests/rqcore_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,45 +370,6 @@ def test_getRunningFrame(self):
self.assertEqual(frame, self.rqcore.getRunningFrame(frameId))
self.assertIsNone(self.rqcore.getRunningFrame('some-unknown-frame-id'))

@mock.patch.object(rqd.rqcore.RqCore, 'respawn_rqd')
def test_restartRqdNowNoFrames(self, respawnMock):
self.nimbyMock.return_value.active = False

self.rqcore.restartRqdNow()

respawnMock.assert_called_with()

@mock.patch.object(rqd.rqcore.RqCore, 'killAllFrame', autospec=True)
def test_restartRqdNowWithFrames(self, killAllFrameMock):
frame1Id = 'frame1'
frame1 = rqd.rqnetwork.RunningFrame(
self.rqcore, rqd.compiled_proto.rqd_pb2.RunFrame(frame_id=frame1Id))
self.rqcore.storeFrame(frame1Id, frame1)

self.rqcore.restartRqdNow()

killAllFrameMock.assert_called_with(self.rqcore, mock.ANY)

@mock.patch.object(rqd.rqcore.RqCore, 'respawn_rqd')
def test_restartRqdIdleNoFrames(self, respawnMock):
self.nimbyMock.return_value.active = False

self.rqcore.restartRqdIdle()

respawnMock.assert_called_with()

@mock.patch.object(rqd.rqcore.RqCore, 'respawn_rqd')
def test_restartRqdIdleWithFrames(self, respawnMock):
frame1Id = 'frame1'
frame1 = rqd.rqnetwork.RunningFrame(
self.rqcore, rqd.compiled_proto.rqd_pb2.RunFrame(frame_id=frame1Id))
self.rqcore.storeFrame(frame1Id, frame1)

self.rqcore.restartRqdIdle()

self.assertTrue(self.rqcore.isWaitingForIdle())
respawnMock.assert_not_called()

def test_rebootNowNoUser(self):
self.machineMock.return_value.isUserLoggedIn.return_value = False
self.nimbyMock.return_value.active = False
Expand Down

0 comments on commit 85050da

Please sign in to comment.