Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rec: make quit-nicely wait on actual quit and start using it for stopping by systemd #14976

Draft
wants to merge 6 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pdns/recursordist/pdns-recursor.service.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ After=network-online.target time-sync.target

[Service]
ExecStart=@sbindir@/pdns_recursor --daemon=no --write-pid=no --disable-syslog --log-timestamp=no
ExecStop=@bindir@/rec_control --socket-dir=%t/pdns-recursor quit-nicely
User=@service_user@
Group=@service_group@
Type=notify
Expand Down
25 changes: 20 additions & 5 deletions pdns/recursordist/rec-main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -279,10 +279,9 @@ int RecThreadInfo::runThreads(Logr::log_t log)
RecThreadInfo::setThreadId(currentThreadId);
recursorThread();

for (unsigned int thread = 0; thread < RecThreadInfo::numRecursorThreads(); thread++) {
if (thread == 1) {
continue;
}
// Skip handler thread (it might be still handling the quite-nicely) and 1, which is actually the main thread in this case
// handler thread (0) will be handled in main().
for (unsigned int thread = 2; thread < RecThreadInfo::numRecursorThreads(); thread++) {
auto& tInfo = RecThreadInfo::info(thread);
tInfo.thread.join();
if (tInfo.exitCode != 0) {
Expand Down Expand Up @@ -351,6 +350,9 @@ int RecThreadInfo::runThreads(Logr::log_t log)
info.start(currentThreadId, "web+stat", cpusMap, log);

for (auto& tInfo : RecThreadInfo::infos()) {
if (tInfo.getName() == "web+stat") { // XXX testing for isHandler() does not work as expected!
continue;
}
tInfo.thread.join();
if (tInfo.exitCode != 0) {
ret = tInfo.exitCode;
Expand Down Expand Up @@ -2446,8 +2448,13 @@ static void handleRCC(int fileDesc, FDMultiplexer::funcparam_t& /* var */)
RecursorControlParser::func_t* command = nullptr;
auto answer = RecursorControlParser::getAnswer(clientfd, msg, &command);

g_rcc.send(clientfd, answer);
if (command != doExitNicely) {
g_rcc.send(clientfd, answer);
}
command();
if (command == doExitNicely) {
g_rcc.send(clientfd, answer);
}
}
catch (const std::exception& e) {
SLOG(g_log << Logger::Error << "Error dealing with control socket request: " << e.what() << endl,
Expand Down Expand Up @@ -3149,6 +3156,8 @@ static void setupLogging(const string& logname)
}
}

DoneRunning g_doneRunning;

int main(int argc, char** argv)
{
g_argc = argc;
Expand Down Expand Up @@ -3318,6 +3327,12 @@ int main(int argc, char** argv)
}

ret = serviceMain(startupLog);
{
std::lock_guard lock(g_doneRunning.mutex);
g_doneRunning.done = true;
g_doneRunning.condVar.notify_one();
}
RecThreadInfo::joinThread0();
}
catch (const PDNSException& ae) {
SLOG(g_log << Logger::Error << "Exception: " << ae.reason << endl,
Expand Down
12 changes: 12 additions & 0 deletions pdns/recursordist/rec-main.hh
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,13 @@ extern thread_local unique_ptr<FDMultiplexer> t_fdm;
extern uint16_t g_minUdpSourcePort;
extern uint16_t g_maxUdpSourcePort;
extern bool g_regressionTestMode;
struct DoneRunning
{
std::mutex mutex;
std::condition_variable condVar;
std::atomic<bool> done{false};
};
extern DoneRunning g_doneRunning;

// you can ask this class for a UDP socket to send a query from
// this socket is not yours, don't even think about deleting it
Expand Down Expand Up @@ -539,6 +546,11 @@ public:
mt = theMT;
}

static void joinThread0()
{
info(0).thread.join();
}

private:
// FD corresponding to TCP sockets this thread is listening on.
// These FDs are also in deferredAdds when we have one socket per
Expand Down
23 changes: 21 additions & 2 deletions pdns/recursordist/rec_channel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,18 @@

#include "namespaces.hh"

/* g++ defines __SANITIZE_THREAD__
clang++ supports the nice __has_feature(thread_sanitizer),
let's merge them */
#if defined(__has_feature)
#if __has_feature(thread_sanitizer)
#define __SANITIZE_THREAD__ 1
#endif
#if __has_feature(address_sanitizer)
#define __SANITIZE_ADDRESS__ 1
#endif
#endif

std::atomic<bool> RecursorControlChannel::stop = false;

RecursorControlChannel::RecursorControlChannel()
Expand Down Expand Up @@ -188,8 +200,15 @@ RecursorControlChannel::Answer RecursorControlChannel::recv(int fd, unsigned int
const time_t start = time(nullptr);

waitForRead(fd, timeout, start);
int err;
if (::recv(fd, &err, sizeof(err), 0) != sizeof(err)) {
int err{};
auto ret = ::recv(fd, &err, sizeof(err), 0);
if (ret == 0) {
#if defined(__SANITIZE_THREAD__)
return {0, "bye nicely\n"}; // Hack because TSAN enabled build justs _exits on quit-nicely
#endif
throw PDNSException("Unable to receive status over control connection: EOF");
}
if (ret != sizeof(err)) {
throw PDNSException("Unable to receive return status over control channel: " + stringerror());
}

Expand Down
9 changes: 7 additions & 2 deletions pdns/recursordist/rec_channel_rec.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1378,17 +1378,22 @@ void doExitGeneric(bool nicely)
#if defined(__SANITIZE_THREAD__)
_exit(0); // regression test check for exit 0
#endif
g_log << Logger::Error << "Exiting on user request" << endl;
g_rcc.~RecursorControlChannel();
g_slog->withName("runtime")->info(Logr::Notice, "Exiting on user request");

if (!g_pidfname.empty()) {
unlink(g_pidfname.c_str()); // we can at least try..
}

if (nicely) {
RecursorControlChannel::stop = true;
{
std::unique_lock lock(g_doneRunning.mutex);
g_doneRunning.condVar.wait(lock, [] { return g_doneRunning.done.load(); });
}
// g_rcc.~RecursorControlChannel() do not call, caller still needs it!
}
else {
g_rcc.~RecursorControlChannel();
#if defined(__SANITIZE_ADDRESS__) && defined(HAVE_LEAK_SANITIZER_INTERFACE)
clearLuaScript();
pdns::coverage::dumpCoverageData();
Expand Down
Loading