Skip to content

8359820: Improve handshake/safepoint timeout diagnostic messages #26309

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion src/hotspot/share/runtime/handshake.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
#include "utilities/globalDefinitions.hpp"
#include "utilities/preserveException.hpp"
#include "utilities/systemMemoryBarrier.hpp"
#include "utilities/vmError.hpp"

class HandshakeOperation : public CHeapObj<mtThread> {
friend class HandshakeState;
Expand Down Expand Up @@ -201,14 +202,19 @@ static void handle_timeout(HandshakeOperation* op, JavaThread* target) {
}

if (target != nullptr) {
VMError::set_handshake_timed_out_thread(p2i(target));
if (os::signal_thread(target, SIGILL, "cannot be handshaked")) {
// Give target a chance to report the error and terminate the VM.
os::naked_sleep(3000);
}
} else {
log_error(handshake)("No thread with an unfinished handshake op(" INTPTR_FORMAT ") found.", p2i(op));
}
fatal("Handshake timeout");
if (target != nullptr) {
fatal("Thread " PTR_FORMAT " has not cleared handshake op: " PTR_FORMAT ", then failed to terminate JVM", p2i(target), p2i(op));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
fatal("Thread " PTR_FORMAT " has not cleared handshake op: " PTR_FORMAT ", then failed to terminate JVM", p2i(target), p2i(op));
fatal("Thread " PTR_FORMAT " has not cleared handshake op %s, and failed to terminate the JVM", p2i(target), op->name());

The earlier logging statement that uses p2i(op) relies on an even earlier logging statement (line 189/190) that reports the name and the p2i value. But the fatal error message can't rely on using logging information to map the p2i value back to a name, so we need the name directly.

} else {
fatal("Handshake timeout");
}
}

static void check_handshake_timeout(jlong start_time, HandshakeOperation* op, JavaThread* target = nullptr) {
Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/share/runtime/safepoint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
#include "utilities/events.hpp"
#include "utilities/macros.hpp"
#include "utilities/systemMemoryBarrier.hpp"
#include "utilities/vmError.hpp"

static void post_safepoint_begin_event(EventSafepointBegin& event,
uint64_t safepoint_id,
Expand Down Expand Up @@ -650,6 +651,7 @@ void SafepointSynchronize::print_safepoint_timeout() {
// Send the blocking thread a signal to terminate and write an error file.
for (JavaThreadIteratorWithHandle jtiwh; JavaThread *cur_thread = jtiwh.next(); ) {
if (cur_thread->safepoint_state()->is_running()) {
VMError::set_safepoint_timed_out_thread(p2i(cur_thread));
if (!os::signal_thread(cur_thread, SIGILL, "blocking a safepoint")) {
break; // Could not send signal. Report fatal error.
}
Expand Down
18 changes: 17 additions & 1 deletion src/hotspot/share/utilities/vmError.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ int VMError::_lineno;
size_t VMError::_size;
const size_t VMError::_reattempt_required_stack_headroom = 64 * K;
const intptr_t VMError::segfault_address = pd_segfault_address;
volatile intptr_t VMError::handshakeTimedOutThread = p2i(nullptr);
volatile intptr_t VMError::safepointTimedOutThread = p2i(nullptr);
Comment on lines +107 to +108
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
volatile intptr_t VMError::handshakeTimedOutThread = p2i(nullptr);
volatile intptr_t VMError::safepointTimedOutThread = p2i(nullptr);
volatile intptr_t VMError::_handshake_timeout_thread = p2i(nullptr);
volatile intptr_t VMError::_safepoint_timeout_thread = p2i(nullptr);


// List of environment variables that should be reported in error log file.
static const char* env_list[] = {
Expand Down Expand Up @@ -818,7 +820,13 @@ void VMError::report(outputStream* st, bool _verbose) {
st->print(" (0x%x)", _id); // signal number
st->print(" at pc=" PTR_FORMAT, p2i(_pc));
if (_siginfo != nullptr && os::signal_sent_by_kill(_siginfo)) {
st->print(" (sent by kill)");
if (handshakeTimedOutThread == p2i(_thread)) {
st->print(" (sent by handshake timeout handler");
} else if (safepointTimedOutThread == p2i(_thread)) {
st->print(" (sent by safepoint timeout handler");
} else {
st->print(" (sent by kill)");
}
}
} else {
if (should_report_bug(_id)) {
Expand Down Expand Up @@ -1329,6 +1337,14 @@ void VMError::report(outputStream* st, bool _verbose) {
# undef END
}

void VMError::set_handshake_timed_out_thread(intptr_t x) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
void VMError::set_handshake_timed_out_thread(intptr_t x) {
void VMError::set_handshake_timed_out_thread(intptr_t thread_addr) {

handshakeTimedOutThread = x;
}

void VMError::set_safepoint_timed_out_thread(intptr_t x) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
void VMError::set_safepoint_timed_out_thread(intptr_t x) {
void VMError::set_safepoint_timed_out_thread(intptr_t thread_addr) {

safepointTimedOutThread = x;
}

// Report for the vm_info_cmd. This prints out the information above omitting
// crash and thread specific information. If output is added above, it should be added
// here also, if it is safe to call during a running process.
Expand Down
7 changes: 7 additions & 0 deletions src/hotspot/share/utilities/vmError.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,10 @@ class VMError : public AllStatic {
static jlong get_step_start_time();
static void clear_step_start_time();

// Handshake/safepoint timed out threads
static volatile intptr_t handshakeTimedOutThread;
static volatile intptr_t safepointTimedOutThread;

WINDOWS_ONLY([[noreturn]] static void raise_fail_fast(const void* exrecord, const void* context);)

public:
Expand Down Expand Up @@ -218,6 +222,9 @@ class VMError : public AllStatic {
static int prepare_log_file(const char* pattern, const char* default_pattern, bool overwrite_existing, char* buf, size_t buflen);

static bool was_assert_poison_crash(const void* sigInfo);

static void set_handshake_timed_out_thread(intptr_t x);
static void set_safepoint_timed_out_thread(intptr_t x);
};

class VMErrorCallback {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ private static void verifyAbortVmApplied(OutputAnalyzer output) {
} else {
output.shouldContain("SIGILL");
if (Platform.isLinux()) {
output.shouldContain("(sent by kill)");
output.shouldContain("(sent by safepoint timeout handler");
}
}
output.shouldNotHaveExitValue(0);
Expand Down