Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
c1bd96a
fix: interop with managed .NET runtimes
jpnurmi Sep 29, 2025
45dccd6
Update CHANGELOG.md
jpnurmi Sep 29, 2025
02ebf36
Clarify the comment
jpnurmi Sep 29, 2025
81293d8
Remove hard-coded RID
jpnurmi Sep 29, 2025
10ba6bd
Align macros and fix constants
jpnurmi Sep 29, 2025
d9eafcd
Update tests/test_dotnet_signals.py
jpnurmi Sep 30, 2025
1bf0f52
Clarify AOT in CHANGELOG & comment
jpnurmi Sep 30, 2025
060b18b
Separate JIT vs. AOT test cases
jpnurmi Sep 30, 2025
d4dd399
Apply suggestions from code review
jpnurmi Sep 30, 2025
4da4c4a
Add __mips64__ and __s390x__
jpnurmi Sep 30, 2025
345ab4d
Merge remote-tracking branch 'upstream/master' into fix/dotnet-interop
jpnurmi Oct 21, 2025
e8576cf
Avoid double-invoking signal handlers when CHAIN_AT_START (crashes on…
jpnurmi Oct 21, 2025
06c84e1
Minimize test diff
jpnurmi Oct 21, 2025
e92bb00
Less is more - drop mips, risc, s390x
jpnurmi Oct 21, 2025
de6d67c
Move log flush
jpnurmi Oct 22, 2025
4a73907
Update test (native-crash, managed-exception, unhandled-managed-excep…
jpnurmi Oct 22, 2025
72df376
Update tests/fixtures/dotnet_signal/Program.cs
jpnurmi Oct 22, 2025
95c3c9b
AOT test case: add unhandled-managed-exception
jpnurmi Oct 22, 2025
b6e37d1
Skip the crashy libbacktrace fallback from chained signal handlers
jpnurmi Oct 22, 2025
ac6877e
fix build on windows
jpnurmi Oct 22, 2025
8a5c5b6
Merge remote-tracking branch 'upstream/master' into fix/dotnet-interop
jpnurmi Oct 27, 2025
2ef7c5b
Clarify why the fallback is skipped
jpnurmi Oct 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
- Add logs flush on crash. This is not available for macOS with the `crashpad` backend. ([#1404](https://github.com/getsentry/sentry-native/pull/1404))
- Make narrow UTF-8 the canonical path encoding on Windows. ([#1413](https://github.com/getsentry/sentry-native/pull/1413))
- Re-add setting thread name for Windows transport. ([#1424](https://github.com/getsentry/sentry-native/pull/1424))
- Fix AOT interop with managed .NET runtimes. ([#1392](https://github.com/getsentry/sentry-native/pull/1392))

**Internal**:

Expand Down
119 changes: 92 additions & 27 deletions src/backends/sentry_backend_inproc.c
Original file line number Diff line number Diff line change
Expand Up @@ -456,9 +456,48 @@ registers_from_uctx(const sentry_ucontext_t *uctx)
return registers;
}

#ifdef SENTRY_PLATFORM_LINUX
static uintptr_t
get_stack_pointer(const sentry_ucontext_t *uctx)
{
# if defined(__i386__)
return uctx->user_context->uc_mcontext.gregs[REG_ESP];
# elif defined(__x86_64__)
return uctx->user_context->uc_mcontext.gregs[REG_RSP];
# elif defined(__arm__)
return uctx->user_context->uc_mcontext.arm_sp;
# elif defined(__aarch64__)
return uctx->user_context->uc_mcontext.sp;
# else
SENTRY_WARN("get_stack_pointer is not implemented for this architecture. "
"Signal chaining may not work as expected.");
return NULL;
# endif
}

static uintptr_t
get_instruction_pointer(const sentry_ucontext_t *uctx)
{
# if defined(__i386__)
return uctx->user_context->uc_mcontext.gregs[REG_EIP];
# elif defined(__x86_64__)
return uctx->user_context->uc_mcontext.gregs[REG_RIP];
# elif defined(__arm__)
return uctx->user_context->uc_mcontext.arm_pc;
# elif defined(__aarch64__)
return uctx->user_context->uc_mcontext.pc;
# else
SENTRY_WARN(
"get_instruction_pointer is not implemented for this architecture. "
"Signal chaining may not work as expected.");
return NULL;
# endif
}
#endif

static sentry_value_t
make_signal_event(
const struct signal_slot *sig_slot, const sentry_ucontext_t *uctx)
make_signal_event(const struct signal_slot *sig_slot,
const sentry_ucontext_t *uctx, sentry_handler_strategy_t strategy)
{
sentry_value_t event = sentry_value_new_event();
sentry_value_set_by_key(
Expand Down Expand Up @@ -496,8 +535,10 @@ make_signal_event(
"captured backtrace from ucontext with %lu frames", frame_count);
// if unwinding from a ucontext didn't yield any results, try again with a
// direct unwind. this is most likely the case when using `libbacktrace`,
// since that does not allow to unwind from a ucontext at all.
if (!frame_count) {
// since that does not allow to unwind from a ucontext at all. the fallback
// is skipped with the "chain at start" strategy because `libbacktrace`
// crashes, and would likely not provide helpful information anyway.
if (!frame_count && strategy != SENTRY_HANDLER_STRATEGY_CHAIN_AT_START) {
frame_count = sentry_unwind_stack(NULL, &backtrace[0], MAX_FRAMES);
}
SENTRY_DEBUGF("captured backtrace with %lu frames", frame_count);
Expand Down Expand Up @@ -534,20 +575,7 @@ handle_ucontext(const sentry_ucontext_t *uctx)

SENTRY_INFO("entering signal handler");

const struct signal_slot *sig_slot = NULL;
for (int i = 0; i < SIGNAL_COUNT; ++i) {
#ifdef SENTRY_PLATFORM_UNIX
if (SIGNAL_DEFINITIONS[i].signum == uctx->signum) {
#elif defined SENTRY_PLATFORM_WINDOWS
if (SIGNAL_DEFINITIONS[i].signum
== uctx->exception_ptrs.ExceptionRecord->ExceptionCode) {
#else
# error Unsupported platform
#endif
sig_slot = &SIGNAL_DEFINITIONS[i];
}
}

sentry_handler_strategy_t strategy = SENTRY_HANDLER_STRATEGY_DEFAULT;
#ifdef SENTRY_PLATFORM_UNIX
// inform the sentry_sync system that we're in a signal handler. This will
// make mutexes spin on a spinlock instead as it's no longer safe to use a
Expand All @@ -556,42 +584,77 @@ handle_ucontext(const sentry_ucontext_t *uctx)
#endif

SENTRY_WITH_OPTIONS (options) {
// Flush logs in a crash-safe manner before crash handling
if (options->enable_logs) {
sentry__logs_flush_crash_safe();
}
#ifdef SENTRY_PLATFORM_LINUX
// On Linux (and thus Android) CLR/Mono converts signals provoked by
// AOT/JIT-generated native code into managed code exceptions. In these
// cases, we shouldn't react to the signal at all and let their handler
// discontinue the signal chain by invoking the runtime handler before
// we process the signal.
if (sentry_options_get_handler_strategy(options)
== SENTRY_HANDLER_STRATEGY_CHAIN_AT_START) {
strategy = sentry_options_get_handler_strategy(options);
if (strategy == SENTRY_HANDLER_STRATEGY_CHAIN_AT_START) {
SENTRY_DEBUG("defer to runtime signal handler at start");
// there is a good chance that we won't return from the previous
// handler and that would mean we couldn't enter this handler with
// the next signal coming in if we didn't "leave" here.
sentry__leave_signal_handler();
if (!options->enable_logging_when_crashed) {
sentry__logger_enable();
}

uintptr_t ip = get_instruction_pointer(uctx);
uintptr_t sp = get_stack_pointer(uctx);

// invoke the previous handler (typically the CLR/Mono
// signal-to-managed-exception handler)
invoke_signal_handler(
uctx->signum, uctx->siginfo, (void *)uctx->user_context);

// If the execution returns here in AOT mode, and the instruction
// or stack pointer were changed, it means CLR/Mono converted the
// signal into a managed exception and transferred execution to a
// managed exception handler.
// https://github.com/dotnet/runtime/blob/6d96e28597e7da0d790d495ba834cc4908e442cd/src/mono/mono/mini/exceptions-arm64.c#L538
if (ip != get_instruction_pointer(uctx)
|| sp != get_stack_pointer(uctx)) {
SENTRY_DEBUG("runtime converted the signal to a managed "
"exception, we do not handle the signal");
return;
}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Signal Chaining Logic Fails on Some Architectures

The new signal chaining logic has two issues: on architectures where instruction and stack pointers aren't retrieved, it incorrectly proceeds with crash handling even after a runtime converts a signal. Additionally, when a signal conversion is detected and the function returns early, the internal signal handler state becomes unbalanced.

Fix in Cursor Fix in Web

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is absolutely correct, but the only side-effect currently visible is for the logging toggle. Similarly, to how we "leave" the signal handler before chaining, we must also re-enable logging immediately after "leaving" and disable it again before re-entering, because if it were a managed code exception, we want logging to remain enabled.

We can also move the entire sig_slot assignment down below the CHAIN_AT_START code, to make the path dependencies more obvious.

However, I think both have a lower priority than figuring out the signaling sequence of both runtimes and how we can align them.


// let's re-enter because it means this was an actual native crash
if (!options->enable_logging_when_crashed) {
sentry__logger_disable();
}
sentry__enter_signal_handler();
SENTRY_DEBUG(
"return from runtime signal handler, we handle the signal");
}
#endif

const struct signal_slot *sig_slot = NULL;
for (int i = 0; i < SIGNAL_COUNT; ++i) {
#ifdef SENTRY_PLATFORM_UNIX
if (SIGNAL_DEFINITIONS[i].signum == uctx->signum) {
#elif defined SENTRY_PLATFORM_WINDOWS
if (SIGNAL_DEFINITIONS[i].signum
== uctx->exception_ptrs.ExceptionRecord->ExceptionCode) {
#else
# error Unsupported platform
#endif
sig_slot = &SIGNAL_DEFINITIONS[i];
}
}

#ifdef SENTRY_PLATFORM_UNIX
// use a signal-safe allocator before we tear down.
sentry__page_allocator_enable();
#endif
// Flush logs in a crash-safe manner before crash handling
if (options->enable_logs) {
sentry__logs_flush_crash_safe();
}

sentry_value_t event = make_signal_event(sig_slot, uctx);
sentry_value_t event = make_signal_event(sig_slot, uctx, strategy);
bool should_handle = true;
sentry__write_crash_marker(options);

Expand Down Expand Up @@ -647,8 +710,10 @@ handle_ucontext(const sentry_ucontext_t *uctx)
// forward as we're not restoring the page allocator.
reset_signal_handlers();
sentry__leave_signal_handler();
invoke_signal_handler(
uctx->signum, uctx->siginfo, (void *)uctx->user_context);
if (strategy != SENTRY_HANDLER_STRATEGY_CHAIN_AT_START) {
invoke_signal_handler(
uctx->signum, uctx->siginfo, (void *)uctx->user_context);
}
#endif
}

Expand Down
11 changes: 7 additions & 4 deletions tests/fixtures/dotnet_signal/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ static void Main(string[] args)
{
native_crash();
}
else
else if (args.Contains("managed-exception"))
{
try
{
Expand All @@ -55,10 +55,13 @@ static void Main(string[] args)
}
catch (NullReferenceException exception)
{
Console.WriteLine("dereference another NULL object from managed code");
var s = default(string);
var c = s.Length;
}
}
else if (args.Contains("unhandled-managed-exception"))
{
Console.WriteLine("dereference a NULL object from managed code (unhandled)");
var s = default(string);
var c = s.Length;
}
}
}
139 changes: 138 additions & 1 deletion tests/test_dotnet_signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,11 @@ def run_dotnet(tmp_path, args):


def run_dotnet_managed_exception(tmp_path):
return run_dotnet(tmp_path, ["dotnet", "run"])
return run_dotnet(tmp_path, ["dotnet", "run", "managed-exception"])


def run_dotnet_unhandled_managed_exception(tmp_path):
return run_dotnet(tmp_path, ["dotnet", "run", "unhandled-managed-exception"])


def run_dotnet_native_crash(tmp_path):
Expand Down Expand Up @@ -84,9 +88,25 @@ def test_dotnet_signals_inproc(cmake):
)

# this runs the dotnet program with the Native SDK and chain-at-start, when managed code raises a signal that CLR convert to an exception.
# raising a signal that CLR converts to a managed exception, which is then handled by the managed code and
# not leaked out to the native code so no crash is registered.
dotnet_run = run_dotnet_managed_exception(tmp_path)
dotnet_run_stdout, dotnet_run_stderr = dotnet_run.communicate()

# the program handles the `NullReferenceException`, so the Native SDK won't register a crash.
assert dotnet_run.returncode == 0
assert not (
"NullReferenceException" in dotnet_run_stderr
), f"Managed exception run failed.\nstdout:\n{dotnet_run_stdout}\nstderr:\n{dotnet_run_stderr}"
database_path = project_fixture_path / ".sentry-native"
assert database_path.exists(), "No database-path exists"
assert not (database_path / "last_crash").exists(), "A crash was registered"
assert_empty_run_dir(database_path)

# this runs the dotnet program with the Native SDK and chain-at-start, when managed code raises a signal that CLR convert to an exception.
dotnet_run = run_dotnet_unhandled_managed_exception(tmp_path)
dotnet_run_stdout, dotnet_run_stderr = dotnet_run.communicate()

# the program will fail with a `NullReferenceException`, but the Native SDK won't register a crash.
assert dotnet_run.returncode != 0
assert (
Expand All @@ -112,3 +132,120 @@ def test_dotnet_signals_inproc(cmake):
shutil.rmtree(project_fixture_path / ".sentry-native", ignore_errors=True)
shutil.rmtree(project_fixture_path / "bin", ignore_errors=True)
shutil.rmtree(project_fixture_path / "obj", ignore_errors=True)


def run_aot(tmp_path, args=None):
if args is None:
args = []
env = os.environ.copy()
env["LD_LIBRARY_PATH"] = str(tmp_path) + ":" + env.get("LD_LIBRARY_PATH", "")
return subprocess.Popen(
[str(tmp_path / "bin/test_dotnet")] + args,
cwd=tmp_path,
env=env,
text=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)


def run_aot_managed_exception(tmp_path):
return run_aot(tmp_path, ["managed-exception"])


def run_aot_unhandled_managed_exception(tmp_path):
return run_aot(tmp_path, ["unhandled-managed-exception"])


def run_aot_native_crash(tmp_path):
return run_aot(tmp_path, ["native-crash"])


@pytest.mark.skipif(
sys.platform != "linux" or is_x86 or is_asan or is_tsan,
reason="dotnet AOT signal handling is currently only supported on 64-bit Linux without sanitizers",
)
def test_aot_signals_inproc(cmake):
try:
# build native client library with inproc and the example for crash dumping
tmp_path = cmake(
["sentry"],
{"SENTRY_BACKEND": "inproc", "SENTRY_TRANSPORT": "none"},
)

# build the crashing native library
subprocess.run(
[
"gcc",
"-Wall",
"-Wextra",
"-fPIC",
"-shared",
str(project_fixture_path / "crash.c"),
"-o",
str(tmp_path / "libcrash.so"),
],
check=True,
)

# AOT-compile the dotnet program
subprocess.run(
[
"dotnet",
"publish",
"-p:PublishAot=true",
"-p:Configuration=Release",
"-o",
str(tmp_path / "bin"),
],
cwd=project_fixture_path,
check=True,
)

# this runs the dotnet program in AOT mode with the Native SDK and chain-at-start, and triggers a `NullReferenceException`
# raising a signal that CLR converts to a managed exception, which is then handled by the managed code and
# not leaked out to the native code so no crash is registered.
dotnet_run = run_aot_managed_exception(tmp_path)
dotnet_run_stdout, dotnet_run_stderr = dotnet_run.communicate()

# the program handles the `NullReferenceException`, so the Native SDK won't register a crash.
assert dotnet_run.returncode == 0
assert not (
"NullReferenceException" in dotnet_run_stderr
), f"Managed exception run failed.\nstdout:\n{dotnet_run_stdout}\nstderr:\n{dotnet_run_stderr}"
database_path = tmp_path / ".sentry-native"
assert database_path.exists(), "No database-path exists"
assert not (database_path / "last_crash").exists(), "A crash was registered"
assert_empty_run_dir(database_path)

# this runs the dotnet program in AOT mode with the Native SDK and chain-at-start, and triggers a `NullReferenceException`
# raising a signal that CLR converts to a managed exception, which is then not handled by the managed code but
# leaked out to the native code so a crash is registered.
dotnet_run = run_aot_unhandled_managed_exception(tmp_path)
dotnet_run_stdout, dotnet_run_stderr = dotnet_run.communicate()

# the program will fail with a `NullReferenceException`, so the Native SDK will register a crash.
assert dotnet_run.returncode != 0
assert (
"NullReferenceException" in dotnet_run_stderr
), f"Managed exception run failed.\nstdout:\n{dotnet_run_stdout}\nstderr:\n{dotnet_run_stderr}"
database_path = tmp_path / ".sentry-native"
assert database_path.exists(), "No database-path exists"
assert (database_path / "last_crash").exists()
assert_run_dir_with_envelope(database_path)

# this runs the dotnet program with the Native SDK and chain-at-start, when an actual native crash raises a signal
dotnet_run = run_aot_native_crash(tmp_path)
dotnet_run_stdout, dotnet_run_stderr = dotnet_run.communicate()

# the program will fail with a SIGSEGV, that has been processed by the Native SDK which produced a crash envelope
assert dotnet_run.returncode != 0
assert (
"crash has been captured" in dotnet_run_stderr
), f"Native exception run failed.\nstdout:\n{dotnet_run_stdout}\nstderr:\n{dotnet_run_stderr}"
assert (database_path / "last_crash").exists()
assert_run_dir_with_envelope(database_path)
finally:
shutil.rmtree(tmp_path / ".sentry-native", ignore_errors=True)
shutil.rmtree(project_fixture_path / "bin", ignore_errors=True)
shutil.rmtree(project_fixture_path / "obj", ignore_errors=True)
Loading