Skip to content

Commit eda4e08

Browse files
authored
CONT stack overflow postmortem (#9083)
- check in cont_run() and cont_suspend() whether a1 is out of bounds
- in case a1 is broken, postmortem will still report proper context in proper stack boundaries

Additionally:
- as suggested in #9069, change stack smashing to a single line that does not mention any Exceptions
- reduce overall stack dump length when there are known garbage values, i.e. cont stackguard
- decoder.py addr search regexp would no longer skip stack lines with '<'
- fix decoder.py parsing so it notices both stack smashing and alloc errors
1 parent 41ecd65 commit eda4e08

File tree

7 files changed

+112
-48
lines changed

7 files changed

+112
-48
lines changed

Diff for: cores/esp8266/cont.S

+17-3
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,14 @@
2626
cont_suspend:
2727
/* a1: sp */
2828
/* a2: void* cont_ctx */
29-
/* adjust stack and save registers */
29+
/* adjust stack */
3030
addi a1, a1, -24
31+
32+
/* make sure that a1 points after cont_ctx.stack[] */
33+
addi a4, a2, 32
34+
bltu a1, a4, cont_overflow
35+
36+
/* save registers */
3137
s32i a12, a1, 0
3238
s32i a13, a1, 4
3339
s32i a14, a1, 8
@@ -47,6 +53,11 @@ cont_suspend:
4753
l32i a1, a2, 4
4854
jx a0
4955

56+
cont_overflow:
57+
mov.n a3, a1
58+
movi a4, __stack_overflow
59+
jx a4
60+
5061
cont_continue:
5162
l32i a12, a1, 0
5263
l32i a13, a1, 4
@@ -113,20 +124,23 @@ cont_run:
113124
bnez a4, cont_resume
114125
/* else */
115126
/* set new stack*/
116-
l32i a1, a2, 16;
127+
l32i a1, a2, 16
117128
/* goto pfn */
118129
movi a2, cont_wrapper
119130
jx a2
120131

121132
cont_resume:
122133
/* a1 <- cont_ctx.sp_suspend */
123134
l32i a1, a2, 12
135+
/* make sure that a1 points after cont_ctx.stack[] */
136+
addi a5, a2, 32
137+
bltu a1, a5, cont_overflow
124138
/* reset yield flag, 0 -> cont_ctx.pc_suspend */
125139
movi a3, 0
126140
s32i a3, a2, 8
127141
/* jump to saved cont_ctx.pc_suspend */
128142
movi a0, cont_ret
129-
jx a4
143+
jx a4
130144

131145
cont_norm:
132146
/* calculate pointer to cont_ctx.struct_start from sp */

Diff for: cores/esp8266/cont.h

+9-2
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,16 @@
2222
#define CONT_H_
2323

2424
#include <stdbool.h>
25+
#include <stdint.h>
2526

2627
#ifndef CONT_STACKSIZE
2728
#define CONT_STACKSIZE 4096
2829
#endif
2930

31+
#ifndef CONT_STACKGUARD
32+
#define CONT_STACKGUARD 0xfeefeffe
33+
#endif
34+
3035
#ifdef __cplusplus
3136
extern "C" {
3237
#endif
@@ -62,8 +67,11 @@ void cont_run(cont_t*, void (*pfn)(void));
6267
// execution state (registers and stack)
6368
void cont_suspend(cont_t*);
6469

70+
// Check that cont resume state is valid. Immediately panics on failure.
71+
void cont_check_overflow(cont_t*);
72+
6573
// Check guard bytes around the stack. Immediately panics on failure.
66-
void cont_check(cont_t*);
74+
void cont_check_guard(cont_t*);
6775

6876
// Go through stack and check how many bytes are most probably still unchanged
6977
// and thus weren't used by the user code. i.e. that stack space is free. (high water mark)
@@ -78,7 +86,6 @@ bool cont_can_suspend(cont_t* cont);
7886
// free, running the routine, then checking the max free
7987
void cont_repaint_stack(cont_t *cont);
8088

81-
8289
#ifdef __cplusplus
8390
}
8491
#endif

Diff for: cores/esp8266/cont_util.cpp

+17-10
Original file line numberDiff line numberDiff line change
@@ -23,38 +23,45 @@
2323
#include <stddef.h>
2424
#include <string.h>
2525

26-
#include "cont.h"
26+
#include "core_esp8266_features.h"
2727
#include "debug.h"
2828

29+
#include "cont.h"
30+
2931
extern "C"
3032
{
3133

32-
static constexpr unsigned int CONT_STACKGUARD { 0xfeefeffe };
34+
static constexpr uint32_t CONT_STACKSIZE_U32 { sizeof(cont_t::stack) / sizeof(*cont_t::stack) };
3335

3436
void cont_init(cont_t* cont) {
3537
memset(cont, 0, sizeof(cont_t));
3638

3739
cont->stack_guard1 = CONT_STACKGUARD;
3840
cont->stack_guard2 = CONT_STACKGUARD;
39-
cont->stack_end = cont->stack + (sizeof(cont->stack) / 4);
41+
cont->stack_end = &cont->stack[0] + CONT_STACKSIZE_U32;
4042
cont->struct_start = (unsigned*) cont;
4143

4244
// fill stack with magic values to check high water mark
43-
for(int pos = 0; pos < (int)(sizeof(cont->stack) / 4); pos++)
45+
for(int pos = 0; pos < (int)(CONT_STACKSIZE_U32); pos++)
4446
{
4547
cont->stack[pos] = CONT_STACKGUARD;
4648
}
4749
}
4850

49-
void IRAM_ATTR cont_check(cont_t* cont) {
50-
if ((cont->stack_guard1 == CONT_STACKGUARD)
51-
&& (cont->stack_guard2 == CONT_STACKGUARD))
51+
void IRAM_ATTR cont_check_guard(cont_t* cont) {
52+
if ((cont->stack_guard1 != CONT_STACKGUARD)
53+
|| (cont->stack_guard2 != CONT_STACKGUARD))
5254
{
53-
return;
55+
__stack_chk_fail();
56+
__builtin_unreachable();
5457
}
58+
}
5559

56-
__stack_chk_fail();
57-
__builtin_unreachable();
60+
void IRAM_ATTR cont_check_overflow(cont_t* cont) {
61+
if (cont->sp_suspend && (cont->sp_suspend < &cont->stack[0])) {
62+
__stack_overflow(cont, cont->sp_suspend);
63+
__builtin_unreachable();
64+
}
5865
}
5966

6067
// No need for this to be in IRAM, not expected to be IRQ called

Diff for: cores/esp8266/core_esp8266_main.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ static void loop_wrapper() {
261261
}
262262
loop();
263263
loop_end();
264-
cont_check(g_pcont);
264+
cont_check_guard(g_pcont);
265265
if (serialEventRun) {
266266
serialEventRun();
267267
}

Diff for: cores/esp8266/core_esp8266_postmortem.cpp

+49-15
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ static const char* s_unhandled_exception = NULL;
4848

4949
// Common way to notify about where the stack smashing happened
5050
// (but, **only** if caller uses our handler function)
51-
static uint32_t s_stacksmash_addr = 0;
51+
static uint32_t s_stack_chk_addr = 0;
5252

5353
void abort() __attribute__((noreturn));
5454
static void uart_write_char_d(char c);
@@ -59,6 +59,7 @@ static void print_stack(uint32_t start, uint32_t end);
5959
// using numbers different from "REASON_" in user_interface.h (=0..6)
6060
enum rst_reason_sw
6161
{
62+
REASON_USER_STACK_OVERFLOW = 252,
6263
REASON_USER_STACK_SMASH = 253,
6364
REASON_USER_SWEXCEPTION_RST = 254
6465
};
@@ -188,7 +189,7 @@ static void postmortem_report(uint32_t sp_dump) {
188189
}
189190
else if (rst_info.reason == REASON_SOFT_WDT_RST) {
190191
ets_printf_P(PSTR("\nSoft WDT reset"));
191-
const char infinite_loop[] = { 0x06, 0xff, 0xff }; // loop: j loop
192+
const uint8_t infinite_loop[] = { 0x06, 0xff, 0xff }; // loop: j loop
192193
if (is_pc_valid(rst_info.epc1) && 0 == memcmp_P(infinite_loop, (PGM_VOID_P)rst_info.epc1, 3u)) {
193194
// The SDK is riddled with these. They are usually preceded by an ets_printf.
194195
ets_printf_P(PSTR(" - deliberate infinite loop detected"));
@@ -198,17 +199,23 @@ static void postmortem_report(uint32_t sp_dump) {
198199
rst_info.exccause, /* Address executing at time of Soft WDT level-1 interrupt */ rst_info.epc1, 0, 0, 0, 0);
199200
}
200201
else if (rst_info.reason == REASON_USER_STACK_SMASH) {
201-
ets_printf_P(PSTR("\nStack smashing detected.\n"));
202-
ets_printf_P(PSTR("\nException (%d):\nepc1=0x%08x epc2=0x%08x epc3=0x%08x excvaddr=0x%08x depc=0x%08x\n"),
203-
5 /* Alloca exception, closest thing to stack fault*/, s_stacksmash_addr, 0, 0, 0, 0);
204-
}
202+
ets_printf_P(PSTR("\nStack smashing detected at 0x%08x\n"), s_stack_chk_addr);
203+
}
204+
else if (rst_info.reason == REASON_USER_STACK_OVERFLOW) {
205+
ets_printf_P(PSTR("\nStack overflow detected\n"));
206+
}
205207
else {
206208
ets_printf_P(PSTR("\nGeneric Reset\n"));
207209
}
208210

209-
uint32_t cont_stack_start = (uint32_t) &(g_pcont->stack);
210-
uint32_t cont_stack_end = (uint32_t) g_pcont->stack_end;
211-
uint32_t stack_end;
211+
uint32_t cont_stack_start;
212+
if (rst_info.reason == REASON_USER_STACK_SMASH) {
213+
cont_stack_start = s_stack_chk_addr;
214+
} else {
215+
cont_stack_start = (uint32_t) (&g_pcont->stack[0]);
216+
}
217+
218+
uint32_t cont_stack_end = cont_stack_start + CONT_STACKSIZE;
212219

213220
// amount of stack taken by interrupt or exception handler
214221
// and everything up to __wrap_system_restart_local
@@ -249,15 +256,21 @@ static void postmortem_report(uint32_t sp_dump) {
249256
sp_dump = stack_thunk_get_cont_sp();
250257
}
251258

252-
if (sp_dump > cont_stack_start && sp_dump < cont_stack_end) {
259+
uint32_t stack_end;
260+
261+
// above and inside of cont, dump from the sp to the bottom of the stack
262+
if ((rst_info.reason == REASON_USER_STACK_OVERFLOW)
263+
|| ((sp_dump > cont_stack_start) && (sp_dump < cont_stack_end)))
264+
{
253265
ets_printf_P(PSTR("\nctx: cont\n"));
254266
stack_end = cont_stack_end;
255267
}
268+
// in system, reposition to a known address
269+
// it's actually 0x3ffffff0, but the stuff below ets_run
270+
// is likely not really relevant to the crash
256271
else {
257272
ets_printf_P(PSTR("\nctx: sys\n"));
258273
stack_end = 0x3fffffb0;
259-
// it's actually 0x3ffffff0, but the stuff below ets_run
260-
// is likely not really relevant to the crash
261274
}
262275

263276
ets_printf_P(PSTR("sp: %08x end: %08x offset: %04x\n"), sp_dump, stack_end, offset);
@@ -296,11 +309,20 @@ static void print_stack(uint32_t start, uint32_t end) {
296309
for (uint32_t pos = start; pos < end; pos += 0x10) {
297310
uint32_t* values = (uint32_t*)(pos);
298311

312+
// avoid printing irrelevant data
313+
if ((values[0] == CONT_STACKGUARD)
314+
&& (values[0] == values[1])
315+
&& (values[1] == values[2])
316+
&& (values[2] == values[3]))
317+
{
318+
continue;
319+
}
320+
299321
// rough indicator: stack frames usually have SP saved as the second word
300-
bool looksLikeStackFrame = (values[2] == pos + 0x10);
322+
const bool looksLikeStackFrame = (values[2] == pos + 0x10);
301323

302324
ets_printf_P(PSTR("%08x: %08x %08x %08x %08x %c\n"),
303-
pos, values[0], values[1], values[2], values[3], (looksLikeStackFrame)?'<':' ');
325+
pos, values[0], values[1], values[2], values[3], (looksLikeStackFrame) ? '<' : ' ');
304326
}
305327
}
306328

@@ -370,7 +392,7 @@ void __panic_func(const char* file, int line, const char* func) {
370392
uintptr_t __stack_chk_guard = 0x08675309 ^ RANDOM_REG32;
371393
void __stack_chk_fail(void) {
372394
s_user_reset_reason = REASON_USER_STACK_SMASH;
373-
s_stacksmash_addr = (uint32_t)__builtin_return_address(0);
395+
s_stack_chk_addr = (uint32_t)__builtin_return_address(0);
374396

375397
if (gdb_present())
376398
__asm__ __volatile__ ("syscall"); // triggers GDB when enabled
@@ -382,4 +404,16 @@ void __stack_chk_fail(void) {
382404
__builtin_unreachable(); // never reached, needed to satisfy "noreturn" attribute
383405
}
384406

407+
void __stack_overflow(cont_t* cont, uint32_t* sp) {
408+
s_user_reset_reason = REASON_USER_STACK_OVERFLOW;
409+
s_stack_chk_addr = (uint32_t)&cont->stack[0];
410+
411+
if (gdb_present())
412+
__asm__ __volatile__ ("syscall"); // triggers GDB when enabled
413+
414+
postmortem_report((uint32_t)sp);
415+
416+
__builtin_unreachable(); // never reached, needed to satisfy "noreturn" attribute
417+
}
418+
385419
} // extern "C"

Diff for: cores/esp8266/debug.h

+3
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
#include <stddef.h>
55
#include <stdint.h>
66

7+
#include "cont.h"
8+
79
#define _DEBUG_LEAF_FUNCTION(...) __asm__ __volatile__("" ::: "a0", "memory")
810

911
#ifdef DEBUG_ESP_CORE
@@ -32,6 +34,7 @@ extern "C"
3234
{
3335
#endif
3436
void __stack_chk_fail(void) __attribute__((noreturn));
37+
void __stack_overflow(cont_t*, uint32_t*) __attribute__((noreturn));
3538
void __unhandled_exception(const char* str) __attribute__((noreturn));
3639
void __panic_func(const char* file, int line, const char* func) __attribute__((noreturn));
3740
#define panic() __panic_func(PSTR(__FILE__), __LINE__, __func__)

Diff for: tools/decoder.py

+16-17
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
"permit stores",
5353
)
5454

55+
5556
# similar to java version, which used `list` and re-formatted it
5657
# instead, simply use an already short-format `info line`
5758
# TODO `info symbol`? revert to `list`?
@@ -96,12 +97,12 @@ def addresses_addr2line(addr2line, elf, addresses):
9697

9798

9899
def decode_lines(format_addresses, elf, lines):
99-
STACK_RE = re.compile(r"^[0-9a-f]{8}:\s+([0-9a-f]{8} ?)+ *$")
100+
ANY_ADDR_RE = re.compile(r"0x[0-9a-fA-F]{8}|[0-9a-fA-F]{8}")
101+
HEX_ADDR_RE = re.compile(r"0x[0-9a-f]{8}")
100102

101-
LAST_ALLOC_RE = re.compile(
102-
r"last failed alloc call: ([0-9a-fA-F]{8})\(([0-9]+)\).*"
103-
)
104-
LAST_ALLOC = "last failed alloc"
103+
MEM_ERR_LINE_RE = re.compile(r"^(Stack|last failed alloc call)")
104+
105+
STACK_LINE_RE = re.compile(r"^[0-9a-f]{8}:\s\s+")
105106

106107
CUT_HERE_STRING = "CUT HERE FOR EXCEPTION DECODER"
107108
EXCEPTION_STRING = "Exception ("
@@ -131,13 +132,11 @@ def format_address(address):
131132
stack_addresses = print_all_addresses(stack_addresses)
132133
last_stack = line.strip()
133134
# 3fffffb0: feefeffe feefeffe 3ffe85d8 401004ed
134-
elif in_stack and STACK_RE.match(line):
135-
stack, addrs = line.split(":")
136-
addrs = addrs.strip()
137-
addrs = addrs.split(" ")
135+
elif in_stack and STACK_LINE_RE.match(line):
136+
_, addrs = line.split(":")
137+
addrs = ANY_ADDR_RE.findall(addrs)
138138
stack_addresses.setdefault(last_stack, [])
139-
for addr in addrs:
140-
stack_addresses[last_stack].append(addr)
139+
stack_addresses[last_stack].extend(addrs)
141140
# epc1=0xfffefefe epc2=0xfefefefe epc3=0xefefefef excvaddr=0xfefefefe depc=0xfefefefe
142141
elif EPC_STRING in line:
143142
pairs = line.split()
@@ -152,13 +151,13 @@ def format_address(address):
152151
elif EXCEPTION_STRING in line:
153152
number = line.strip()[len(EXCEPTION_STRING) : -2]
154153
print(f"Exception ({number}) - {EXCEPTION_CODES[int(number)]}")
154+
# stack smashing detected at <ADDR>
155155
# last failed alloc call: <ADDR>(<NUMBER>)[@<maybe file loc>]
156-
elif LAST_ALLOC in line:
157-
values = LAST_ALLOC_RE.match(line)
158-
if values:
159-
addr, size = values.groups()
160-
print()
161-
print(f"Allocation of {size} bytes failed: {format_address(addr)}")
156+
elif MEM_ERR_LINE_RE.match(line):
157+
for addr in ANY_ADDR_RE.findall(line):
158+
line = line.replace(addr, format_address(addr))
159+
print()
160+
print(line.strip())
162161
# postmortem guards our actual stack dump values with these
163162
elif ">>>stack>>>" in line:
164163
in_stack = True

0 commit comments

Comments
 (0)