Skip to content

Commit 20a7208

Browse files
authored
[BOLT] Gadget scanner: improve handling of unreachable basic blocks (#136183)
Instead of refusing to analyze an instruction completely when it is unreachable according to the CFG reconstructed by BOLT, use pessimistic assumption of register state when possible. Nevertheless, unreachable basic blocks found in optimized code likely means imprecise CFG reconstruction, thus report a warning once per function.
1 parent 29f8dca commit 20a7208

File tree

3 files changed

+188
-17
lines changed

3 files changed

+188
-17
lines changed

bolt/lib/Passes/PAuthGadgetScanner.cpp

Lines changed: 103 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,22 @@ namespace PAuthGadgetScanner {
8282
dbgs() << "\n";
8383
}
8484

85+
// Iterates over BinaryFunction's instructions like a range-based for loop:
86+
//
87+
// iterateOverInstrs(BF, [&](MCInstReference Inst) {
88+
// // loop body
89+
// });
90+
template <typename T> static void iterateOverInstrs(BinaryFunction &BF, T Fn) {
91+
if (BF.hasCFG()) {
92+
for (BinaryBasicBlock &BB : BF)
93+
for (int64_t I = 0, E = BB.size(); I < E; ++I)
94+
Fn(MCInstInBBReference(&BB, I));
95+
} else {
96+
for (auto I : BF.instrs())
97+
Fn(MCInstInBFReference(&BF, I.first));
98+
}
99+
}
100+
85101
// This class represents mapping from a set of arbitrary physical registers to
86102
// consecutive array indexes.
87103
class TrackedRegisters {
@@ -342,6 +358,29 @@ class SrcSafetyAnalysis {
342358
return S;
343359
}
344360

361+
/// Computes a reasonably pessimistic estimation of the register state when
362+
/// the previous instruction is not known for sure. Takes the set of registers
363+
/// which are trusted at function entry and removes all registers that can be
364+
/// clobbered inside this function.
365+
SrcState computePessimisticState(BinaryFunction &BF) {
366+
BitVector ClobberedRegs(NumRegs);
367+
iterateOverInstrs(BF, [&](MCInstReference Inst) {
368+
BC.MIB->getClobberedRegs(Inst, ClobberedRegs);
369+
370+
// If this is a call instruction, no register is safe anymore, unless
371+
// it is a tail call. Ignore tail calls for the purpose of estimating the
372+
// worst-case scenario, assuming no instructions are executed in the
373+
// caller after this point anyway.
374+
if (BC.MIB->isCall(Inst) && !BC.MIB->isTailCall(Inst))
375+
ClobberedRegs.set();
376+
});
377+
378+
SrcState S = createEntryState();
379+
S.SafeToDerefRegs.reset(ClobberedRegs);
380+
S.TrustedRegs.reset(ClobberedRegs);
381+
return S;
382+
}
383+
345384
BitVector getClobberedRegs(const MCInst &Point) const {
346385
BitVector Clobbered(NumRegs);
347386
// Assume a call can clobber all registers, including callee-saved
@@ -545,6 +584,10 @@ class DataflowSrcSafetyAnalysis
545584
using SrcSafetyAnalysis::BC;
546585
using SrcSafetyAnalysis::computeNext;
547586

587+
// Pessimistic initial state for basic blocks without any predecessors
588+
// (not needed for most functions, thus initialized lazily).
589+
SrcState PessimisticState;
590+
548591
public:
549592
DataflowSrcSafetyAnalysis(BinaryFunction &BF,
550593
MCPlusBuilder::AllocatorIdTy AllocId,
@@ -585,6 +628,18 @@ class DataflowSrcSafetyAnalysis
585628
if (BB.isEntryPoint())
586629
return createEntryState();
587630

631+
// If a basic block without any predecessors is found in an optimized code,
632+
// this likely means that some CFG edges were not detected. Pessimistically
633+
// assume any register that can ever be clobbered in this function to be
634+
// unsafe before this basic block.
635+
// Warn about this fact in FunctionAnalysis::findUnsafeUses(), as it likely
636+
// means imprecise CFG information.
637+
if (BB.pred_empty()) {
638+
if (PessimisticState.empty())
639+
PessimisticState = computePessimisticState(*BB.getParent());
640+
return PessimisticState;
641+
}
642+
588643
return SrcState();
589644
}
590645

@@ -1344,17 +1399,6 @@ shouldReportAuthOracle(const BinaryContext &BC, const MCInstReference &Inst,
13441399
return make_gadget_report(AuthOracleKind, Inst, *AuthReg);
13451400
}
13461401

1347-
template <typename T> static void iterateOverInstrs(BinaryFunction &BF, T Fn) {
1348-
if (BF.hasCFG()) {
1349-
for (BinaryBasicBlock &BB : BF)
1350-
for (int64_t I = 0, E = BB.size(); I < E; ++I)
1351-
Fn(MCInstInBBReference(&BB, I));
1352-
} else {
1353-
for (auto I : BF.instrs())
1354-
Fn(MCInstInBFReference(&BF, I.first));
1355-
}
1356-
}
1357-
13581402
static SmallVector<MCPhysReg>
13591403
collectRegsToTrack(ArrayRef<PartialReport<MCPhysReg>> Reports) {
13601404
SmallSet<MCPhysReg, 4> RegsToTrack;
@@ -1375,17 +1419,60 @@ void FunctionAnalysisContext::findUnsafeUses(
13751419
BF.dump();
13761420
});
13771421

1422+
bool UnreachableBBReported = false;
1423+
if (BF.hasCFG()) {
1424+
// Warn on basic blocks being unreachable according to BOLT (at most once
1425+
// per BinaryFunction), as this likely means the CFG reconstructed by BOLT
1426+
// is imprecise. A basic block can be
1427+
// * reachable from an entry basic block - a hopefully correct non-empty
1428+
// state is propagated to that basic block sooner or later. All basic
1429+
// blocks are expected to belong to this category under normal conditions.
1430+
// * reachable from a "directly unreachable" BB (a basic block that has no
1431+
// direct predecessors and this is not because it is an entry BB) - *some*
1432+
// non-empty state is propagated to this basic block sooner or later, as
1433+
// the initial state of directly unreachable basic blocks is
1434+
// pessimistically initialized to "all registers are unsafe"
1435+
// - a warning can be printed for the "directly unreachable" basic block
1436+
// * neither reachable from an entry nor from a "directly unreachable" BB
1437+
// (such as if this BB is in an isolated loop of basic blocks) - the final
1438+
// state is computed to be empty for this basic block
1439+
// - a warning can be printed for this basic block
1440+
for (BinaryBasicBlock &BB : BF) {
1441+
MCInst *FirstInst = BB.getFirstNonPseudoInstr();
1442+
// Skip empty basic block early for simplicity.
1443+
if (!FirstInst)
1444+
continue;
1445+
1446+
bool IsDirectlyUnreachable = BB.pred_empty() && !BB.isEntryPoint();
1447+
bool HasNoStateComputed = Analysis->getStateBefore(*FirstInst).empty();
1448+
if (!IsDirectlyUnreachable && !HasNoStateComputed)
1449+
continue;
1450+
1451+
// Arbitrarily attach the report to the first instruction of BB.
1452+
// This is printed as "[message] in function [name], basic block ...,
1453+
// at address ..." when the issue is reported to the user.
1454+
Reports.push_back(make_generic_report(
1455+
MCInstReference::get(FirstInst, BF),
1456+
"Warning: possibly imprecise CFG, the analysis quality may be "
1457+
"degraded in this function. According to BOLT, unreachable code is "
1458+
"found" /* in function [name]... */));
1459+
UnreachableBBReported = true;
1460+
break; // One warning per function.
1461+
}
1462+
}
1463+
// FIXME: Warn the user about imprecise analysis when the function has no CFG
1464+
// information at all.
1465+
13781466
iterateOverInstrs(BF, [&](MCInstReference Inst) {
13791467
if (BC.MIB->isCFI(Inst))
13801468
return;
13811469

13821470
const SrcState &S = Analysis->getStateBefore(Inst);
1383-
1384-
// If non-empty state was never propagated from the entry basic block
1385-
// to Inst, assume it to be unreachable and report a warning.
13861471
if (S.empty()) {
1387-
Reports.push_back(
1388-
make_generic_report(Inst, "Warning: unreachable instruction found"));
1472+
LLVM_DEBUG(
1473+
{ traceInst(BC, "Instruction has no state, skipping", Inst); });
1474+
assert(UnreachableBBReported && "Should be reported at least once");
1475+
(void)UnreachableBBReported;
13891476
return;
13901477
}
13911478

bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ f_callclobbered_calleesaved:
215215
.globl f_unreachable_instruction
216216
.type f_unreachable_instruction,@function
217217
f_unreachable_instruction:
218-
// CHECK-LABEL: GS-PAUTH: Warning: unreachable instruction found in function f_unreachable_instruction, basic block {{[0-9a-zA-Z.]+}}, at address
218+
// CHECK-LABEL: GS-PAUTH: Warning: possibly imprecise CFG, the analysis quality may be degraded in this function. According to BOLT, unreachable code is found in function f_unreachable_instruction, basic block {{[0-9a-zA-Z.]+}}, at address
219219
// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: add x0, x1, x2
220220
// CHECK-NOT: instructions that write to the affected registers after any authentication are:
221221
b 1f

bolt/test/binary-analysis/AArch64/gs-pauth-calls.s

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1428,6 +1428,90 @@ printed_instrs_nocfg:
14281428
br x0
14291429
.size printed_instrs_nocfg, .-printed_instrs_nocfg
14301430

1431+
// Test handling of unreachable basic blocks.
1432+
//
1433+
// Basic blocks without any predecessors were observed in real-world optimized
1434+
// code. At least sometimes they were actually reachable via jump table, which
1435+
// was not detected, but the function was processed as if its CFG was
1436+
// reconstructed successfully.
1437+
//
1438+
// As a more predictable model example, let's use really unreachable code
1439+
// for testing.
1440+
1441+
.globl bad_unreachable_call
1442+
.type bad_unreachable_call,@function
1443+
bad_unreachable_call:
1444+
// CHECK-LABEL: GS-PAUTH: Warning: possibly imprecise CFG, the analysis quality may be degraded in this function. According to BOLT, unreachable code is found in function bad_unreachable_call, basic block {{[^,]+}}, at address
1445+
// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: blr x0
1446+
// CHECK-NOT: instructions that write to the affected registers after any authentication are:
1447+
// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_unreachable_call, basic block {{[^,]+}}, at address
1448+
// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: blr x0
1449+
// CHECK-NEXT: The 0 instructions that write to the affected registers after any authentication are:
1450+
paciasp
1451+
stp x29, x30, [sp, #-16]!
1452+
mov x29, sp
1453+
1454+
b 1f
1455+
// unreachable basic block:
1456+
blr x0
1457+
1458+
1: // reachable basic block:
1459+
ldp x29, x30, [sp], #16
1460+
autiasp
1461+
ret
1462+
.size bad_unreachable_call, .-bad_unreachable_call
1463+
1464+
.globl good_unreachable_call
1465+
.type good_unreachable_call,@function
1466+
good_unreachable_call:
1467+
// CHECK-NOT: non-protected call{{.*}}good_unreachable_call
1468+
// CHECK-LABEL: GS-PAUTH: Warning: possibly imprecise CFG, the analysis quality may be degraded in this function. According to BOLT, unreachable code is found in function good_unreachable_call, basic block {{[^,]+}}, at address
1469+
// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1
1470+
// CHECK-NOT: instructions that write to the affected registers after any authentication are:
1471+
// CHECK-NOT: non-protected call{{.*}}good_unreachable_call
1472+
paciasp
1473+
stp x29, x30, [sp, #-16]!
1474+
mov x29, sp
1475+
1476+
b 1f
1477+
// unreachable basic block:
1478+
autia x0, x1
1479+
blr x0 // <-- this call is definitely protected provided at least
1480+
// basic block boundaries are detected correctly
1481+
1482+
1: // reachable basic block:
1483+
ldp x29, x30, [sp], #16
1484+
autiasp
1485+
ret
1486+
.size good_unreachable_call, .-good_unreachable_call
1487+
1488+
.globl unreachable_loop_of_bbs
1489+
.type unreachable_loop_of_bbs,@function
1490+
unreachable_loop_of_bbs:
1491+
// CHECK-NOT: unreachable basic blocks{{.*}}unreachable_loop_of_bbs
1492+
// CHECK-NOT: non-protected call{{.*}}unreachable_loop_of_bbs
1493+
// CHECK-LABEL: GS-PAUTH: Warning: possibly imprecise CFG, the analysis quality may be degraded in this function. According to BOLT, unreachable code is found in function unreachable_loop_of_bbs, basic block {{[^,]+}}, at address
1494+
// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: blr x0
1495+
// CHECK-NOT: unreachable basic blocks{{.*}}unreachable_loop_of_bbs
1496+
// CHECK-NOT: non-protected call{{.*}}unreachable_loop_of_bbs
1497+
paciasp
1498+
stp x29, x30, [sp, #-16]!
1499+
mov x29, sp
1500+
b .Lreachable_epilogue_bb
1501+
1502+
.Lfirst_unreachable_bb:
1503+
blr x0 // <-- this call is not analyzed
1504+
b .Lsecond_unreachable_bb
1505+
.Lsecond_unreachable_bb:
1506+
blr x1 // <-- this call is not analyzed
1507+
b .Lfirst_unreachable_bb
1508+
1509+
.Lreachable_epilogue_bb:
1510+
ldp x29, x30, [sp], #16
1511+
autiasp
1512+
ret
1513+
.size unreachable_loop_of_bbs, .-unreachable_loop_of_bbs
1514+
14311515
.globl main
14321516
.type main,@function
14331517
main:

0 commit comments

Comments
 (0)