Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 16 additions & 7 deletions bolt/include/bolt/Profile/DataAggregator.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,16 +99,17 @@ class DataAggregator : public DataReader {
uint64_t Addr;
};

/// Container for the unit of branch data.
/// Backwards compatible with legacy use for branches and fall-throughs:
/// - if \p Branch is FT_ONLY or FT_EXTERNAL_ORIGIN, the trace only
/// contains fall-through data,
/// - if \p To is BR_ONLY, the trace only contains branch data.
/// Container for the unit of branch data, matching pre-aggregated trace type.
/// Backwards compatible with branch and fall-through types:
/// - if \p To is negative when read as a signed value, the trace only
/// contains branch data (BR_ONLY),
/// - if \p Branch is negative when read as a signed value, the trace only
/// contains fall-through data (FT_ONLY, FT_EXTERNAL_ORIGIN, or
/// FT_EXTERNAL_RETURN).
struct Trace {
static constexpr const uint64_t EXTERNAL = 0ULL;
static constexpr const uint64_t BR_ONLY = -1ULL;
static constexpr const uint64_t FT_ONLY = -1ULL;
static constexpr const uint64_t FT_EXTERNAL_ORIGIN = -2ULL;
static constexpr const uint64_t FT_EXTERNAL_RETURN = -3ULL;

uint64_t Branch;
uint64_t From;
Expand Down Expand Up @@ -388,9 +389,9 @@ class DataAggregator : public DataReader {
/// File format syntax:
/// E <event>
/// S <start> <count>
/// T <start> <end> <ft_end> <count>
/// [TR] <start> <end> <ft_end> <count>
/// B <start> <end> <count> <mispred_count>
/// [Ff] <start> <end> <count>
/// [Ffr] <start> <end> <count>
///
/// where <start>, <end>, <ft_end> have the format [<id>:]<offset>
///
Expand All @@ -401,8 +402,11 @@ class DataAggregator : public DataReader {
/// f - an aggregated fall-through with external origin - used to disambiguate
/// between a return hitting a basic block head and a regular internal
/// jump to the block
/// r - an aggregated fall-through originating at an external return; no
/// checks are performed on the fall-through start
/// T - an aggregated trace: branch from <start> to <end> with a fall-through
/// to <ft_end>
/// R - an aggregated trace originating at a return
///
/// <id> - build id of the object containing the address. We can skip it for
/// the main binary and use "X" for an unknown object. This will save some
Expand Down Expand Up @@ -530,7 +534,12 @@ inline raw_ostream &operator<<(raw_ostream &OS,
const DataAggregator::Trace &T) {
switch (T.Branch) {
case DataAggregator::Trace::FT_ONLY:
break;
case DataAggregator::Trace::FT_EXTERNAL_ORIGIN:
OS << "X:0 -> ";
break;
case DataAggregator::Trace::FT_EXTERNAL_RETURN:
OS << "X:R -> ";
break;
default:
OS << Twine::utohexstr(T.Branch) << " -> ";
Expand Down
59 changes: 36 additions & 23 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -524,8 +524,7 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {

heatmap:
// Sort parsed traces for faster processing.
if (!opts::BasicAggregation)
llvm::sort(Traces, llvm::less_first());
llvm::sort(Traces, llvm::less_first());

if (!opts::HeatmapMode)
return Error::success();
Expand Down Expand Up @@ -870,13 +869,9 @@ DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, const Trace &Trace,

// Adjust FromBB if the first LBR is a return from the last instruction in
// the previous block (that instruction should be a call).
if (IsReturn) {
if (From)
FromBB = BF.getBasicBlockContainingOffset(From - 1);
else
LLVM_DEBUG(dbgs() << "return to the function start: " << Trace << '\n');
} else if (Trace.Branch == Trace::EXTERNAL && From == FromBB->getOffset() &&
!FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
if (Trace.Branch != Trace::FT_ONLY && !BF.containsAddress(Trace.Branch) &&
From == FromBB->getOffset() &&
(IsReturn ? From : !(FromBB->isEntryPoint() || FromBB->isLandingPad()))) {
const BinaryBasicBlock *PrevBB =
BF.getLayout().getBlock(FromBB->getIndex() - 1);
if (PrevBB->getSuccessor(FromBB->getLabel())) {
Expand Down Expand Up @@ -1202,12 +1197,14 @@ ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
std::error_code DataAggregator::parseAggregatedLBREntry() {
enum AggregatedLBREntry : char {
INVALID = 0,
EVENT_NAME, // E
TRACE, // T
SAMPLE, // S
BRANCH, // B
FT, // F
FT_EXTERNAL_ORIGIN // f
EVENT_NAME, // E
TRACE, // T
RETURN, // R
SAMPLE, // S
BRANCH, // B
FT, // F
FT_EXTERNAL_ORIGIN, // f
FT_EXTERNAL_RETURN // r
} Type = INVALID;

/// The number of fields to parse, set based on \p Type.
Expand Down Expand Up @@ -1235,20 +1232,22 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {

Type = StringSwitch<AggregatedLBREntry>(Str)
.Case("T", TRACE)
.Case("R", RETURN)
.Case("S", SAMPLE)
.Case("E", EVENT_NAME)
.Case("B", BRANCH)
.Case("F", FT)
.Case("f", FT_EXTERNAL_ORIGIN)
.Case("r", FT_EXTERNAL_RETURN)
.Default(INVALID);

if (Type == INVALID) {
reportError("expected T, S, E, B, F or f");
reportError("expected T, R, S, E, B, F, f or r");
return make_error_code(llvm::errc::io_error);
}

using SSI = StringSwitch<int>;
AddrNum = SSI(Str).Case("T", 3).Case("S", 1).Case("E", 0).Default(2);
AddrNum = SSI(Str).Cases("T", "R", 3).Case("S", 1).Case("E", 0).Default(2);
CounterNum = SSI(Str).Case("B", 2).Case("E", 0).Default(1);
}

Expand Down Expand Up @@ -1305,17 +1304,30 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
if (ToFunc)
ToFunc->setHasProfileAvailable();

/// For legacy fall-through types, adjust locations to match Trace container.
if (Type == FT || Type == FT_EXTERNAL_ORIGIN) {
/// For fall-through types, adjust locations to match Trace container.
if (Type == FT || Type == FT_EXTERNAL_ORIGIN || Type == FT_EXTERNAL_RETURN) {
Addr[2] = Location(Addr[1]->Offset); // Trace To
Addr[1] = Location(Addr[0]->Offset); // Trace From
// Put a magic value into Trace Branch to differentiate from a full trace.
Addr[0] = Location(Type == FT ? Trace::FT_ONLY : Trace::FT_EXTERNAL_ORIGIN);
// Put a magic value into Trace Branch to differentiate from a full trace:
if (Type == FT)
Addr[0] = Location(Trace::FT_ONLY);
else if (Type == FT_EXTERNAL_ORIGIN)
Addr[0] = Location(Trace::FT_EXTERNAL_ORIGIN);
else if (Type == FT_EXTERNAL_RETURN)
Addr[0] = Location(Trace::FT_EXTERNAL_RETURN);
else
llvm_unreachable("Unexpected fall-through type");
}

/// For legacy branch type, mark Trace To to differentite from a full trace.
if (Type == BRANCH) {
/// For branch type, mark Trace To to differentiate from a full trace.
if (Type == BRANCH)
Addr[2] = Location(Trace::BR_ONLY);

if (Type == RETURN) {
if (!Addr[0]->Offset)
Addr[0]->Offset = Trace::FT_EXTERNAL_RETURN;
else
Returns.emplace(Addr[0]->Offset);
}

/// Record a trace.
Expand Down Expand Up @@ -1565,6 +1577,7 @@ void DataAggregator::processBranchEvents() {
NamedRegionTimer T("processBranch", "Processing branch events",
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);

Returns.emplace(Trace::FT_EXTERNAL_RETURN);
for (const auto &[Trace, Info] : Traces) {
bool IsReturn = checkReturn(Trace.Branch);
// Ignore returns.
Expand Down
23 changes: 23 additions & 0 deletions bolt/test/X86/callcont-fallthru.s
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
# RUN: link_fdata %s %t %t.pa-ret PREAGG-RET
# Trace from an external location to a landing pad/entry point call continuation
# RUN: link_fdata %s %t %t.pa-ext PREAGG-EXT
# Return trace to a landing pad/entry point call continuation
# RUN: link_fdata %s %t %t.pa-pret PREAGG-PRET
# External return to a landing pad/entry point call continuation
# RUN: link_fdata %s %t %t.pa-eret PREAGG-ERET
# RUN-DISABLED: link_fdata %s %t %t.pa-plt PREAGG-PLT

# RUN: llvm-strip --strip-unneeded %t -o %t.strip
Expand Down Expand Up @@ -38,6 +42,21 @@
# RUN: llvm-bolt %t.strip --pa -p %t.pa-ext -o %t.out \
# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK-SKIP

## Check that pre-aggregated return traces from an external location attach
## the call continuation fallthrough count to the secondary entry point
## (unstripped)
# RUN: llvm-bolt %t --pa -p %t.pa-pret -o %t.out \
# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK-ATTACH
## Check that pre-aggregated return traces from an external location attach
## the call continuation fallthrough count to the landing pad (stripped)
# RUN: llvm-bolt %t.strip --pa -p %t.pa-pret -o %t.out \
# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK-ATTACH

## Same for external return type
# RUN: llvm-bolt %t --pa -p %t.pa-eret -o %t.out \
# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK-ATTACH
# RUN: llvm-bolt %t.strip --pa -p %t.pa-eret -o %t.out \
# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK-ATTACH

## Check pre-aggregated traces don't report zero-sized PLT fall-through as
## invalid trace
# RUN-DISABLED: llvm-bolt %t.strip --pa -p %t.pa-plt -o %t.out | FileCheck %s \
Expand Down Expand Up @@ -92,6 +111,10 @@ Ltmp4_br:
# PREAGG-RET: T #Lfoo_ret# #Ltmp3# #Ltmp3_br# 1
## Target is a secondary entry point (unstripped) or a landing pad (stripped)
# PREAGG-EXT: T X:0 #Ltmp3# #Ltmp3_br# 1
## Pre-aggregated return trace
# PREAGG-PRET: R X:0 #Ltmp3# #Ltmp3_br# 1
## External return
# PREAGG-ERET: r #Ltmp3# #Ltmp3_br# 1

# CHECK-ATTACH: callq foo
# CHECK-ATTACH-NEXT: count: 1
Expand Down
4 changes: 2 additions & 2 deletions bolt/test/link_fdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@
fdata_pat = re.compile(r"([01].*) (?P<mispred>\d+) (?P<exec>\d+)")

# Pre-aggregated profile:
# {T|S|E|B|F|f} <start> [<end>] [<ft_end>] <count> [<mispred_count>]
# {T|R|S|E|B|F|f|r} <start> [<end>] [<ft_end>] <count> [<mispred_count>]
# <loc>: [<id>:]<offset>
preagg_pat = re.compile(r"(?P<type>[TSBFf]) (?P<offsets_count>.*)")
preagg_pat = re.compile(r"(?P<type>[TRSBFfr]) (?P<offsets_count>.*)")

# No-LBR profile:
# <is symbol?> <closest elf symbol or DSO name> <relative address> <count>
Expand Down
Loading