Skip to content

Commit e57923e

Browse files
aaupov authored and Jaddyen committed
[BOLT][NFCI] Simplify DataAggregator using traces (llvm#143289)
Consistently apply traces as defined in llvm#127125 for branch profile aggregation. This combines branches and fall-through records into one.

With large input binaries/profiles, the speedup in aggregation time (`-time-aggr`, wall time) is:
- perf.data, pre-BOLT input: 154.5528s -> 144.0767s
- pre-aggregated data, pre-BOLT input: 15.1026s -> 9.0711s
- pre-aggregated data, BOLTed input: 15.4871s -> 10.0077s

Test Plan: NFC
1 parent 50d6263 commit e57923e

File tree

2 files changed

+104
-132
lines changed

2 files changed

+104
-132
lines changed

bolt/include/bolt/Profile/DataAggregator.h

Lines changed: 36 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -99,24 +99,28 @@ class DataAggregator : public DataReader {
9999
uint64_t Addr;
100100
};
101101

102+
/// Container for the unit of branch data.
103+
/// Backwards compatible with legacy use for branches and fall-throughs:
104+
/// - if \p Branch is FT_ONLY or FT_EXTERNAL_ORIGIN, the trace only
105+
/// contains fall-through data,
106+
/// - if \p To is BR_ONLY, the trace only contains branch data.
102107
struct Trace {
108+
static constexpr const uint64_t EXTERNAL = 0ULL;
109+
static constexpr const uint64_t BR_ONLY = -1ULL;
110+
static constexpr const uint64_t FT_ONLY = -1ULL;
111+
static constexpr const uint64_t FT_EXTERNAL_ORIGIN = -2ULL;
112+
113+
uint64_t Branch;
103114
uint64_t From;
104115
uint64_t To;
105-
Trace(uint64_t From, uint64_t To) : From(From), To(To) {}
106-
bool operator==(const Trace &Other) const {
107-
return From == Other.From && To == Other.To;
108-
}
116+
auto tie() const { return std::tie(Branch, From, To); }
117+
bool operator==(const Trace &Other) const { return tie() == Other.tie(); }
118+
bool operator<(const Trace &Other) const { return tie() < Other.tie(); }
109119
};
120+
friend raw_ostream &operator<<(raw_ostream &OS, const Trace &);
110121

111122
struct TraceHash {
112-
size_t operator()(const Trace &L) const {
113-
return std::hash<uint64_t>()(L.From << 32 | L.To);
114-
}
115-
};
116-
117-
struct FTInfo {
118-
uint64_t InternCount{0};
119-
uint64_t ExternCount{0};
123+
size_t operator()(const Trace &L) const { return hash_combine(L.tie()); }
120124
};
121125

122126
struct TakenBranchInfo {
@@ -126,8 +130,8 @@ class DataAggregator : public DataReader {
126130

127131
/// Intermediate storage for profile data. We save the results of parsing
128132
/// and use them later for processing and assigning profile.
129-
std::unordered_map<Trace, TakenBranchInfo, TraceHash> BranchLBRs;
130-
std::unordered_map<Trace, FTInfo, TraceHash> FallthroughLBRs;
133+
std::unordered_map<Trace, TakenBranchInfo, TraceHash> TraceMap;
134+
std::vector<std::pair<Trace, TakenBranchInfo>> Traces;
131135
std::unordered_map<uint64_t, uint64_t> BasicSamples;
132136
std::vector<PerfMemSample> MemSamples;
133137

@@ -200,8 +204,8 @@ class DataAggregator : public DataReader {
200204
/// Return a vector of offsets corresponding to a trace in a function
201205
/// if the trace is valid, std::nullopt otherwise.
202206
std::optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
203-
getFallthroughsInTrace(BinaryFunction &BF, const LBREntry &First,
204-
const LBREntry &Second, uint64_t Count = 1) const;
207+
getFallthroughsInTrace(BinaryFunction &BF, const Trace &Trace,
208+
uint64_t Count) const;
205209

206210
/// Record external entry into the function \p BF.
207211
///
@@ -265,8 +269,7 @@ class DataAggregator : public DataReader {
265269
bool doBranch(uint64_t From, uint64_t To, uint64_t Count, uint64_t Mispreds);
266270

267271
/// Register a trace between two LBR entries supplied in execution order.
268-
bool doTrace(const LBREntry &First, const LBREntry &Second,
269-
uint64_t Count = 1);
272+
bool doTrace(const Trace &Trace, uint64_t Count);
270273

271274
/// Parser helpers
272275
/// Return false if we exhausted our parser buffer and finished parsing
@@ -516,6 +519,21 @@ inline raw_ostream &operator<<(raw_ostream &OS,
516519
OS << formatv("{0:x} -> {1:x}/{2}", L.From, L.To, L.Mispred ? 'M' : 'P');
517520
return OS;
518521
}
522+
523+
inline raw_ostream &operator<<(raw_ostream &OS,
524+
const DataAggregator::Trace &T) {
525+
switch (T.Branch) {
526+
case DataAggregator::Trace::FT_ONLY:
527+
case DataAggregator::Trace::FT_EXTERNAL_ORIGIN:
528+
break;
529+
default:
530+
OS << Twine::utohexstr(T.Branch) << " -> ";
531+
}
532+
OS << Twine::utohexstr(T.From);
533+
if (T.To != DataAggregator::Trace::BR_ONLY)
534+
OS << " ... " << Twine::utohexstr(T.To);
535+
return OS;
536+
}
519537
} // namespace bolt
520538
} // namespace llvm
521539

0 commit comments

Comments
 (0)