Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/cpu/minor/BaseMinorCPU.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,10 @@ def support_take_over(cls):
)

branchPred = Param.BranchPredictor(
TournamentBP(numThreads=Parent.numThreads), "Branch Predictor"
BranchPredictor(
conditionalBranchPred=TournamentBP(numThreads=Parent.numThreads)
),
"Branch Predictor",
)

def addCheckerCpu(self):
Expand Down
2 changes: 1 addition & 1 deletion src/cpu/minor/fetch2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ Fetch2::predictBranch(MinorDynInstPtr inst, BranchData &branch)
DPRINTF(Branch, "Trying to predict for inst: %s\n", *inst);

if (branchPredictor.predict(inst->staticInst,
inst->id.fetchSeqNum, *inst_pc, inst->id.threadId)) {
inst->id.fetchSeqNum, *inst_pc, inst->id.threadId).taken) {
set(branch.target, *inst_pc);
inst->predictedTaken = true;
set(inst->predictedTarget, inst_pc);
Expand Down
5 changes: 4 additions & 1 deletion src/cpu/o3/BaseO3CPU.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,10 @@ def support_take_over(cls):
smtCommitPolicy = Param.CommitPolicy("RoundRobin", "SMT Commit Policy")

branchPred = Param.BranchPredictor(
TournamentBP(numThreads=Parent.numThreads), "Branch Predictor"
BranchPredictor(
conditionalBranchPred=TournamentBP(numThreads=Parent.numThreads)
),
"Branch Predictor",
)
needsTSO = Param.Bool(False, "Enable TSO Memory model")

Expand Down
53 changes: 44 additions & 9 deletions src/cpu/o3/bac.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,15 @@ BAC::BAC(CPU *_cpu, const BaseO3CPUParams &params)
fetchTargetWidth(params.fetchTargetWidth),
minInstSize(params.minInstSize),
numThreads(params.numThreads),
stats(_cpu,this)
stats(_cpu, this)
{
fatal_if(decoupledFrontEnd && (fetchTargetWidth < params.fetchBufferSize),
"Fetch target width should be larger than fetch buffer size!");

for (int i = 0; i < MaxThreads; i++) {
bacPC[i].reset(params.isa[0]->newPCState());
stalls[i] = {false, false, false};
branchPredictRemaining[i] = Cycles(0);
}

assert(bpu!=nullptr);
Expand Down Expand Up @@ -401,12 +402,24 @@ BAC::checkSignalsAndUpdate(ThreadID tid)
return true;
}

if (branchPredictRemaining[tid] > Cycles(0)) {
--branchPredictRemaining[tid];
DPRINTF(BAC,
"[global] Stalling for Branch Predictor for %i more cycles.\n",
branchPredictRemaining[tid]
);
stalls[tid].bpu = true;
} else {
stalls[tid].bpu = false;
}

if (checkStall(tid)) {
// return block(tid);
bacStatus[tid] = Blocked;
return false;
}


// If at this point the FTQ is still invalid we need to wait for
// A resteer/squash signal.
if (!ftq->isValid(tid) && bacStatus[tid] != Idle) {
Expand Down Expand Up @@ -446,6 +459,15 @@ BAC::checkSignalsAndUpdate(ThreadID tid)
return true;
}

if (ftq->isFull(tid)) {
// If the FTQ is full, we need to block the BAC.
if (bacStatus[tid] != FTQFull) {
DPRINTF(BAC, "[tid:%i] FTQ is full. Blocking BAC.\n", tid);
bacStatus[tid] = FTQFull;
}
return true;
}

// Now all stall/squash conditions are checked.
// Attempt to run the BAC if not already running.
if (ftq->isValid(tid) &&
Expand All @@ -469,7 +491,7 @@ BAC::squashBpuHistories(ThreadID tid)
{
if (!decoupledFrontEnd) return;

DPRINTF(BAC, "%s(tid:%i): FTQ sz: %i\n", tid, __func__, ftq->size(tid));
DPRINTF(BAC, "%s(tid:%i): FTQ sz: %i\n", __func__, tid, ftq->size(tid));

unsigned n_fts = ftq->size(tid);
if (n_fts == 0) return;
Expand Down Expand Up @@ -577,14 +599,16 @@ BAC::newFetchTarget(ThreadID tid, const PCStateBase &start_pc)
return ft;
}

bool
Prediction
BAC::predict(ThreadID tid, const StaticInstPtr &inst,
const FetchTargetPtr &ft, PCStateBase &pc)
{

/** Perform the prediction. */
BPredUnit::PredictorHistory* bpu_history = nullptr;
bool taken = bpu->predict(inst, ft->ftNum(), pc, tid, bpu_history);
Prediction pred = bpu->predict(
inst, ft->ftNum(), pc, tid, bpu_history
);

/** Push the prediction history to the fetch target.
* The postFetch() function will move the history from the FTQ to the
Expand All @@ -593,7 +617,7 @@ BAC::predict(ThreadID tid, const StaticInstPtr &inst,
ft->bpu_history = static_cast<void*>(bpu_history);

DPRINTF(Branch,"[tid:%i, ftn:%llu] History added.\n", tid, ft->ftNum());
return taken;
return pred;
}


Expand Down Expand Up @@ -678,7 +702,9 @@ BAC::generateFetchTargets(ThreadID tid, bool &status_change)

// Now make the actual prediction. Note the BPU will advance
// the PC to the next instruction.
predict_taken = predict(tid, staticInst, curFT, *next_pc);
Prediction pred = predict(tid, staticInst, curFT, *next_pc);
predict_taken = pred.taken;
branchPredictRemaining[tid] = Cycles(pred.latency);

DPRINTF(BAC, "[tid:%i, ftn:%llu] Branch found at PC %#x "
"taken?:%i, target:%#x\n",
Expand Down Expand Up @@ -869,7 +895,7 @@ BAC::updatePreDecode(ThreadID tid, const InstSeqNum seqNum,
hist = new BPredUnit::PredictorHistory(tid, seqNum,
pc.instAddr(), inst);
bpu->branchPlaceholder(tid, pc.instAddr(), inst->isUncondCtrl(),
hist->bpHistory);
hist);

hist->predTaken = hist->condPred = false;
hist->targetProvider = BPredUnit::TargetProvider::NoTarget;
Expand Down Expand Up @@ -926,8 +952,12 @@ BAC::updatePC(const DynInstPtr &inst,
} else {
// With a coupled front-end we need to make the branch prediction
// here.
predict_taken = bpu->predict(inst->staticInst, inst->seqNum,
fetch_pc, tid);
//
// Latency is ignored in coupled mode
Prediction pred = bpu->predict(
inst->staticInst, inst->seqNum, fetch_pc, tid
);
predict_taken = pred.taken;
}

DPRINTF(BAC, "[tid:%i] [sn:%llu] Branch at PC %#x "
Expand Down Expand Up @@ -989,6 +1019,9 @@ BAC::profileCycle(ThreadID tid)
case Squashing:
stats.squashCycles++;
break;
case Blocked:
stats.blockedCycles++;
break;
case FTQFull:
stats.ftqFullCycles++;
break;
Expand All @@ -1008,6 +1041,8 @@ BAC::BACStats::BACStats(o3::CPU *cpu, BAC *bac)
"Number of cycles BAC is running"),
ADD_STAT(squashCycles, statistics::units::Cycle::get(),
"Number of cycles BAC is squashing"),
ADD_STAT(blockedCycles, statistics::units::Cycle::get(),
"Number of cycles BAC is blocked"),
ADD_STAT(ftqFullCycles, statistics::units::Cycle::get(),
"Number of cycles BAC has spent waiting for FTQ to become free"),

Expand Down
11 changes: 9 additions & 2 deletions src/cpu/o3/bac.hh
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ namespace gem5
{

struct BaseO3CPUParams;
typedef branch_prediction::Prediction Prediction;

namespace o3
{
Expand Down Expand Up @@ -93,6 +94,7 @@ typedef std::shared_ptr<FetchTarget> FetchTargetPtr;
class BAC
{
typedef branch_prediction::BranchType BranchType;
typedef branch_prediction::BPredUnit BPredUnit;

public:
/** Overall decoupled BPU stage status. Used to determine if the CPU can
Expand Down Expand Up @@ -245,9 +247,9 @@ class BAC
* @param inst The branch instruction.
* @param ft The fetch target that is currently processed.
* @param PC The predicted PC is passed back through this parameter.
* @return Returns if the branch is taken or not.
* @return Returns the prediction result from the BPU.
*/
bool predict(ThreadID tid, const StaticInstPtr &inst,
Prediction predict(ThreadID tid, const StaticInstPtr &inst,
const FetchTargetPtr &ft, PCStateBase &pc);


Expand Down Expand Up @@ -373,6 +375,9 @@ class BAC
*/
bool wroteToTimeBuffer;

/** Tracks remaining cycles that the branch predictor stalls BAC */
Cycles branchPredictRemaining[MaxThreads];

/** Source of possible stalls. */
struct Stalls
{
Expand Down Expand Up @@ -428,6 +433,8 @@ class BAC
statistics::Scalar runCycles;
/** Stat for total number of squashing cycles. */
statistics::Scalar squashCycles;
/** Stat for total number of blocked cycles. */
statistics::Scalar blockedCycles;
/** Stat for total number of cycles the FTQ was full. */
statistics::Scalar ftqFullCycles;

Expand Down
1 change: 1 addition & 0 deletions src/cpu/o3/ftq.cc
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ FTQ::forAllBackward(ThreadID tid, std::function<void(FetchTargetPtr&)> f)
void
FTQ::insert(ThreadID tid, FetchTargetPtr fetchTarget)
{
assert(ftq[tid].size() < numEntries);
ftq[tid].push_back(fetchTarget);
ppFTQInsert->notify(fetchTarget);
stats.inserts++;
Expand Down
5 changes: 5 additions & 0 deletions src/cpu/o3/lsq_unit.cc
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,11 @@ LSQUnit::executeLoad(const DynInstPtr &inst)

assert(!inst->isSquashed());

if (inst->isExecuted()) {
DPRINTF(LSQUnit, "Load [sn:%lli] already executed\n", inst->seqNum);
return NoFault;
}

load_fault = inst->initiateAcc();

if (load_fault == NoFault && !inst->readMemAccPredicate()) {
Expand Down
12 changes: 9 additions & 3 deletions src/cpu/pred/2bit_local.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ namespace branch_prediction
{

LocalBP::LocalBP(const LocalBPParams &params)
: BPredUnit(params),
: ConditionalPredictor(params),
localPredictorSize(params.localPredictorSize),
localCtrBits(params.localCtrBits),
localPredictorSets(localPredictorSize / localCtrBits),
Expand All @@ -78,6 +78,12 @@ LocalBP::LocalBP(const LocalBPParams &params)
instShiftAmt);
}

/**
 * Intentionally empty implementation of the branchPlaceholder() interface
 * hook. None of the arguments are read and bpHistory is left exactly as
 * the caller passed it in.
 *
 * NOTE(review): presumably this predictor keeps no per-branch history that
 * would need a placeholder entry -- confirm against the base-class contract.
 *
 * @param tid Thread the branch belongs to (unused).
 * @param pc Address of the branch (unused).
 * @param uncond Whether the branch is unconditional (unused).
 * @param bpHistory Caller's history pointer; not modified here.
 */
void
LocalBP::branchPlaceholder(ThreadID tid, Addr pc, bool uncond,
                           void * &bpHistory)
{
}

void
LocalBP::updateHistories(ThreadID tid, Addr pc, bool uncond, bool taken,
Addr target, const StaticInstPtr &inst,
Expand All @@ -87,7 +93,7 @@ LocalBP::updateHistories(ThreadID tid, Addr pc, bool uncond, bool taken,
}


bool
Prediction
LocalBP::lookup(ThreadID tid, Addr branch_addr, void * &bp_history)
{
bool taken;
Expand All @@ -103,7 +109,7 @@ LocalBP::lookup(ThreadID tid, Addr branch_addr, void * &bp_history)

taken = getPrediction(counter_val);

return taken;
return staticPrediction(taken);
}

void
Expand Down
10 changes: 7 additions & 3 deletions src/cpu/pred/2bit_local.hh
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@

#include "base/sat_counter.hh"
#include "base/types.hh"
#include "cpu/pred/bpred_unit.hh"
#include "cpu/pred/branch_type.hh"
#include "cpu/pred/conditional.hh"
#include "params/LocalBP.hh"

namespace gem5
Expand All @@ -62,7 +63,7 @@ namespace branch_prediction
* predictor state that needs to be recorded or updated; the update can be
* determined solely by the branch being taken or not taken.
*/
class LocalBP : public BPredUnit
class LocalBP : public ConditionalPredictor
{
public:
/**
Expand All @@ -71,7 +72,10 @@ class LocalBP : public BPredUnit
LocalBP(const LocalBPParams &params);

// Overriding interface functions
bool lookup(ThreadID tid, Addr pc, void * &bp_history) override;
Prediction lookup(ThreadID tid, Addr pc, void * &bp_history) override;

void branchPlaceholder(ThreadID tid, Addr pc, bool uncond,
void * &bpHistory) override;

void updateHistories(ThreadID tid, Addr pc, bool uncond, bool taken,
Addr target, const StaticInstPtr &inst,
Expand Down
Loading