Skip to content

Commit 4dd4b5b

Browse files
authored
Merge pull request #1255 from undingen/bjit_opt7
rewriter+bjit: remove duplicate getattrs, misc codegen improvements
2 parents 5edb898 + a93836f commit 4dd4b5b

File tree

4 files changed

+129
-74
lines changed

4 files changed

+129
-74
lines changed

src/asm_writing/rewriter.cpp

Lines changed: 93 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,20 @@ void Rewriter::_addAttrGuard(RewriterVar* var, int offset, RewriterVar* val_cons
416416
RewriterVar* RewriterVar::getAttr(int offset, Location dest, assembler::MovType type) {
417417
STAT_TIMER(t0, "us_timer_rewriter", 10);
418418

419+
// if no changing action happened we can reuse get attributes
420+
if (!rewriter->added_changing_action) {
421+
RewriterVar*& result = getattrs[std::make_pair(offset, (int)type)];
422+
if (result) {
423+
if (dest != Location::any())
424+
result->getInReg(dest, true /* allow_constant_in_reg */);
425+
} else {
426+
result = rewriter->createNewVar();
427+
rewriter->addAction([=]() { rewriter->_getAttr(result, this, offset, dest, type); }, { this },
428+
ActionType::NORMAL);
429+
}
430+
return result;
431+
}
432+
419433
RewriterVar* result = rewriter->createNewVar();
420434
rewriter->addAction([=]() { rewriter->_getAttr(result, this, offset, dest, type); }, { this }, ActionType::NORMAL);
421435
return result;
@@ -510,17 +524,11 @@ void RewriterVar::incref() {
510524
}
511525

512526
void RewriterVar::decref() {
513-
rewriter->addAction([=]() {
514-
rewriter->_decref(this);
515-
this->bumpUse();
516-
}, { this }, ActionType::MUTATION);
527+
rewriter->addAction([=]() { rewriter->_decref(this, { this }); }, { this }, ActionType::MUTATION);
517528
}
518529

519530
void RewriterVar::xdecref() {
520-
rewriter->addAction([=]() {
521-
rewriter->_xdecref(this);
522-
this->bumpUse();
523-
}, { this }, ActionType::MUTATION);
531+
rewriter->addAction([=]() { rewriter->_xdecref(this, { this }); }, { this }, ActionType::MUTATION);
524532
}
525533

526534
void Rewriter::_incref(RewriterVar* var, int num_refs) {
@@ -565,22 +573,21 @@ void Rewriter::_incref(RewriterVar* var, int num_refs) {
565573
// (ie the caller should call bumpUse)
566574
}
567575

568-
void Rewriter::_decref(RewriterVar* var) {
576+
void Rewriter::_decref(RewriterVar* var, llvm::ArrayRef<RewriterVar*> vars_to_bump) {
569577
assert(!var->nullable);
570578
// assembler->trap();
571579

572-
// this->_call(NULL, true, false /* can't throw */, (void*)Helper::decref, { var });
580+
// this->_call(NULL, true, false /* can't throw */, (void*)Helper::decref, { var }, {}, vars_to_bump);
573581

574582
#ifdef Py_REF_DEBUG
575583
// assembler->trap();
576584
assembler->decq(assembler::Immediate(&_Py_RefTotal));
577585
#endif
578-
_setupCall(true, { var }, {}, assembler::RAX);
586+
_setupCall(true, { var }, {}, assembler::RAX, vars_to_bump);
579587

580588

581589
#ifdef Py_REF_DEBUG
582-
assembler->mov(assembler::Immediate((void*)assertAlive), assembler::R11);
583-
assembler->callq(assembler::R11);
590+
_callOptimalEncoding(assembler::R11, (void*)assertAlive);
584591
assembler->mov(assembler::RAX, assembler::RDI);
585592
#endif
586593
// _setupCall doesn't remember that it added the arg regs to the location set
@@ -591,8 +598,7 @@ void Rewriter::_decref(RewriterVar* var) {
591598
{
592599
assembler::ForwardJump jnz(*assembler, assembler::COND_NOT_ZERO);
593600
#ifdef Py_TRACE_REFS
594-
assembler->mov(assembler::Immediate((void*)_Py_Dealloc), assembler::R11);
595-
assembler->callq(assembler::R11);
601+
_callOptimalEncoding(assembler::R11, (void*)_Py_Dealloc);
596602
#else
597603
assembler->movq(assembler::Indirect(reg, offsetof(Box, cls)), assembler::RAX);
598604
assembler->callq(assembler::Indirect(assembler::RAX, offsetof(BoxedClass, tp_dealloc)));
@@ -603,13 +609,16 @@ void Rewriter::_decref(RewriterVar* var) {
603609

604610
// Doesn't call bumpUse, since this function is designed to be callable from other emitting functions.
605611
// (ie the caller should call bumpUse)
612+
for (auto&& use : vars_to_bump) {
613+
use->bumpUseLateIfNecessary();
614+
}
606615
}
607616

608-
void Rewriter::_xdecref(RewriterVar* var) {
617+
void Rewriter::_xdecref(RewriterVar* var, llvm::ArrayRef<RewriterVar*> vars_to_bump) {
609618
assert(var->nullable);
610619
// assembler->trap();
611620

612-
this->_call(NULL, true, false /* can't throw */, (void*)Helper::xdecref, { var });
621+
this->_call(NULL, true, false /* can't throw */, (void*)Helper::xdecref, { var }, {}, vars_to_bump);
613622

614623
// Doesn't call bumpUse, since this function is designed to be callable from other emitting functions.
615624
// (ie the caller should call bumpUse)
@@ -716,27 +725,39 @@ void Rewriter::_setAttr(RewriterVar* ptr, int offset, RewriterVar* val) {
716725
if (LOG_IC_ASSEMBLY)
717726
assembler->comment("_setAttr");
718727

719-
assembler::Register ptr_reg = ptr->getInReg();
728+
if (ptr->isScratchAllocation()) {
729+
auto dest_loc = indirectFor(ptr->getScratchLocation(offset));
730+
bool is_immediate;
731+
assembler::Immediate imm = val->tryGetAsImmediate(&is_immediate);
732+
if (is_immediate) {
733+
assembler->movq(imm, dest_loc);
734+
} else {
735+
assembler::Register val_reg = val->getInReg(Location::any(), false);
736+
assembler->mov(val_reg, dest_loc);
737+
}
738+
} else {
739+
assembler::Register ptr_reg = ptr->getInReg();
720740

721-
bool is_immediate;
722-
assembler::Immediate imm = val->tryGetAsImmediate(&is_immediate);
741+
bool is_immediate;
742+
assembler::Immediate imm = val->tryGetAsImmediate(&is_immediate);
723743

724-
if (is_immediate) {
725-
assembler->movq(imm, assembler::Indirect(ptr_reg, offset));
726-
} else {
727-
assembler::Register val_reg = val->getInReg(Location::any(), false, /* otherThan */ ptr_reg);
728-
assert(ptr_reg != val_reg);
744+
if (is_immediate) {
745+
assembler->movq(imm, assembler::Indirect(ptr_reg, offset));
746+
} else {
747+
assembler::Register val_reg = val->getInReg(Location::any(), false, /* otherThan */ ptr_reg);
748+
assert(ptr_reg != val_reg);
729749

730-
assembler->mov(val_reg, assembler::Indirect(ptr_reg, offset));
750+
assembler->mov(val_reg, assembler::Indirect(ptr_reg, offset));
751+
}
731752
}
732753

733754
ptr->bumpUse();
734755

735756
// If the value is a scratch allocated memory array we have to make sure we won't release it immediately.
736757
// Because this setAttr stored a reference to it inside a field and the rewriter can't currently track this uses and
737758
// will think it's unused.
738-
if (val->hasScratchAllocation())
739-
val->resetHasScratchAllocation();
759+
if (val->isScratchAllocation())
760+
val->resetIsScratchAllocation();
740761
val->bumpUse();
741762

742763
assertConsistent();
@@ -782,6 +803,14 @@ assembler::Register RewriterVar::getInReg(Location dest, bool allow_constant_in_
782803
return reg;
783804
}
784805

806+
if (locations.size() == 0 && isScratchAllocation()) {
807+
assembler::Register reg = rewriter->allocReg(dest, otherThan);
808+
auto addr = rewriter->indirectFor(getScratchLocation());
809+
rewriter->assembler->lea(addr, reg);
810+
rewriter->addLocationToVar(this, reg);
811+
return reg;
812+
}
813+
785814
assert(locations.size());
786815

787816
// Not sure if this is worth it,
@@ -1007,7 +1036,7 @@ void Rewriter::_setupCall(bool has_side_effects, llvm::ArrayRef<RewriterVar*> ar
10071036
uintptr_t counter_addr = (uintptr_t)(&picked_slot->num_inside);
10081037
if (isLargeConstant(counter_addr)) {
10091038
assembler::Register reg = allocReg(Location::any(), preserve);
1010-
assembler->mov(assembler::Immediate(counter_addr), reg);
1039+
const_loader.loadConstIntoReg(counter_addr, reg);
10111040
assembler->incl(assembler::Indirect(reg, 0));
10121041
} else {
10131042
assembler->incl(assembler::Immediate(counter_addr));
@@ -1137,6 +1166,20 @@ void Rewriter::_setupCall(bool has_side_effects, llvm::ArrayRef<RewriterVar*> ar
11371166
#endif
11381167
}
11391168

1169+
void Rewriter::_callOptimalEncoding(assembler::Register tmp_reg, void* func_addr) {
1170+
assert(vars_by_location.count(tmp_reg) == 0);
1171+
uint64_t asm_address = (uint64_t)assembler->curInstPointer() + 5;
1172+
uint64_t real_asm_address = asm_address + (uint64_t)rewrite->getSlotStart() - (uint64_t)assembler->startAddr();
1173+
int64_t offset = (int64_t)((uint64_t)func_addr - real_asm_address);
1174+
if (isLargeConstant(offset)) {
1175+
const_loader.loadConstIntoReg((uint64_t)func_addr, tmp_reg);
1176+
assembler->callq(tmp_reg);
1177+
} else {
1178+
assembler->call(assembler::Immediate(offset));
1179+
assert(assembler->hasFailed() || asm_address == (uint64_t)assembler->curInstPointer());
1180+
}
1181+
}
1182+
11401183
void Rewriter::_call(RewriterVar* result, bool has_side_effects, bool can_throw, void* func_addr,
11411184
llvm::ArrayRef<RewriterVar*> args, llvm::ArrayRef<RewriterVar*> args_xmm,
11421185
llvm::ArrayRef<RewriterVar*> vars_to_bump) {
@@ -1152,19 +1195,7 @@ void Rewriter::_call(RewriterVar* result, bool has_side_effects, bool can_throw,
11521195

11531196
assertConsistent();
11541197

1155-
// make sure setupCall doesn't use R11
1156-
assert(vars_by_location.count(assembler::R11) == 0);
1157-
1158-
uint64_t asm_address = (uint64_t)assembler->curInstPointer() + 5;
1159-
uint64_t real_asm_address = asm_address + (uint64_t)rewrite->getSlotStart() - (uint64_t)assembler->startAddr();
1160-
int64_t offset = (int64_t)((uint64_t)func_addr - real_asm_address);
1161-
if (isLargeConstant(offset)) {
1162-
const_loader.loadConstIntoReg((uint64_t)func_addr, r);
1163-
assembler->callq(r);
1164-
} else {
1165-
assembler->call(assembler::Immediate(offset));
1166-
assert(assembler->hasFailed() || asm_address == (uint64_t)assembler->curInstPointer());
1167-
}
1198+
_callOptimalEncoding(r, func_addr);
11681199

11691200
if (can_throw)
11701201
registerDecrefInfoHere();
@@ -1220,12 +1251,16 @@ std::vector<Location> Rewriter::getDecrefLocations() {
12201251
// If you forget to call deregisterOwnedAttr(), and then later do something that needs to emit decref info,
12211252
// we will try to emit the info for that owned attr even though the rewriter has decided that it doesn't need
12221253
// to keep it alive any more.
1223-
ASSERT(var->locations.size() > 0,
1254+
ASSERT(var->locations.size() > 0 || var->isScratchAllocation(),
12241255
"owned variable not accessible any more -- maybe forgot to call deregisterOwnedAttr?");
1225-
ASSERT(var->locations.size() == 1, "this code only looks at one location");
1226-
Location l = *var->locations.begin();
1227-
1228-
assert(l.type == Location::Scratch || l.type == Location::Stack);
1256+
ASSERT(var->locations.size() == 1 || var->isScratchAllocation(), "this code only looks at one location");
1257+
Location l;
1258+
if (var->locations.size()) {
1259+
l = *var->locations.begin();
1260+
assert(l.type == Location::Scratch || l.type == Location::Stack);
1261+
} else {
1262+
l = var->getScratchLocation();
1263+
}
12291264

12301265
int offset1 = indirectFor(l).offset;
12311266
int offset2 = p.second;
@@ -1287,13 +1322,13 @@ void RewriterVar::_release() {
12871322
}
12881323

12891324
// releases allocated scratch space
1290-
if (hasScratchAllocation()) {
1325+
if (isScratchAllocation()) {
12911326
for (int i = 0; i < scratch_allocation.second; ++i) {
1292-
Location l = Location(Location::Scratch, (scratch_allocation.first + i) * 8);
1327+
Location l = getScratchLocation(i * 8);
12931328
assert(rewriter->vars_by_location[l] == LOCATION_PLACEHOLDER);
12941329
rewriter->vars_by_location.erase(l);
12951330
}
1296-
resetHasScratchAllocation();
1331+
resetIsScratchAllocation();
12971332
}
12981333

12991334
this->locations.clear();
@@ -1344,6 +1379,11 @@ void RewriterVar::deregisterOwnedAttr(int byte_offset) {
13441379
}, { this }, ActionType::NORMAL);
13451380
}
13461381

1382+
Location RewriterVar::getScratchLocation(int additional_offset_in_bytes) {
1383+
assert(isScratchAllocation());
1384+
return Location(Location::Scratch, scratch_allocation.first * sizeof(void*) + additional_offset_in_bytes);
1385+
}
1386+
13471387
void RewriterVar::bumpUse() {
13481388
rewriter->assertPhaseEmitting();
13491389

@@ -1500,7 +1540,7 @@ void Rewriter::commit() {
15001540
uintptr_t counter_addr = (uintptr_t)(&picked_slot->num_inside);
15011541
if (isLargeConstant(counter_addr)) {
15021542
assembler::Register reg = allocReg(Location::any(), getReturnDestination());
1503-
assembler->mov(assembler::Immediate(counter_addr), reg);
1543+
const_loader.loadConstIntoReg(counter_addr, reg);
15041544
assembler->decl(assembler::Indirect(reg, 0));
15051545
} else {
15061546
assembler->decl(assembler::Immediate(counter_addr));
@@ -1794,12 +1834,6 @@ int Rewriter::_allocate(RewriterVar* result, int n) {
17941834
assert(result->scratch_allocation == std::make_pair(0, 0));
17951835
result->scratch_allocation = std::make_pair(a, n);
17961836

1797-
assembler::Register r = result->initializeInReg();
1798-
1799-
// TODO we could do something like we do for constants and only load
1800-
// this when necessary, so it won't spill. Is that worth?
1801-
assembler->lea(assembler::Indirect(assembler::RSP, 8 * a + rewrite->getScratchRspOffset()), r);
1802-
18031837
assertConsistent();
18041838
result->releaseIfNoUses();
18051839
return a;
@@ -1909,8 +1943,7 @@ void Rewriter::_checkAndThrowCAPIException(RewriterVar* r, int64_t exc_val, asse
19091943
_setupCall(false, {});
19101944
{
19111945
assembler::ForwardJump jnz(*assembler, assembler::COND_NOT_ZERO);
1912-
assembler->mov(assembler::Immediate((void*)throwCAPIException), assembler::R11);
1913-
assembler->callq(assembler::R11);
1946+
_callOptimalEncoding(assembler::R11, (void*)throwCAPIException);
19141947

19151948
registerDecrefInfoHere();
19161949
}
@@ -1937,7 +1970,7 @@ void Rewriter::spillRegister(assembler::Register reg, Location preserve) {
19371970

19381971
// There may be no need to spill if the var is held in a different location already.
19391972
// There is no need to spill if it is a constant
1940-
if (var->locations.size() > 1 || var->is_constant) {
1973+
if (var->locations.size() > 1 || var->is_constant || var->isScratchAllocation()) {
19411974
removeLocationFromVar(var, reg);
19421975
return;
19431976
}

src/asm_writing/rewriter.h

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,8 @@ class RewriterVar {
202202

203203
template <typename Src, typename Dst> inline RewriterVar* getAttrCast(int offset, Location loc = Location::any());
204204

205-
bool isConstant() { return is_constant; }
205+
bool isConstant() const { return is_constant; }
206+
bool isContantNull() const { return isConstant() && constant_value == 0; }
206207

207208
protected:
208209
void incref();
@@ -236,11 +237,11 @@ class RewriterVar {
236237
// /* some code */
237238
// bumpUseLateIfNecessary();
238239
void bumpUseEarlyIfPossible() {
239-
if (reftype != RefType::OWNED && !hasScratchAllocation())
240+
if (reftype != RefType::OWNED && !isScratchAllocation())
240241
bumpUse();
241242
}
242243
void bumpUseLateIfNecessary() {
243-
if (reftype == RefType::OWNED || hasScratchAllocation())
244+
if (reftype == RefType::OWNED || isScratchAllocation())
244245
bumpUse();
245246
}
246247

@@ -251,8 +252,9 @@ class RewriterVar {
251252
// Don't call it directly: call bumpUse and releaseIfNoUses instead.
252253
void _release();
253254
bool isDoneUsing() { return next_use == uses.size(); }
254-
bool hasScratchAllocation() const { return scratch_allocation.second > 0; }
255-
void resetHasScratchAllocation() { scratch_allocation = std::make_pair(0, 0); }
255+
bool isScratchAllocation() const { return scratch_allocation.second > 0; }
256+
void resetIsScratchAllocation() { scratch_allocation = std::make_pair(0, 0); }
257+
Location getScratchLocation(int additional_offset_in_bytes = 0);
256258
bool needsDecref(int current_action_index);
257259

258260
// Indicates if this variable is an arg, and if so, what location the arg is from.
@@ -263,7 +265,8 @@ class RewriterVar {
263265
Location arg_loc;
264266
std::pair<int /*offset*/, int /*size*/> scratch_allocation;
265267

266-
llvm::SmallSet<std::tuple<int, uint64_t, bool>, 4> attr_guards; // used to detect duplicate guards
268+
llvm::SmallSet<std::tuple<int, uint64_t, bool>, 4> attr_guards; // used to detect duplicate guards
269+
llvm::SmallDenseMap<std::pair<int, int>, RewriterVar*> getattrs; // used to detect duplicate getAttrs
267270

268271
// Gets a copy of this variable in a register, spilling/reloading if necessary.
269272
// TODO have to be careful with the result since the interface doesn't guarantee
@@ -565,8 +568,13 @@ class Rewriter : public ICSlotRewrite::CommitHook {
565568

566569
// These do not call bumpUse on their arguments:
567570
void _incref(RewriterVar* var, int num_refs = 1);
568-
void _decref(RewriterVar* var);
569-
void _xdecref(RewriterVar* var);
571+
void _decref(RewriterVar* var, llvm::ArrayRef<RewriterVar*> vars_to_bump = {});
572+
void _xdecref(RewriterVar* var, llvm::ArrayRef<RewriterVar*> vars_to_bump = {});
573+
574+
// emits a call instruction using the smallest encoding.
575+
// either doing a relative call if or otherwise loading the address into the supplied register and calling it.
576+
// the caller of this function must make sure that the supplied register can be safely overwriten.
577+
void _callOptimalEncoding(assembler::Register tmp_reg, void* func_addr);
570578

571579
void assertConsistent() {
572580
#ifndef NDEBUG

0 commit comments

Comments
 (0)