Skip to content

Commit e75c707

Browse files
committed
-cpu/drcbearm64.cpp: More fixes and optimisations:
* Correctly identify valid immediate constants for add/sub/cmp (it was too conservative).
* Don't unnecessarily copy UML register values kept in host registers for CMP.
* Fixed detection of TST against immediate zero and optimised generated code.
* Optimised TST against immediate with all bits set.
-cpu/alto2: Follow the same pattern as the other things that have been altered to avoid problematic memsets in this device.
-cpu/powerpc: Realigned some comments that had drifted.
1 parent 14d11c4 commit e75c707

File tree

4 files changed

+80
-66
lines changed

4 files changed

+80
-66
lines changed

src/devices/cpu/alto2/a2disp.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -499,7 +499,7 @@ void alto2_cpu_device::f2_late_evenfield()
499499
*/
500500
void alto2_cpu_device::init_disp()
501501
{
502-
m_dsp.clear();
502+
m_dsp = decltype(m_dsp)();
503503
save_item(NAME(m_dsp.state));
504504
save_item(NAME(m_dsp.hlc));
505505
save_item(NAME(m_dsp.setmode));

src/devices/cpu/alto2/a2disp.h

Lines changed: 18 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -193,35 +193,24 @@
193193
#ifndef MAME_CPU_ALTO2_A2DISP_H
194194
#define MAME_CPU_ALTO2_A2DISP_H
195195
struct {
196-
197-
void clear()
198-
{
199-
state = hlc = setmode = inverse = scanline = 0;
200-
halfclock = vblank = false;
201-
std::fill(std::begin(fifo), std::end(fifo), 0);
202-
wa = ra = a63 = a66 = 0;
203-
dht_blocks = dwt_blocks = curt_blocks = curt_wakeup = false;
204-
xpreg = csr = 0;
205-
}
206-
207-
uint32_t state; //!< current state of the display_state_machine()
208-
uint32_t hlc; //!< horizontal line counter
209-
uint32_t setmode; //!< value written by last SETMODE<-
210-
uint32_t inverse; //!< set to 0xffff if line is inverse, 0x0000 otherwise
211-
uint32_t scanline; //!< current scanline
212-
bool halfclock; //!< false for normal pixel clock, true for half pixel clock
213-
bool vblank; //!< true during vblank, false otherwise
214-
uint16_t fifo[A2_DISP_FIFO]; //!< display word fifo
215-
uint32_t wa; //!< fifo input pointer (write address; 4-bit)
216-
uint32_t ra; //!< fifo output pointer (read address; 4-bit)
217-
uint32_t a63; //!< most recent value read from the PROM a63
218-
uint32_t a66; //!< most recent value read from the PROM a66
219-
bool dht_blocks; //!< set true, if the DHT executed BLOCK
220-
bool dwt_blocks; //!< set true, if the DWT executed BLOCK
221-
bool curt_blocks; //!< set true, if the CURT executed BLOCK
222-
bool curt_wakeup; //!< set true, if CURT wakeups are generated
223-
uint32_t xpreg; //!< cursor cursor x position register (10-bit)
224-
uint32_t csr; //!< cursor shift register (16-bit)
196+
uint32_t state = 0; //!< current state of the display_state_machine()
197+
uint32_t hlc = 0; //!< horizontal line counter
198+
uint32_t setmode = 0; //!< value written by last SETMODE<-
199+
uint32_t inverse = 0; //!< set to 0xffff if line is inverse, 0x0000 otherwise
200+
uint32_t scanline = 0; //!< current scanline
201+
bool halfclock = false; //!< false for normal pixel clock, true for half pixel clock
202+
bool vblank = false; //!< true during vblank, false otherwise
203+
uint16_t fifo[A2_DISP_FIFO] = { }; //!< display word fifo
204+
uint32_t wa = 0; //!< fifo input pointer (write address; 4-bit)
205+
uint32_t ra = 0; //!< fifo output pointer (read address; 4-bit)
206+
uint32_t a63 = 0; //!< most recent value read from the PROM a63
207+
uint32_t a66 = 0; //!< most recent value read from the PROM a66
208+
bool dht_blocks = false; //!< set true, if the DHT executed BLOCK
209+
bool dwt_blocks = false; //!< set true, if the DWT executed BLOCK
210+
bool curt_blocks = false; //!< set true, if the CURT executed BLOCK
211+
bool curt_wakeup = false; //!< set true, if CURT wakeups are generated
212+
uint32_t xpreg = 0; //!< cursor cursor x position register (10-bit)
213+
uint32_t csr = 0; //!< cursor shift register (16-bit)
225214
std::unique_ptr<uint16_t[]> framebuf; //!< array of words of the raw bitmap that is displayed
226215
std::unique_ptr<uint8_t[]> patterns; //!< array of 65536 patterns (16 bytes) with 1 byte per pixel
227216
std::unique_ptr<bitmap_ind16> bitmap; //!< MAME bitmap with 16 bit indices

src/devices/cpu/drcbearm64.cpp

Lines changed: 48 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,12 @@ inline bool is_valid_immediate(uint64_t val, size_t bits)
251251
return val < (uint64_t(1) << bits);
252252
}
253253

254+
constexpr bool is_valid_immediate_addsub(uint64_t val)
255+
{
256+
// Returns true if val can be used as an add/sub/cmp immediate operand: a 12-bit unsigned immediate value, optionally left-shifted by 12 bits, i.e. no bits set outside bits 0-11, or no bits set outside bits 12-23 (zero passes both checks) - assumes util::make_bitmask<uint64_t>(12) yields the low 12 bits set
257+
return !(val & ~util::make_bitmask<uint64_t>(12)) || !(val & ~(util::make_bitmask<uint64_t>(12) << 12));
258+
}
259+
254260
inline constexpr bool is_valid_immediate_signed(int64_t val, size_t bits)
255261
{
256262
return util::sext(val, bits) == val;
@@ -752,18 +758,20 @@ a64::Vec drcbe_arm64::be_parameter::select_register(a64::Vec const &reg, uint32_
752758
{
753759
if (m_type == PTYPE_FLOAT_REGISTER)
754760
return get_register_float(regsize);
755-
if (regsize == 4)
761+
else if (regsize == 4)
756762
return reg.s();
757-
return reg.d();
763+
else
764+
return reg.d();
758765
}
759766

760767
a64::Gp drcbe_arm64::be_parameter::select_register(a64::Gp const &reg, uint32_t regsize) const
761768
{
762769
if (m_type == PTYPE_INT_REGISTER)
763770
return get_register_int(regsize);
764-
if (regsize == 4)
771+
else if (regsize == 4)
765772
return reg.w();
766-
return reg.x();
773+
else
774+
return reg.x();
767775
}
768776

769777
void drcbe_arm64::get_imm_relative(a64::Assembler &a, const a64::Gp &reg, const uint64_t val) const
@@ -3234,7 +3242,7 @@ template <a64::Inst::Id Opcode> void drcbe_arm64::op_add(a64::Assembler &a, cons
32343242
if (Opcode == a64::Inst::kIdAdcs)
32353243
load_carry(a);
32363244

3237-
if (src1p.is_immediate() && is_valid_immediate(src1p.immediate(), 11))
3245+
if (src1p.is_immediate() && is_valid_immediate_addsub(src1p.immediate()))
32383246
{
32393247
const a64::Gp src = src2p.select_register(TEMP_REG2, inst.size());
32403248

@@ -3245,7 +3253,7 @@ template <a64::Inst::Id Opcode> void drcbe_arm64::op_add(a64::Assembler &a, cons
32453253
a.emit(Opcode, output, src, src1p.immediate());
32463254
mov_param_reg(a, inst.size(), dstp, output);
32473255
}
3248-
else if (src2p.is_immediate() && is_valid_immediate(src2p.immediate(), 11))
3256+
else if (src2p.is_immediate() && is_valid_immediate_addsub(src2p.immediate()))
32493257
{
32503258
const a64::Gp src = src1p.select_register(TEMP_REG1, inst.size());
32513259

@@ -3285,7 +3293,7 @@ template <a64::Inst::Id Opcode> void drcbe_arm64::op_sub(a64::Assembler &a, cons
32853293

32863294
const a64::Gp output = dstp.select_register(TEMP_REG3, inst.size());
32873295

3288-
if (src2p.is_immediate() && is_valid_immediate(src2p.immediate(), 11))
3296+
if (src2p.is_immediate() && is_valid_immediate_addsub(src2p.immediate()))
32893297
{
32903298
const a64::Gp src = select_register(TEMP_REG1, inst.size());
32913299

@@ -3319,22 +3327,23 @@ void drcbe_arm64::op_cmp(a64::Assembler &a, const uml::instruction &inst)
33193327
be_parameter src1p(*this, inst.param(0), PTYPE_MRI);
33203328
be_parameter src2p(*this, inst.param(1), PTYPE_MRI);
33213329

3322-
const a64::Gp temp = select_register(TEMP_REG1, inst.size());
3323-
const a64::Gp temp2 = select_register(TEMP_REG2, inst.size());
3330+
const a64::Gp src1 = src1p.select_register(TEMP_REG1, inst.size());
33243331

3325-
mov_reg_param(a, inst.size(), temp, src1p);
3332+
mov_reg_param(a, inst.size(), src1, src1p);
33263333

3327-
if (src2p.is_immediate() && is_valid_immediate(src2p.immediate(), 11))
3334+
if (src2p.is_immediate() && is_valid_immediate_addsub(src2p.immediate()))
33283335
{
33293336
if (src2p.is_immediate_value(0))
3330-
a.cmp(temp, select_register(a64::xzr, inst.size()));
3337+
a.cmp(src1, select_register(a64::xzr, inst.size()));
33313338
else
3332-
a.cmp(temp, src2p.immediate());
3339+
a.cmp(src1, src2p.immediate());
33333340
}
33343341
else
33353342
{
3336-
mov_reg_param(a, inst.size(), temp2, src2p);
3337-
a.cmp(temp, temp2);
3343+
const a64::Gp src2 = src2p.select_register(TEMP_REG2, inst.size());
3344+
3345+
mov_reg_param(a, inst.size(), src2, src2p);
3346+
a.cmp(src1, src2);
33383347
}
33393348

33403349
store_carry(a, true);
@@ -3728,17 +3737,35 @@ void drcbe_arm64::op_test(a64::Assembler &a, const uml::instruction &inst)
37283737
const a64::Gp src1 = src1p.select_register(TEMP_REG1, inst.size());
37293738
const a64::Gp src2 = src2p.select_register(TEMP_REG2, inst.size());
37303739

3731-
mov_reg_param(a, inst.size(), src1, src1p);
3740+
if (src1p.is_immediate_value(0) || src2p.is_immediate_value(0))
3741+
{
3742+
const a64::Gp zero = select_register(a64::xzr, inst.size());
37323743

3733-
if (src2p.is_immediate() && is_valid_immediate_mask(src2p.immediate(), inst.size()))
3744+
a.tst(zero, zero);
3745+
}
3746+
else if (src2p.is_immediate_value(util::make_bitmask<uint64_t>(inst.size() * 8)))
37343747
{
3735-
if (src2p.is_immediate_value(0))
3736-
a.tst(src1, select_register(a64::xzr, inst.size()));
3737-
else
3738-
a.tst(src1, src2p.immediate());
3748+
mov_reg_param(a, inst.size(), src1, src1p);
3749+
a.tst(src1, src1);
3750+
}
3751+
else if (src1p.is_immediate_value(util::make_bitmask<uint64_t>(inst.size() * 8)))
3752+
{
3753+
mov_reg_param(a, inst.size(), src2, src2p);
3754+
a.tst(src2, src2);
3755+
}
3756+
else if (src2p.is_immediate() && is_valid_immediate_mask(src2p.immediate(), inst.size()))
3757+
{
3758+
mov_reg_param(a, inst.size(), src1, src1p);
3759+
a.tst(src1, src2p.immediate());
3760+
}
3761+
else if (src1p.is_immediate() && is_valid_immediate_mask(src1p.immediate(), inst.size()))
3762+
{
3763+
mov_reg_param(a, inst.size(), src2, src2p);
3764+
a.tst(src2, src1p.immediate());
37393765
}
37403766
else
37413767
{
3768+
mov_reg_param(a, inst.size(), src1, src1p);
37423769
mov_reg_param(a, inst.size(), src2, src2p);
37433770
a.tst(src1, src2);
37443771
}

src/devices/cpu/powerpc/ppcdrc.cpp

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1021,45 +1021,43 @@ void ppc_device::static_generate_memory_accessor(int mode, int size, int iswrite
10211021
/* check for unaligned accesses and break into two */
10221022
if (!ismasked && size != 1)
10231023
{
1024-
/* in little-endian mode, anything misaligned generates an exception */
10251024
if ((mode & MODE_LITTLE_ENDIAN) || masked == nullptr || !(m_cap & PPCCAP_MISALIGNED))
10261025
{
1026+
/* in little-endian mode, anything misaligned generates an exception */
10271027
UML_TEST(block, I0, size - 1); // test i0,size-1
1028-
UML_JMPc(block, COND_NZ, alignex = label++); // jmp alignex,nz
1028+
UML_JMPc(block, COND_NZ, alignex = label++); // jmp alignex,nz
10291029
}
1030-
1031-
/* in big-endian mode, it's more complicated */
10321030
else
10331031
{
1034-
/* 8-byte accesses must be word-aligned */
1032+
/* in big-endian mode, it's more complicated */
10351033
if (size == 8)
10361034
{
1035+
/* 8-byte accesses must be word-aligned */
10371036
UML_TEST(block, I0, 3); // test i0,3
1038-
UML_JMPc(block, COND_NZ, alignex = label++); // jmp alignex,nz
1037+
UML_JMPc(block, COND_NZ, alignex = label++); // jmp alignex,nz
10391038

10401039
/* word aligned accesses need to be broken up */
10411040
UML_TEST(block, I0, 4); // test i0,4
10421041
UML_JMPc(block, COND_NZ, unaligned = label++); // jmp unaligned, nz
10431042
}
1044-
1045-
/* unaligned 2 and 4 byte accesses need to be broken up */
10461043
else
10471044
{
1045+
/* unaligned 2 and 4 byte accesses need to be broken up */
10481046
UML_TEST(block, I0, size - 1); // test i0,size-1
1049-
UML_JMPc(block, COND_NZ, unaligned = label++); // jmp unaligned,nz
1047+
UML_JMPc(block, COND_NZ, unaligned = label++); // jmp unaligned,nz
10501048
}
10511049
}
10521050
}
10531051

10541052
/* general case: assume paging and perform a translation */
10551053
if (((m_cap & PPCCAP_OEA) && (mode & MODE_DATA_TRANSLATION)) || (iswrite && (m_cap & PPCCAP_4XX) && (mode & MODE_PROTECTION)))
10561054
{
1057-
UML_SHR(block, I3, I0, 12); // shr i3,i0,12
1058-
UML_LOAD(block, I3, (void *)vtlb_table(), I3, SIZE_DWORD, SCALE_x4);// load i3,[vtlb],i3,dword
1059-
UML_TEST(block, I3, (uint64_t)1 << translate_type); // test i3,1 << translate_type
1060-
UML_JMPc(block, COND_Z, tlbmiss = label++); // jmp tlbmiss,z
1061-
UML_LABEL(block, tlbreturn = label++); // tlbreturn:
1062-
UML_ROLINS(block, I0, I3, 0, 0xfffff000); // rolins i0,i3,0,0xfffff000
1055+
UML_SHR(block, I3, I0, 12); // shr i3,i0,12
1056+
UML_LOAD(block, I3, (void *)vtlb_table(), I3, SIZE_DWORD, SCALE_x4); // load i3,[vtlb],i3,dword
1057+
UML_TEST(block, I3, (uint64_t)1 << translate_type); // test i3,1 << translate_type
1058+
UML_JMPc(block, COND_Z, tlbmiss = label++); // jmp tlbmiss,z
1059+
UML_LABEL(block, tlbreturn = label++); // tlbreturn:
1060+
UML_ROLINS(block, I0, I3, 0, 0xfffff000); // rolins i0,i3,0,0xfffff000
10631061
}
10641062
else if (m_cap & PPCCAP_4XX)
10651063
UML_AND(block, I0, I0, 0x7fffffff); // and i0,i0,0x7fffffff

0 commit comments

Comments
 (0)