Skip to content

Commit 61b0c90

Browse files
calc84maniac authored and fleroviux committed
ARM: implement multiplication carry flag algorithm
1 parent aa9005b commit 61b0c90

File tree

3 files changed

+138
-13
lines changed

3 files changed

+138
-13
lines changed

src/nba/src/arm/handlers/arithmetic.inl

+105-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,18 @@
33
*
44
* Licensed under GPLv3 or any later version.
55
* Refer to the included LICENSE file.
6+
*
7+
* Multiplication carry flag algorithm has been altered from its original form according to its GPL-compatible license, as follows:
8+
*
9+
* Copyright (C) 2024 zaydlang, calc84maniac
10+
*
11+
* This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.
12+
*
13+
* Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:
14+
*
15+
* 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
16+
* 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
17+
* 3. This notice may not be removed or altered from any source distribution.
618
*/
719

820
void SetZeroAndSignFlag(u32 value) {
@@ -11,7 +23,7 @@ void SetZeroAndSignFlag(u32 value) {
1123
}
1224

1325
template<bool is_signed = true>
14-
void TickMultiply(u32 multiplier) {
26+
bool TickMultiply(u32 multiplier) {
1527
u32 mask = 0xFFFFFF00;
1628

1729
bus.Idle();
@@ -28,6 +40,98 @@ void TickMultiply(u32 multiplier) {
2840
mask <<= 8;
2941
bus.Idle();
3042
}
43+
44+
// Return true if full ticks used.
45+
return mask == 0;
46+
}
47+
48+
// Carry flag produced by a multiply, derived from the multiplier alone.
//
// The carry comes directly from the carry bit injected by the final booth
// iteration. That final booth addend is negative only when the upper two
// bits of the multiplier are binary 10, so the flag is set exactly for
// multipliers in the range 0x80000000..0xBFFFFFFF.
//
// multiplier: the raw 32-bit multiplier operand.
// Returns true when the carry flag should be set.
bool MultiplyCarrySimple(u32 multiplier) {
  return (multiplier >> 30) == 2;
}
53+
54+
// Carry flag of a multiply, obtained by replaying the booth iterations
// through a 32-bit carry-save adder.
//
// The multiplier is consumed in groups of 8 bits (4 booth iterations per
// group); groups are processed until the sign-extended portion consumed so
// far equals the whole multiplier. The flag is bit 31 of the carry-save
// adder's final carry word.
//
// multiplicand: value being multiplied.
// multiplier:   value driving the booth recoding. The callers in this change
//               only pass multipliers for which the multiply did not use its
//               full number of cycles, so at most three groups are needed.
// accum:        low 32 bits of the accumulate operand (0 when there is none).
// Returns true when the carry flag should be set.
bool MultiplyCarryLo(u32 multiplicand, u32 multiplier, u32 accum = 0) {
  // Set low bit of multiplicand to cause negation to invert the upper bits.
  // This bit cannot propagate to the resulting carry bit.
  multiplicand |= 1;

  // Optimized first iteration: booth is 0 or -1 depending on multiplier bit 0.
  u32 booth = static_cast<u32>(static_cast<s32>(multiplier << 31) >> 31);
  u32 carry = multiplicand * booth;
  u32 sum = carry + accum;

  int shift = 29;
  do {
    // Process 8 multiplier bits using 4 booth iterations.
    for (int i = 0; i < 4; i++, shift -= 2) {
      // Sign-extend the low (32 - shift) multiplier bits. The difference to
      // the previous value is the next booth factor (-2 to 2), already
      // scaled to its bit position.
      //
      // Once shift drops below zero every multiplier bit has been consumed,
      // so the running value is the multiplier itself. Handling that case
      // explicitly avoids an undefined negative shift and guarantees the
      // outer loop terminates for any multiplier.
      const u32 next_booth =
        shift >= 0
          ? static_cast<u32>(static_cast<s32>(multiplier << shift) >> shift)
          : multiplier;
      const u32 factor = next_booth - booth;
      booth = next_booth;

      // Get scaled value of booth addend.
      const u32 addend = multiplicand * factor;

      // Accumulate addend with carry-save add: accum keeps the bitwise sum,
      // and carry is recovered as the difference to the true running sum.
      accum ^= carry ^ addend;
      sum += addend;
      carry = sum - accum;
    }
  } while (booth != multiplier);

  // Carry flag comes from bit 31 of carry-save adder's final carry.
  return (carry >> 31) != 0;
}
84+
85+
// Carry flag of a 64-bit multiply, computed from the upper bits only.
//
// Only the last 3 booth iterations are relevant to the output carry, so both
// inputs are scaled down until the upper bits of the 64-bit booth addends
// land in the upper bits of 32-bit values. Bit numbers in the comments below
// (60-63) refer to the 64-bit result; they map to bits 28-31 of the 32-bit
// working values.
//
// sign_extend:  true to treat both operands as signed when scaling them down,
//               false to treat them as unsigned.
// multiplicand: value being multiplied.
// multiplier:   value driving the booth recoding.
// accum_hi:     high 32 bits of the accumulate operand (0 when there is none).
// Returns true when the carry flag should be set.
template<bool sign_extend>
bool MultiplyCarryHi(u32 multiplicand, u32 multiplier, u32 accum_hi = 0) {
  // Reduce scale of both inputs, while handling sign extension.
  if (sign_extend) {
    multiplicand = static_cast<u32>(static_cast<s32>(multiplicand) >> 6);
    multiplier = static_cast<u32>(static_cast<s32>(multiplier) >> 26);
  } else {
    multiplicand >>= 6;
    multiplier >>= 26;
  }

  // Set low bit of multiplicand to cause negation to invert the upper bits.
  // This bit cannot propagate to the resulting carry bit.
  multiplicand |= 1;

  // Pre-populate magic bit 61 for carry.
  const u32 carry = ~accum_hi & 0x20000000;
  // Pre-populate magic bits 63-60 for accum (with carry magic pre-added).
  u32 accum = accum_hi - 0x08000000;

  // Sign-extended low 5, 3 and 1 bits of the scaled multiplier; consecutive
  // differences are the factors for the last 3 booth iterations.
  const u32 booth0 = static_cast<u32>(static_cast<s32>(multiplier << 27) >> 27);
  const u32 booth1 = static_cast<u32>(static_cast<s32>(multiplier << 29) >> 29);
  const u32 booth2 = static_cast<u32>(static_cast<s32>(multiplier << 31) >> 31);
  const u32 factor0 = multiplier - booth0;
  const u32 factor1 = booth0 - booth1;
  const u32 factor2 = booth1 - booth2;

  // 3rd-last booth addend: finalize bits 61-60 of accum magic using its sign.
  const u32 addend2 = multiplicand * factor2;
  accum -= addend2 & 0x10000000;

  // 2nd-last booth addend: finalize bits 63-62 of accum magic using its sign.
  const u32 addend1 = multiplicand * factor1;
  accum -= addend1 & 0x40000000;

  // Get carry from carry-save add in bit 61 and propagate it to bit 62.
  u32 sum = accum + (addend1 & 0x20000000);
  // Subtract out carry magic to get actual accum magic.
  accum -= carry;

  // Last booth addend: add to bit 62 and propagate carry.
  const u32 addend0 = multiplicand * factor0;
  sum += addend0 & 0x40000000;

  // Cancel out accum magic bit 63 to get carry bit 63.
  return ((sum ^ accum) >> 31) != 0;
}
32136

33137
u32 ADD(u32 op1, u32 op2, bool set_flags) {

src/nba/src/arm/handlers/handler16.inl

+9-3
Original file line numberDiff line numberDiff line change
@@ -183,12 +183,18 @@ void Thumb_ALU(u16 instruction) {
183183
break;
184184
}
185185
case ThumbDataOp::MUL: {
186-
TickMultiply(state.reg[dst]);
186+
u32 lhs = state.reg[src];
187+
u32 rhs = state.reg[dst];
188+
bool full = TickMultiply(rhs);
187189
pipe.access = Access::Code | Access::Nonsequential;
188190

189-
state.reg[dst] *= state.reg[src];
191+
state.reg[dst] = lhs * rhs;
190192
SetZeroAndSignFlag(state.reg[dst]);
191-
state.cpsr.f.c = 0;
193+
if (full) {
194+
state.cpsr.f.c = MultiplyCarrySimple(rhs);
195+
} else {
196+
state.cpsr.f.c = MultiplyCarryLo(lhs, rhs);
197+
}
192198
break;
193199
}
194200
case ThumbDataOp::BIC: {

src/nba/src/arm/handlers/handler32.inl

+24-9
Original file line numberDiff line numberDiff line change
@@ -247,15 +247,22 @@ void ARM_Multiply(u32 instruction) {
247247
auto rhs = GetReg(op2);
248248
auto result = lhs * rhs;
249249

250-
TickMultiply(rhs);
250+
bool full = TickMultiply(rhs);
251251

252+
u32 accum = 0;
252253
if (accumulate) {
253-
result += GetReg(op3);
254+
accum = GetReg(op3);
255+
result += accum;
254256
bus.Idle();
255257
}
256258

257259
if (set_flags) {
258260
SetZeroAndSignFlag(result);
261+
if (full) {
262+
state.cpsr.f.c = MultiplyCarrySimple(rhs);
263+
} else {
264+
state.cpsr.f.c = MultiplyCarryLo(lhs, rhs, accum);
265+
}
259266
}
260267

261268
SetReg(dst, result);
@@ -273,7 +280,7 @@ void ARM_MultiplyLong(u32 instruction) {
273280
int dst_lo = (instruction >> 12) & 0xF;
274281
int dst_hi = (instruction >> 16) & 0xF;
275282

276-
s64 result;
283+
u64 result;
277284

278285
pipe.access = Access::Code | Access::Nonsequential;
279286
state.r15 += 4;
@@ -284,18 +291,21 @@ void ARM_MultiplyLong(u32 instruction) {
284291
if (sign_extend) {
285292
result = s64(s32(lhs)) * s64(s32(rhs));
286293
} else {
287-
result = s64(u64(lhs) * u64(rhs));
294+
result = u64(lhs) * u64(rhs);
288295
}
289296

290-
TickMultiply<sign_extend>(rhs);
297+
bool full = TickMultiply<sign_extend>(rhs);
291298
bus.Idle();
292299

300+
u32 accum_lo = 0;
301+
u32 accum_hi = 0;
293302
if (accumulate) {
294-
s64 value = GetReg(dst_hi);
303+
accum_lo = GetReg(dst_lo);
304+
accum_hi = GetReg(dst_hi);
295305

296-
value <<= 16;
297-
value <<= 16;
298-
value |= GetReg(dst_lo);
306+
u64 value = accum_hi;
307+
value <<= 32;
308+
value |= accum_lo;
299309

300310
result += value;
301311
bus.Idle();
@@ -306,6 +316,11 @@ void ARM_MultiplyLong(u32 instruction) {
306316
if (set_flags) {
307317
state.cpsr.f.n = result_hi >> 31;
308318
state.cpsr.f.z = result == 0;
319+
if (full) {
320+
state.cpsr.f.c = MultiplyCarryHi<sign_extend>(lhs, rhs, accum_hi);
321+
} else {
322+
state.cpsr.f.c = MultiplyCarryLo(lhs, rhs, accum_lo);
323+
}
309324
}
310325

311326
SetReg(dst_lo, result & 0xFFFFFFFF);

0 commit comments

Comments
 (0)