Skip to content
This repository was archived by the owner on May 21, 2019. It is now read-only.

Commit a09d09d

Browse files
author
Nick Kledzik
committed
<rdar://problem/12512722> Use arm divide instruction if available
git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@182665 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent f5a9ace commit a09d09d

File tree

5 files changed

+100
-0
lines changed

5 files changed

+100
-0
lines changed

lib/arm/divmodsi4.S

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,18 @@
2424
.syntax unified
2525
.align 3
2626
DEFINE_COMPILERRT_FUNCTION(__divmodsi4)
27+
#if __ARM_ARCH_7S__ // fast path, assembled only when __ARM_ARCH_7S__ is set: divide with sdiv
28+
tst r1, r1 // sets Z iff r1 == 0
29+
beq LOCAL_LABEL(divzero) // r1 == 0: skip the divide entirely
30+
mov r3, r0 // keep the original r0; r0 is overwritten by the quotient next
31+
sdiv r0, r3, r1 // r0 = r3 / r1 (signed)
32+
mls r1, r0, r1, r3 // r1 = r3 - r0 * r1, i.e. the remainder
33+
str r1, [r2] // write the remainder to the address held in r2
34+
bx lr
35+
LOCAL_LABEL(divzero):
36+
mov r0, #0 // r1 was zero: return 0. NOTE(review): nothing is stored through r2 on this path
37+
bx lr
38+
#else // __ARM_ARCH_7S__ not set: the code below is assembled instead
2739
ESTABLISH_FRAME
2840
// Set aside the sign of the quotient and modulus, and the address for the
2941
// modulus.
@@ -45,3 +57,4 @@ DEFINE_COMPILERRT_FUNCTION(__divmodsi4)
4557
sub r1, r1, r5, asr #31
4658
str r1, [r6]
4759
CLEAR_FRAME_AND_RETURN
60+
#endif

lib/arm/modsi3.S

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,16 @@
2323
.syntax unified
2424
.align 3
2525
DEFINE_COMPILERRT_FUNCTION(__modsi3)
26+
#if __ARM_ARCH_7S__ // fast path, assembled only when __ARM_ARCH_7S__ is set: remainder via sdiv + mls
27+
tst r1, r1 // sets Z iff r1 == 0
28+
beq LOCAL_LABEL(divzero) // r1 == 0: skip the divide entirely
29+
sdiv r2, r0, r1 // r2 = r0 / r1 (signed); r2 is only a temporary here
30+
mls r0, r2, r1, r0 // r0 = r0 - r2 * r1, i.e. the remainder
31+
bx lr
32+
LOCAL_LABEL(divzero):
33+
mov r0, #0 // r1 was zero: return 0
34+
bx lr
35+
#else // __ARM_ARCH_7S__ not set: the code below is assembled instead
2636
ESTABLISH_FRAME
2737
// Set aside the sign of the dividend.
2838
mov r4, r0
@@ -37,3 +47,4 @@ DEFINE_COMPILERRT_FUNCTION(__modsi3)
3747
eor r0, r0, r4, asr #31
3848
sub r0, r0, r4, asr #31
3949
CLEAR_FRAME_AND_RETURN
50+
#endif

lib/arm/udivmodsi4.S

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,18 @@
3131
.syntax unified
3232
.align 3
3333
DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
34+
#if __ARM_ARCH_7S__ // fast path, assembled only when __ARM_ARCH_7S__ is set: divide with udiv
35+
tst r1, r1 // sets Z iff r1 == 0
36+
beq LOCAL_LABEL(divzero) // r1 == 0: skip the divide entirely
37+
mov r3, r0 // keep the original r0; r0 is overwritten by the quotient next
38+
udiv r0, r3, r1 // r0 = r3 / r1 (unsigned)
39+
mls r1, r0, r1, r3 // r1 = r3 - r0 * r1, i.e. the remainder
40+
str r1, [r2] // write the remainder to the address held in r2
41+
bx lr
42+
LOCAL_LABEL(divzero):
43+
mov r0, #0 // r1 was zero: return 0. NOTE(review): nothing is stored through r2 on this path
44+
bx lr
45+
#else // __ARM_ARCH_7S__ not set: the code below is assembled instead
3446
// We use a simple digit by digit algorithm; before we get into the actual
3547
// divide loop, we must calculate the left-shift amount necessary to align
3648
// the MSB of the divisor with that of the dividend (If this shift is
@@ -78,3 +90,4 @@ LOCAL_LABEL(return):
7890
str a, [r2]
7991
mov r0, q
8092
CLEAR_FRAME_AND_RETURN
93+
#endif

lib/arm/umodsi3.S

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,16 @@
2323
.syntax unified
2424
.align 3
2525
DEFINE_COMPILERRT_FUNCTION(__umodsi3)
26+
#if __ARM_ARCH_7S__ // fast path, assembled only when __ARM_ARCH_7S__ is set: remainder via udiv + mls
27+
tst r1, r1 // sets Z iff r1 == 0
28+
beq LOCAL_LABEL(divzero) // r1 == 0: skip the divide entirely
29+
udiv r2, r0, r1 // r2 = r0 / r1 (unsigned); r2 is only a temporary here
30+
mls r0, r2, r1, r0 // r0 = r0 - r2 * r1, i.e. the remainder
31+
bx lr
32+
LOCAL_LABEL(divzero):
33+
mov r0, #0 // r1 was zero: return 0
34+
bx lr
35+
#else // __ARM_ARCH_7S__ not set: the code below is assembled instead
2636
// We use a simple digit by digit algorithm; before we get into the actual
2737
// divide loop, we must calculate the left-shift amount necessary to align
2838
// the MSB of the divisor with that of the dividend.
@@ -56,3 +66,4 @@ LOCAL_LABEL(mainLoop):
5666
subs r, a, b
5767
movhs a, r
5868
bx lr
69+
#endif

test/timing/modsi3.c

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
#include "timing.h"
2+
#include <stdio.h>
3+
4+
#define INPUT_TYPE int32_t
5+
#define INPUT_SIZE 256
6+
#define FUNCTION_NAME __modsi3
7+
8+
#ifndef LIBNAME
9+
#define LIBNAME UNKNOWN
10+
#endif
11+
12+
#define LIBSTRING LIBSTRINGX(LIBNAME)
13+
#define LIBSTRINGX(a) LIBSTRINGXX(a)
14+
#define LIBSTRINGXX(a) #a
15+
16+
INPUT_TYPE FUNCTION_NAME(INPUT_TYPE input1, INPUT_TYPE input2);
17+
18+
int main(int argc, char *argv[]) { // times FUNCTION_NAME over INPUT_SIZE operand pairs and prints the best per-call figure
19+
INPUT_TYPE input1[INPUT_SIZE]; // first argument for each timed call
20+
INPUT_TYPE input2[INPUT_SIZE]; // second argument for each timed call
21+
int i, j; // i: index into the input arrays, j: trial counter
22+
23+
srand(42); // fixed seed, so the same rand() sequence is used on every run
24+
25+
// Fill both operand arrays with pseudo-random values.
26+
for (i=0; i<INPUT_SIZE; ++i) {
27+
input1[i] = rand();
28+
input2[i] = rand() + 1; // rand() is non-negative, so +1 keeps this operand nonzero; NOTE(review): overflows if rand() returns INT_MAX
29+
}
30+
31+
int64_t fixedInput = INT64_C(0x1234567890ABCDEF); // NOTE(review): never read anywhere in this function
32+
33+
double bestTime = __builtin_inf(); // start at +infinity so the first trial always becomes the minimum
34+
void *dummyp; // only ever receives the alloca() result below; never read
35+
for (j=0; j<1024; ++j) { // 1024 trials; the fastest one is kept
36+
37+
uint64_t startTime = mach_absolute_time(); // timestamp before the batch of calls
38+
for (i=0; i<INPUT_SIZE; ++i)
39+
FUNCTION_NAME(input1[i], input2[i]); // return value is discarded; only the elapsed time is used
40+
uint64_t endTime = mach_absolute_time(); // timestamp after the batch of calls
41+
42+
double thisTime = intervalInCycles(startTime, endTime); // helper not defined in this file; presumably converts the interval to cycles (confirm in timing.h)
43+
bestTime = __builtin_fmin(thisTime, bestTime); // keep the smaller of this trial and the best so far
44+
45+
// Move the stack alignment between trials to eliminate (mostly) aliasing effects
46+
dummyp = alloca(1);
47+
}
48+
49+
printf("%16s: %f cycles.\n", LIBSTRING, bestTime / (double) INPUT_SIZE); // best trial divided by the number of calls per trial
50+
51+
return 0;
52+
}

0 commit comments

Comments
 (0)