-
Notifications
You must be signed in to change notification settings - Fork 2
GCC
The GCC inline assembler has many traps for the unwary.
The first part of this document contains general observations about the behaviour of the inline assembler and how it interacts with the compiler's optimisation system.
The second part is RISC OS oriented, with suggestions and examples for using the inline assembler in RISC OS programs and the kernel.
This is probably not an exhaustive list!
- The code only affects the parameters passed to it
- Register variables don't last
- If the output parameters are ignored, the code will be dropped
- Passing a pointer to a struct or array doesn't ensure initialisation
- The code has no side effects
- Input registers will not be used after an output register is
None of these things are unreasonable, but they can easily trip you up. (They trip me up, anyway.)
Unless you explicitly tell the compiler that a given variable is going to be affected by the assembler, it will assume that it is not.
// Demonstrates an asm statement that reads r0 without declaring it as an input.
int function()
{
register int code asm( "r0" ) = 15;
register int copy asm( "r1" );
// `code` is not listed as an input operand, so the compiler sees no use of it
// and is free to omit the initialisation of r0.
asm ( "mov r1, r0" : "=r" (copy) );
return copy;
}
Compiles with optimisation to this, with no initialisation of code:
00000000 <function>:
0: e1a01000 mov r1, r0
4: e1a00001 mov r0, r1
8: e12fff1e bx lr
The compiler will allow you to declare a register variable for a register that
can be clobbered by a function call (r0-r3, r12, lr). It will not preserve the
value over a function call for you, even if the variable is referenced later.
int other_function( int *array );
// Demonstrates that a register variable held in a call-clobbered register (r2)
// is not preserved across a function call.
int function()
{
register int number asm ( "r2" );
asm ( "mov r2, #22" : "=r" (number) );
// With -Os the array initialisation is compiled as a call to memset, which
// uses r2 for its own argument before `number` is stored into array[20].
int array[50] = { [20] = number };
return other_function( array );
}
Compiles with -Os to:
00000000 <function>:
0: e52de004 push {lr} ; (str lr, [sp, #-4]!)
4: e24dd0cc sub sp, sp, #204 ; 0xcc
8: e3a020c8 mov r2, #200 ; 0xc8
c: e3a01000 mov r1, #0
10: e1a0000d mov r0, sp
14: ebfffffe bl 0 <memset>
18: e1a0000d mov r0, sp
1c: e58d2050 str r2, [sp, #80] ; 0x50
20: ebfffffe bl 0 <other_function>
24: e28dd0cc add sp, sp, #204 ; 0xcc
28: e49de004 pop {lr} ; (ldr lr, [sp], #4)
2c: e12fff1e bx lr
Note that there is no mov r2, #22 instruction. The optimiser has silently decided the value
isn't used (although it does store whatever is left in r2 after the memset call into the array at 0x1c).
Code with all outputs ignored will be dropped by the optimiser, unless it is labeled with the volatile keyword. An exception seems to be when there are no output parameters, e.g. asm ( "bkpt 1" );
// Demonstrates that an asm statement whose only output is ignored is removed
// by the optimiser.
int function()
{
register int code asm( "r0" ) = 15;
register int copy asm( "r1" );
// Not marked volatile, and `copy` is never read afterwards, so the optimiser
// drops this statement entirely.
asm ( "mov r1, r0" : "=r" (copy) : "r" (code) );
return 17;
}
Compiles with optimization to:
00000000 <function>:
0: e3a00011 mov r0, #17
4: e12fff1e bx lr
// Demonstrates that passing a pointer to an asm statement does not make the
// compiler initialise the array it points to.
int function()
{
int array[] = { 1, 0, 0, 0, 0, 0, 0, 0 };
register int *arr asm( "r0" ) = array;
register int copy asm( "r1" );
// Only the pointer value is declared as an input; the array contents are not
// mentioned, so the compiler may skip some or all of the initialisation.
asm ( "mov r1, r0" : "=r" (copy) : "r" (arr) );
return copy;
}
Compiles with -O4 to:
00000000 <function>:
0: e24dd020 sub sp, sp, #32
4: e1a0000d mov r0, sp
8: e1a01000 mov r1, r0
c: e1a00001 mov r0, r1
10: e28dd020 add sp, sp, #32
14: e12fff1e bx lr
And with -Os to (note the call to memset, and see Wrapping SWIs below):
00000000 <function>:
0: e52de004 push {lr} ; (str lr, [sp, #-4]!)
4: e24dd024 sub sp, sp, #36 ; 0x24
8: e3a0201c mov r2, #28
c: e3a01000 mov r1, #0
10: e28d0004 add r0, sp, #4
14: ebfffffe bl 0 <memset>
18: e1a0000d mov r0, sp
1c: e1a01000 mov r1, r0
20: e1a00001 mov r0, r1
24: e28dd024 add sp, sp, #36 ; 0x24
28: e49de004 pop {lr} ; (ldr lr, [sp], #4)
2c: e12fff1e bx lr
The value passed in r0 is a pointer to valid memory on the stack, but it has not been initialised. (Adding to the confusion, if you initialise the array to something like { 1, 2, 3, 4, 5 }, it will be initialised, at least on current compilers!)
To ensure the local array or structure is initialised, include an anonymous input to the asm instruction like "m" (array).
// Same as the pointer-passing example, but with the array itself declared as
// an input so that the compiler initialises it before the asm statement.
int function()
{
int array[] = { 1, 0, 0, 0, 0, 0, 0, 0 };
register int *arr asm( "r0" ) = array;
register int copy asm( "r1" );
// The anonymous "m" (array) input tells the compiler the asm uses the array's
// memory, which ensures the initialisation is performed.
asm ( "mov r1, r0" : "=r" (copy) : "r" (arr), "m" (array) );
return copy;
}
The "memory" clobber entry is necessary for any assembler that affects memory, or the state of the system, which includes 99% of SWIs.
extern int rma_heap;
#define OS_Heap 0x1d
// Wraps the OS_Heap SWI: passes 2 in r0, the address of rma_heap in r1 and
// `size` in r3, and returns whatever the SWI leaves in r2.
static inline void *alloc( int size )
{
register int code asm( "r0" ) = 2;
register void *heap asm( "r1" ) = &rma_heap;
register int bytes asm( "r3" ) = size;
register void *memory asm( "r2" );
// The "memory" clobber tells the optimiser the SWI has side effects, so two
// calls with identical inputs are not merged into one. "lr" is listed because
// the SVC instruction corrupts it when the code runs in SVC mode.
asm ( "svc %[swi]" : "=r" (memory) : [swi] "i" (OS_Heap | 0x20000), "r" (code), "r" (heap), "r" (bytes) : "memory", "lr" );
return memory;
}
// Calls alloc( 12 ) twice and writes a different value through each returned
// pointer. Used to show that, without the "memory" clobber in alloc, the
// compiler emits the SWI only once because both calls have the same inputs.
void test()
{
{
// First allocation.
int *p = alloc( 12 );
*p = 55;
}
{
// Second allocation, with identical parameters to the first.
int *p = alloc( 12 );
*p = 77;
}
}
Compiles with optimization to:
00000000 <test>:
0: e92d4010 push {r4, lr}
4: e3a0c04d mov ip, #77 ; 0x4d
8: e3a04037 mov r4, #55 ; 0x37
c: e3a00002 mov r0, #2
10: e3a0300c mov r3, #12
14: e59f1010 ldr r1, [pc, #16] ; 2c <test+0x2c>
18: ef02001d svc 0x0002001d
1c: e5824000 str r4, [r2]
20: ef02001d svc 0x0002001d
24: e582c000 str ip, [r2]
28: e8bd8010 pop {r4, pc}
2c: 00000000 andeq r0, r0, r0
But without the "memory" clobber, it compiles to:
00000000 <test>:
0: e3a0c04d mov ip, #77 ; 0x4d
4: e52de004 push {lr} ; (str lr, [sp, #-4]!)
8: e3a00002 mov r0, #2
c: e3a0300c mov r3, #12
10: e59f1008 ldr r1, [pc, #8] ; 20 <test+0x20>
14: ef02001d svc 0x0002001d
18: e582c000 str ip, [r2]
1c: e49df004 pop {pc} ; (ldr pc, [sp], #4)
20: 00000000 andeq r0, r0, r0
Only calling the SWI once (because the parameters are the same).
You might expect this function to return 3x the input value (in + in + in). It will not, it will return 4x, ((in + in) + (in + in)).
// Demonstrates an output operand being allocated to the same register as an
// input: the result is 4x `in` rather than the 3x the template suggests.
int function( int in )
{
int out;
// With a plain "=r" output the compiler may place `out` in the register that
// holds `in`, so the first add overwrites `in` before the second add reads it.
asm ( "add %[out], %[in], %[in]"
"\n add %[out], %[out], %[in]"
: [out] "=r" (out) : [in] "r" (in) );
return out;
}
00000000 <function>:
0: e0800000 add r0, r0, r0
4: e0800000 add r0, r0, r0
8: e12fff1e bx lr
To avoid input registers being used for output registers, label the output register with "=&r", rather than "=r".
// Corrected version of the 3x example: returns in + in + in.
int function( int in )
{
int out;
// "=&r" stops the compiler from using an input register for `out`, so `in`
// is still intact when the second add reads it.
asm ( "add %[out], %[in], %[in]"
"\n add %[out], %[out], %[in]"
: [out] "=&r" (out) : [in] "r" (in) );
return out;
}
00000000 <function>:
0: e0803000 add r3, r0, r0
4: e0833000 add r3, r3, r0
8: e1a00003 mov r0, r3
c: e12fff1e bx lr
If the assembly modifies an input register, you have to list it as an output register as well, otherwise the optimizer will assume the register is unchanged.
Registers that are used internally that aren't an input or output register must be listed in the clobbers list.
// Demonstrates an asm statement that modifies an input register without
// listing it as an output.
int function( int in )
{
static const int array[] = { 1, 2, 3, 4, 5, 6 };
register const int *entry asm ( "r3" ) = array;
// r3 is changed by the asm, but `entry` is only declared as an input, so the
// optimiser assumes it still points at array[0] and returns the constant 1.
asm ( "add r3, r3, #4" : : "r" (entry) );
return *entry;
}
Compiles to:
00000000 <function>:
0: e59f3008 ldr r3, [pc, #8] ; 10 <function+0x10>
4: e2833004 add r3, r3, #4
8: e3a00001 mov r0, #1
c: e12fff1e bx lr
10: 00000000 andeq r0, r0, r0 ; will be filled in with the address of array
As you can see, it assumes that entry has not been modified by the assembler, so it can return the constant.
asm ( "add r3, r3, #4" : "=r" (entry) : "r" (entry) );
0: e59f3008 ldr r3, [pc, #8] ; 10 <function+0x10>
4: e2833004 add r3, r3, #4
8: e5930000 ldr r0, [r3]
c: e12fff1e bx lr
10: 00000000 andeq r0, r0, r0
In RISC OS, most SWIs require specific registers to be used to pass in parameters. Using GCC, the safe way to do this is to use register variables. It's only safe if you follow all the restrictions shown in the first section!
Like this (which will allocate or die!):
// Calls the OS_Heap SWI with its parameters in the fixed registers it
// requires (2 in r0, the address of rma_heap in r1, `size` in r3) and returns
// the value the SWI leaves in r2.
static inline void *alloc( int size )
{
register int code asm( "r0" ) = 2;
register void *heap asm( "r1" ) = &rma_heap;
register int bytes asm( "r3" ) = size;
register void *memory asm( "r2" );
asm ( "svc %[swi]"
: "=r" (memory)
: [swi] "i" (OS_Heap)
, "r" (code)
, "r" (heap)
, "r" (bytes)
// "lr" is corrupted by the SVC instruction when running in SVC mode;
// "memory" marks the SWI as having side effects.
: "lr", "memory" );
return memory;
}
Code that is run from SVC mode needs to include "lr" in the clobber list
of the instruction, since it will be corrupted by the SVC (SWI) instruction.
Omitting it can lead to invisible infinite loops as a routine "returns" to a point
inside the routine.
It would probably be worth defining a SWI_CLOBBER macro which can be set to
"cc", for usr32 mode code, or "lr", "cc" for SVC code.
This was my first problem with a wrapper to Draw_Stroke with a fixed cap and join style:
// First attempt at a Draw_Stroke wrapper with a fixed cap and join style.
// As written it misbehaves when compiled with -Os (see the comment on the
// array initialisation below).
void Draw_Stroke( uint32_t *path )
{
register uint32_t *draw_path asm( "r0" ) = path;
register uint32_t fill_style asm( "r1" ) = 0x3f;
register uint32_t *matrix asm( "r2" ) = 0;
register uint32_t flatness asm( "r3" ) = 0;
register uint32_t thickness asm( "r4" ) = 8;
// With -Os this initialiser is compiled as a call to memset, which corrupts
// r0, r1 and r2 after the register variables above have been set up.
uint32_t cap_and_join_style[5] = { 0x00000001 };
register uint32_t *cap_and_join asm( "r5" ) = cap_and_join_style;
register uint32_t dashes asm( "r6" ) = 0;
// The result shares r0 with draw_path; the template zeroes it when the SWI
// returns with the V flag clear.
register void *error asm( "r0" );
asm volatile ( "svc 0x60704"
"\n movvc r0, #0"
: "=r" (error)
: "r" (draw_path)
, "r" (fill_style)
, "r" (matrix)
, "r" (flatness)
, "r" (thickness)
, "r" (cap_and_join)
, "r" (dashes) );
}
When compiled with the -Os optimization flag, the initialisation of cap_and_join_style silently inserts a call to memset, which corrupts r0, r1 and r2.
00000000 <Draw_Stroke>:
0: e92d4070 push {r4, r5, r6, lr}
4: e24dd018 sub sp, sp, #24
8: e3a02014 mov r2, #20
c: e3a01000 mov r1, #0
10: e28d0004 add r0, sp, #4
14: e3a04008 mov r4, #8
18: ebfffffe bl 0 <memset>
1c: e3a06000 mov r6, #0
20: e28d5004 add r5, sp, #4
24: ef060704 svc 0x00060704
28: 73a00000 movvc r0, #0
2c: e28dd018 add sp, sp, #24
30: e8bd4070 pop {r4, r5, r6, lr}
34: e12fff1e bx lr
Moving the initialisation of the array to before the first register variable declaration avoids that problem.
void Draw_Stroke( uint32_t *path )
{
uint32_t cap_and_join_style[5] = { 0x00000001 };
register uint32_t *draw_path asm( "r0" ) = path;
register uint32_t fill_style asm( "r1" ) = 0x3f;
register uint32_t *matrix asm( "r2" ) = 0;
register uint32_t flatness asm( "r3" ) = 0;
register uint32_t thickness asm( "r4" ) = 8;
register uint32_t *cap_and_join asm( "r5" ) = cap_and_join_style;
register uint32_t dashes asm( "r6" ) = 0;
asm volatile ( "svc "
:
: "r" (draw_path)
, "r" (fill_style)
, "r" (matrix)
, "r" (flatness)
, "r" (thickness)
, "r" (cap_and_join)
, "r" (dashes) );
}
00000000 <Draw_Stroke>:
0: e92d4070 push {r4, r5, r6, lr}
4: e24dd018 sub sp, sp, #24
8: e1a04000 mov r4, r0
c: e3a02010 mov r2, #16
10: e3a01000 mov r1, #0
14: e28d0008 add r0, sp, #8
18: ebfffffe bl 0 <memset>
1c: e3a02000 mov r2, #0
20: e1a00004 mov r0, r4
24: e3a0103f mov r1, #63 ; 0x3f
28: e1a03002 mov r3, r2
2c: e3a04008 mov r4, #8
30: e1a06002 mov r6, r2
34: e28d5004 add r5, sp, #4
38: ef060704 svc 0x00060704
3c: 73a00000 movvc r0, #0
40: e28dd018 add sp, sp, #24
44: e8bd4070 pop {r4, r5, r6, lr}
48: e12fff1e bx lr
The problems, however, don't end there. If you look very carefully, you can see that the array is zeroed, but the first element is not initialised to one.
This is, I think, the final form.
- The register variables are initialised with simple values (at most a pointer)
- The required array is initialised before the first register value
- The optimiser is informed that the assembler uses the array, so it should be initialised
- There is an output value from the assembler, so asm volatile is used in case it is ignored by whoever calls Draw_Stroke
- The assembler has side-effects, so it has a "memory" clobber (probably not, strictly speaking, needed in this case, although it might be necessary with alpha blending)
- It uses the SWI_CLOBBER macro to ensure that the link register is stored on the stack as needed.
- In light of [Register variables don't last], return the output into a normal variable
// Draw_Stroke wrapper with a fixed cap and join style.
//
// Calls SWI 0x60704 with r0 = path, r1 = 0x3f, r2 = 0, r3 = 0, r4 = 8,
// r5 = pointer to cap_and_join_style and r6 = 0.
//
// Returns 0 if the SWI returns with the V flag clear, otherwise the value the
// SWI left in r0.
static inline error_block *Draw_Stroke( uint32_t *path ){
// Keep this declaration before the first register variable declaration, or
// -Os will cause the compiler to forget the preceding registers.
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101422
uint32_t cap_and_join_style[5] = { 0x00000001 }; // Round joints
register uint32_t *draw_path asm( "r0" ) = path;
register uint32_t fill_style asm( "r1" ) = 0x3f;
register uint32_t *matrix asm( "r2" ) = 0;
register uint32_t flatness asm( "r3" ) = 0;
register uint32_t thickness asm( "r4" ) = 8;
register uint32_t *cap_and_join asm( "r5" ) = cap_and_join_style;
register uint32_t dashes asm( "r6" ) = 0;
// The result goes into an ordinary variable, not a register variable, so it
// survives whatever code the compiler places after the asm statement.
register void *error;
asm volatile ( "swi 0x60704"
"\n movvc %[error], #0"
"\n movvs %[error], r0"
: [error] "=r" (error)
// The SWI overwrites r0 when it returns with V set (the template reads it
// back above), so r0 must be declared as modified. Otherwise, once this
// function is inlined, the compiler may assume r0 still holds `path`.
, "+r" (draw_path)
: "r" (fill_style)
, "r" (matrix)
, "r" (flatness)
, "r" (thickness)
, "r" (cap_and_join)
, "r" (dashes)
, "m" (cap_and_join_style) // Without this, array is not initialised
: SWI_CLOBBER, "memory" );
return error;
}