Skip to content

Commit c5d424b

Browse files
mauri870 authored and cherrymui committed
internal/runtime/atomic: add arm native implementations of And8/Or8
With LDREXB/STREXB now available in the arm assembler, we can implement these operations natively. The instructions are available on armv6k and later, but for simplicity I only use them on armv7.

Benchmark results for a Raspberry Pi 3 Model B+:

goos: linux
goarch: arm
pkg: internal/runtime/atomic
cpu: ARMv7 Processor rev 4 (v7l)
       │   old.txt    │               new.txt               │
       │    sec/op    │   sec/op     vs base                │
And8-4   127.65n ± 0%   68.74n ± 0%  -46.15% (p=0.000 n=10)

Change-Id: Ic87f307c35f7d7f56010980302f253056f6d54dc
GitHub-Last-Rev: a735180
GitHub-Pull-Request: #70002
Cq-Include-Trybots: luci.golang.try:gotip-linux-arm
Reviewed-on: https://go-review.googlesource.com/c/go/+/622075
Reviewed-by: Keith Randall <[email protected]>
Reviewed-by: Cherry Mui <[email protected]>
Reviewed-by: Keith Randall <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
1 parent bbdc65b commit c5d424b

File tree

2 files changed

+72
-2
lines changed

2 files changed

+72
-2
lines changed

src/internal/runtime/atomic/atomic_arm.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,11 @@ func goStore64(addr *uint64, v uint64) {
159159
addrLock(addr).unlock()
160160
}
161161

162+
// Or8 is declared here without a body. Its entry point, ·Or8, is written in
// assembly in atomic_arm.s; that stub jumps either to the native armOr8
// routine or to the pure-Go fallback goOr8.
//
//go:noescape
163+
func Or8(addr *uint8, v uint8)
164+
162165
//go:nosplit
163-
func Or8(addr *uint8, v uint8) {
166+
func goOr8(addr *uint8, v uint8) {
164167
// Align down to 4 bytes and use 32-bit CAS.
165168
uaddr := uintptr(unsafe.Pointer(addr))
166169
addr32 := (*uint32)(unsafe.Pointer(uaddr &^ 3))
@@ -173,8 +176,11 @@ func Or8(addr *uint8, v uint8) {
173176
}
174177
}
175178

179+
// And8 is declared here without a body. Its entry point, ·And8, is written in
// assembly in atomic_arm.s; that stub jumps either to the native armAnd8
// routine or to the pure-Go fallback goAnd8.
//
//go:noescape
180+
func And8(addr *uint8, v uint8)
181+
176182
//go:nosplit
177-
func And8(addr *uint8, v uint8) {
183+
func goAnd8(addr *uint8, v uint8) {
178184
// Align down to 4 bytes and use 32-bit CAS.
179185
uaddr := uintptr(unsafe.Pointer(addr))
180186
addr32 := (*uint32)(unsafe.Pointer(uaddr &^ 3))

src/internal/runtime/atomic/atomic_arm.s

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,42 @@ store64loop:
228228
DMB MB_ISH
229229
RET
230230

231+
// armAnd8 ANDs the byte argument v into the byte addressed by R1 using an
// LDREXB/STREXB retry loop. It is reached by a JMP from ·And8, which loads
// addr into R1 beforehand.
TEXT armAnd8<>(SB),NOSPLIT,$0-5
232+
// addr is already in R1
233+
MOVB v+4(FP), R2 // R2 = v, the mask to apply
234+
235+
and8loop:
236+
LDREXB (R1), R6 // R6 = current byte at addr (exclusive load)
237+
238+
DMB MB_ISHST // barrier; MB_ISHST is a macro defined outside this view
239+
240+
AND R2, R6 // R6 &= v
241+
STREXB R6, (R1), R0 // try to store R6 back; R0 receives the status
242+
CMP $0, R0 // status 0 means the exclusive store succeeded
243+
BNE and8loop // otherwise reload and retry
244+
245+
DMB MB_ISH // barrier after the successful update
246+
247+
RET
248+
249+
// armOr8 ORs the byte argument v into the byte addressed by R1 using an
// LDREXB/STREXB retry loop. It is reached by a JMP from ·Or8, which loads
// addr into R1 beforehand.
TEXT armOr8<>(SB),NOSPLIT,$0-5
250+
// addr is already in R1
251+
MOVB v+4(FP), R2 // R2 = v, the bits to set
252+
253+
or8loop:
254+
LDREXB (R1), R6 // R6 = current byte at addr (exclusive load)
255+
256+
DMB MB_ISHST // barrier; MB_ISHST is a macro defined outside this view
257+
258+
ORR R2, R6 // R6 |= v
259+
STREXB R6, (R1), R0 // try to store R6 back; R0 receives the status
260+
CMP $0, R0 // status 0 means the exclusive store succeeded
261+
BNE or8loop // otherwise reload and retry
262+
263+
DMB MB_ISH // barrier after the successful update
264+
265+
RET
266+
231267
// The following functions all panic if their address argument isn't
232268
// 8-byte aligned. Since we're calling back into Go code to do this,
233269
// we have to cooperate with stack unwinding. In the normal case, the
@@ -310,3 +346,31 @@ TEXT ·Store64(SB),NOSPLIT,$-4-12
310346
JMP ·goStore64(SB)
311347
#endif
312348
JMP armStore64<>(SB)
349+
350+
// ·And8 is the assembly entry point for the Go declaration And8. It loads
// addr into R1 and then jumps to one of two implementations: the native
// armAnd8 routine, or the Go fallback goAnd8 when the build is not GOARM_7
// and the runtime ARMHasV7Atomics flag is not 1.
TEXT ·And8(SB),NOSPLIT,$-4-5
351+
NO_LOCAL_POINTERS
352+
MOVW addr+0(FP), R1 // armAnd8 expects addr in R1
353+
354+
// Uses STREXB/LDREXB that is armv6k or later.
355+
// For simplicity we only enable this on armv7.
356+
#ifndef GOARM_7
357+
MOVB internal∕cpu·ARM+const_offsetARMHasV7Atomics(SB), R11 // R11 = runtime feature flag
358+
CMP $1, R11
359+
BEQ 2(PC) // flag set: skip the fallback jump below
360+
JMP ·goAnd8(SB) // flag clear: use the Go implementation
361+
#endif
362+
JMP armAnd8<>(SB) // native LDREXB/STREXB implementation
363+
364+
// ·Or8 is the assembly entry point for the Go declaration Or8. It loads
// addr into R1 and then jumps to one of two implementations: the native
// armOr8 routine, or the Go fallback goOr8 when the build is not GOARM_7
// and the runtime ARMHasV7Atomics flag is not 1.
TEXT ·Or8(SB),NOSPLIT,$-4-5
365+
NO_LOCAL_POINTERS
366+
MOVW addr+0(FP), R1 // armOr8 expects addr in R1
367+
368+
// Uses STREXB/LDREXB that is armv6k or later.
369+
// For simplicity we only enable this on armv7.
370+
#ifndef GOARM_7
371+
MOVB internal∕cpu·ARM+const_offsetARMHasV7Atomics(SB), R11 // R11 = runtime feature flag
372+
CMP $1, R11
373+
BEQ 2(PC) // flag set: skip the fallback jump below
374+
JMP ·goOr8(SB) // flag clear: use the Go implementation
375+
#endif
376+
JMP armOr8<>(SB) // native LDREXB/STREXB implementation

0 commit comments

Comments
 (0)