Skip to content

Commit 5cdb132

Browse files
committed
cmd/compile/internal/ssa: improve masking codegen on PPC64
Generate RLDIC[LR] instead of MOVD mask, Rx; AND Rx, Ry, Rz. This helps reduce code size, and reduces the latency caused by the constant load. Similarly, for smaller-than-register values, truncate constants which exceed the range of the value's type to avoid needing to load a constant. Change-Id: I6019684795eb8962d4fd6d9585d08b17c15e7d64 Reviewed-on: https://go-review.googlesource.com/c/go/+/515576 Reviewed-by: Lynn Boger <[email protected]> Reviewed-by: Dmitri Shuralyov <[email protected]> Run-TryBot: Paul Murphy <[email protected]> TryBot-Result: Gopher Robot <[email protected]> Reviewed-by: Cherry Mui <[email protected]>
1 parent 2186909 commit 5cdb132

File tree

4 files changed

+181
-0
lines changed

4 files changed

+181
-0
lines changed

src/cmd/compile/internal/ssa/_gen/PPC64latelower.rules

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,16 @@
1717
(SETBCR [0] cmp) && buildcfg.GOPPC64 <= 9 => (ISELZ [4] (MOVDconst [1]) cmp)
1818
(SETBC [1] cmp) && buildcfg.GOPPC64 <= 9 => (ISELZ [1] (MOVDconst [1]) cmp)
1919
(SETBCR [1] cmp) && buildcfg.GOPPC64 <= 9 => (ISELZ [5] (MOVDconst [1]) cmp)
20+
21+
// The upper bits of smaller-than-register values are undefined. Take advantage of that.
22+
(AND <t> x:(MOVDconst [m]) n) && t.Size() <= 2 => (Select0 (ANDCCconst [int64(int16(m))] n))
23+
24+
// Convert simple bit masks to an equivalent rldic[lr] if possible.
25+
(AND x:(MOVDconst [m]) n) && isPPC64ValidShiftMask(m) => (RLDICL [encodePPC64RotateMask(0,m,64)] n)
26+
(AND x:(MOVDconst [m]) n) && m != 0 && isPPC64ValidShiftMask(^m) => (RLDICR [encodePPC64RotateMask(0,m,64)] n)
27+
28+
// If the RLDICL does not rotate its value, a shifted value can be merged.
29+
(RLDICL [em] x:(SRDconst [s] a)) && (em&0xFF0000) == 0 => (RLDICL [mergePPC64RLDICLandSRDconst(em, s)] a)
30+
31+
// Convert rotated 32 bit masks on 32 bit values into rlwinm. In general, this leaves the upper 32 bits in an undefined state.
32+
(AND <t> x:(MOVDconst [m]) n) && t.Size() == 4 && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(0,m,32)] n)

src/cmd/compile/internal/ssa/rewrite.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1499,6 +1499,25 @@ func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
14991499
return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
15001500
}
15011501

1502+
// mergePPC64RLDICLandSRDconst merges (RLDICL [encoded] (SRDconst [s] x)) into
// (RLDICL [new_encoded] x) and returns new_encoded.
//
// SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
// RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
// operations can be combined: the shift contributes a left rotate of 64-s
// and a mask begin (mb) of s.
//
// encoded uses the layout produced by encodePPC64RotateMask, i.e.
// me | mb<<8 | rotate<<16 | nbits<<24. This function assumes the two opcodes
// can be merged, and panics if encoded already carries a non-zero rotate.
func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
	// The rotate is expected to be 0. Checking first is equivalent to checking
	// later, since the mb update below never touches the rotate bits.
	if encoded&0xFF0000 != 0 {
		panic("non-zero rotate")
	}
	// A larger mb is a smaller mask, so keep whichever of the two is larger.
	if mb := s; (encoded>>8)&0xFF < mb {
		encoded = encoded&^0xFF00 | mb<<8
	}
	// Rotates are taken modulo 64: a shift of 0 must encode a rotate of 0,
	// not the out-of-range value 64.
	r := (64 - s) & 63
	return encoded | r<<16
}
1520+
15021521
// DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask. The values returned as
15031522
// mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
15041523
func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {

src/cmd/compile/internal/ssa/rewritePPC64latelower.go

Lines changed: 123 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

test/codegen/bits.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,3 +394,29 @@ func zeroextendAndMask8to64(a int8, b int16) (x, y uint64) {
394394
return
395395

396396
}
397+
398+
// Verify rotate and mask instructions, and further simplified instructions for small types
399+
func bitRotateAndMask(io64 [4]uint64, io32 [4]uint32, io16 [4]uint16, io8 [4]uint8) {
// Each statement below ANDs one array element with a constant. The ppc64x
// comment in front of a statement lists the instruction patterns expected
// for it (presumably a leading "-" marks a pattern that must be absent).
// The arrays are parameters passed by value, so the stores only affect the
// local copies.
//
// 64-bit cases: masks made of one contiguous run of ones anchored at the
// top or the bottom of the register, optionally preceded by a right shift.
400+
// ppc64x: "RLDICR\t[$]0, R[0-9]*, [$]47, R"
401+
io64[0] = io64[0] & 0xFFFFFFFFFFFF0000
402+
// ppc64x: "RLDICL\t[$]0, R[0-9]*, [$]16, R"
403+
io64[1] = io64[1] & 0x0000FFFFFFFFFFFF
404+
// ppc64x: -"SRD", -"AND", "RLDICL\t[$]60, R[0-9]*, [$]16, R"
405+
io64[2] = (io64[2] >> 4) & 0x0000FFFFFFFFFFFF
406+
// ppc64x: -"SRD", -"AND", "RLDICL\t[$]36, R[0-9]*, [$]28, R"
407+
io64[3] = (io64[3] >> 28) & 0x0000FFFFFFFFFFFF
408+
409+
// 32-bit cases: a contiguous mask, a mask that wraps around bit 0/31, and
// a mask (0xFFFF0002) whose set bits do not form a single run.
// ppc64x: "RLWNM\t[$]0, R[0-9]*, [$]4, [$]19, R"
410+
io32[0] = io32[0] & 0x0FFFF000
411+
// ppc64x: "RLWNM\t[$]0, R[0-9]*, [$]20, [$]3, R"
412+
io32[1] = io32[1] & 0xF0000FFF
413+
// NOTE(review) MOVD and AND in the next check are not quoted like every
// other pattern in this function; confirm the checker enforces them.
// ppc64x: -"RLWNM", MOVD, AND
414+
io32[2] = io32[2] & 0xFFFF0002
415+
416+
// Small-type cases: the constant is wider than the operand, so only its
// truncated value matters (0x5678 = 22136 for uint16, 0x78 = 120 for uint8).
var bigc uint32 = 0x12345678
417+
// ppc64x: "ANDCC\t[$]22136"
418+
io16[0] = io16[0] & uint16(bigc)
419+
420+
// ppc64x: "ANDCC\t[$]120"
421+
io8[0] = io8[0] & uint8(bigc)
422+
}

0 commit comments

Comments
 (0)