Skip to content

Commit 828e70c

Browse files
jgreenhalgh-armJames Greenhalgh
authored andcommitted
[AArch64] Improve arm_neon.h vml<as>_lane handling.
gcc/ * config/aarch64/aarch64-simd-builtins.def (fma): New. * config/aarch64/aarch64-simd.md (aarch64_mla_elt<mode>): New. (aarch64_mla_elt_<vswap_width_name><mode>): Likewise. (aarch64_mls_elt<mode>): Likewise. (aarch64_mls_elt_<vswap_width_name><mode>): Likewise. (aarch64_fma4_elt<mode>): Likewise. (aarch64_fma4_elt_<vswap_width_name><mode>): Likewise. (aarch64_fma4_elt_to_128v2df): Likewise. (aarch64_fma4_elt_to_64df): Likewise. (fnma<mode>4): Likewise. (aarch64_fnma4_elt<mode>): Likewise. (aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise. (aarch64_fnma4_elt_to_128v2df): Likewise. (aarch64_fnma4_elt_to_64df): Likewise. * config/aarch64/iterators.md (VDQSF): New. * config/aarch64/arm_neon.h (vfm<as><sdq>_lane<q>_f<32, 64>): Convert to C implementation. (vml<sa><q>_lane<q>_<fsu><16, 32, 64>): Likewise. gcc/testsuite/ * gcc.target/aarch64/fmla-intrinsic.c: New. * gcc.target/aarch64/mla-intrinsic.c: Likewise. * gcc.target/aarch64/fmls-intrinsic.c: Likewise. * gcc.target/aarch64/mls-intrinsic.c: Likewise. From-SVN: r202625
1 parent 779aea4 commit 828e70c

File tree

10 files changed

+1231
-655
lines changed

10 files changed

+1231
-655
lines changed

gcc/ChangeLog

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,25 @@
1+
2013-09-16 James Greenhalgh <[email protected]>
2+
3+
* config/aarch64/aarch64-simd-builtins.def (fma): New.
4+
* config/aarch64/aarch64-simd.md
5+
(aarch64_mla_elt<mode>): New.
6+
(aarch64_mla_elt_<vswap_width_name><mode>): Likewise.
7+
(aarch64_mls_elt<mode>): Likewise.
8+
(aarch64_mls_elt_<vswap_width_name><mode>): Likewise.
9+
(aarch64_fma4_elt<mode>): Likewise.
10+
(aarch64_fma4_elt_<vswap_width_name><mode>): Likewise.
11+
(aarch64_fma4_elt_to_128v2df): Likewise.
12+
(aarch64_fma4_elt_to_64df): Likewise.
13+
(fnma<mode>4): Likewise.
14+
(aarch64_fnma4_elt<mode>): Likewise.
15+
(aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
16+
(aarch64_fnma4_elt_to_128v2df): Likewise.
17+
(aarch64_fnma4_elt_to_64df): Likewise.
18+
* config/aarch64/iterators.md (VDQSF): New.
19+
* config/aarch64/arm_neon.h
20+
(vfm<as><sdq>_lane<q>_f<32, 64>): Convert to C implementation.
21+
(vml<sa><q>_lane<q>_<fsu><16, 32, 64>): Likewise.
22+
123
2013-09-16 James Greenhalgh <[email protected]>
224

325
* config/aarch64/aarch64-simd.md (aarch64_mul3_elt<mode>): New.

gcc/config/aarch64/aarch64-simd-builtins.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,3 +359,6 @@
359359
/* Implemented by aarch64_st1<VALL:mode>. */
360360
BUILTIN_VALL (STORE1, st1, 0)
361361

362+
/* Implemented by fma<mode>4. */
363+
BUILTIN_VDQF (TERNOP, fma, 4)
364+

gcc/config/aarch64/aarch64-simd.md

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1070,6 +1070,38 @@
10701070
(set_attr "simd_mode" "<MODE>")]
10711071
)
10721072

1073+
(define_insn "*aarch64_mla_elt<mode>"
1074+
[(set (match_operand:VDQHS 0 "register_operand" "=w")
1075+
(plus:VDQHS
1076+
(mult:VDQHS
1077+
(vec_duplicate:VDQHS
1078+
(vec_select:<VEL>
1079+
(match_operand:VDQHS 1 "register_operand" "<h_con>")
1080+
(parallel [(match_operand:SI 2 "immediate_operand")])))
1081+
(match_operand:VDQHS 3 "register_operand" "w"))
1082+
(match_operand:VDQHS 4 "register_operand" "0")))]
1083+
"TARGET_SIMD"
1084+
"mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"
1085+
[(set_attr "simd_type" "simd_mla")
1086+
(set_attr "simd_mode" "<MODE>")]
1087+
)
1088+
1089+
(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1090+
[(set (match_operand:VDQHS 0 "register_operand" "=w")
1091+
(plus:VDQHS
1092+
(mult:VDQHS
1093+
(vec_duplicate:VDQHS
1094+
(vec_select:<VEL>
1095+
(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1096+
(parallel [(match_operand:SI 2 "immediate_operand")])))
1097+
(match_operand:VDQHS 3 "register_operand" "w"))
1098+
(match_operand:VDQHS 4 "register_operand" "0")))]
1099+
"TARGET_SIMD"
1100+
"mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"
1101+
[(set_attr "simd_type" "simd_mla")
1102+
(set_attr "simd_mode" "<MODE>")]
1103+
)
1104+
10731105
(define_insn "aarch64_mls<mode>"
10741106
[(set (match_operand:VQ_S 0 "register_operand" "=w")
10751107
(minus:VQ_S (match_operand:VQ_S 1 "register_operand" "0")
@@ -1081,6 +1113,38 @@
10811113
(set_attr "simd_mode" "<MODE>")]
10821114
)
10831115

1116+
(define_insn "*aarch64_mls_elt<mode>"
1117+
[(set (match_operand:VDQHS 0 "register_operand" "=w")
1118+
(minus:VDQHS
1119+
(match_operand:VDQHS 4 "register_operand" "0")
1120+
(mult:VDQHS
1121+
(vec_duplicate:VDQHS
1122+
(vec_select:<VEL>
1123+
(match_operand:VDQHS 1 "register_operand" "<h_con>")
1124+
(parallel [(match_operand:SI 2 "immediate_operand")])))
1125+
(match_operand:VDQHS 3 "register_operand" "w"))))]
1126+
"TARGET_SIMD"
1127+
"mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"
1128+
[(set_attr "simd_type" "simd_mla")
1129+
(set_attr "simd_mode" "<MODE>")]
1130+
)
1131+
1132+
(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1133+
[(set (match_operand:VDQHS 0 "register_operand" "=w")
1134+
(minus:VDQHS
1135+
(match_operand:VDQHS 4 "register_operand" "0")
1136+
(mult:VDQHS
1137+
(vec_duplicate:VDQHS
1138+
(vec_select:<VEL>
1139+
(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1140+
(parallel [(match_operand:SI 2 "immediate_operand")])))
1141+
(match_operand:VDQHS 3 "register_operand" "w"))))]
1142+
"TARGET_SIMD"
1143+
"mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"
1144+
[(set_attr "simd_type" "simd_mla")
1145+
(set_attr "simd_mode" "<MODE>")]
1146+
)
1147+
10841148
;; Max/Min operations.
10851149
(define_insn "<su><maxmin><mode>3"
10861150
[(set (match_operand:VQ_S 0 "register_operand" "=w")
@@ -1483,6 +1547,137 @@
14831547
(set_attr "simd_mode" "<MODE>")]
14841548
)
14851549

1550+
(define_insn "*aarch64_fma4_elt<mode>"
1551+
[(set (match_operand:VDQF 0 "register_operand" "=w")
1552+
(fma:VDQF
1553+
(vec_duplicate:VDQF
1554+
(vec_select:<VEL>
1555+
(match_operand:VDQF 1 "register_operand" "<h_con>")
1556+
(parallel [(match_operand:SI 2 "immediate_operand")])))
1557+
(match_operand:VDQF 3 "register_operand" "w")
1558+
(match_operand:VDQF 4 "register_operand" "0")))]
1559+
"TARGET_SIMD"
1560+
"fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"
1561+
[(set_attr "simd_type" "simd_fmla_elt")
1562+
(set_attr "simd_mode" "<MODE>")]
1563+
)
1564+
1565+
(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1566+
[(set (match_operand:VDQSF 0 "register_operand" "=w")
1567+
(fma:VDQSF
1568+
(vec_duplicate:VDQSF
1569+
(vec_select:<VEL>
1570+
(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1571+
(parallel [(match_operand:SI 2 "immediate_operand")])))
1572+
(match_operand:VDQSF 3 "register_operand" "w")
1573+
(match_operand:VDQSF 4 "register_operand" "0")))]
1574+
"TARGET_SIMD"
1575+
"fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"
1576+
[(set_attr "simd_type" "simd_fmla_elt")
1577+
(set_attr "simd_mode" "<MODE>")]
1578+
)
1579+
1580+
(define_insn "*aarch64_fma4_elt_to_128df"
1581+
[(set (match_operand:V2DF 0 "register_operand" "=w")
1582+
(fma:V2DF
1583+
(vec_duplicate:V2DF
1584+
(match_operand:DF 1 "register_operand" "w"))
1585+
(match_operand:V2DF 2 "register_operand" "w")
1586+
(match_operand:V2DF 3 "register_operand" "0")))]
1587+
"TARGET_SIMD"
1588+
"fmla\\t%0.2d, %2.2d, %1.2d[0]"
1589+
[(set_attr "simd_type" "simd_fmla_elt")
1590+
(set_attr "simd_mode" "V2DF")]
1591+
)
1592+
1593+
(define_insn "*aarch64_fma4_elt_to_64v2df"
1594+
[(set (match_operand:DF 0 "register_operand" "=w")
1595+
(fma:DF
1596+
(vec_select:DF
1597+
(match_operand:V2DF 1 "register_operand" "w")
1598+
(parallel [(match_operand:SI 2 "immediate_operand")]))
1599+
(match_operand:DF 3 "register_operand" "w")
1600+
(match_operand:DF 4 "register_operand" "0")))]
1601+
"TARGET_SIMD"
1602+
"fmla\\t%0.2d, %3.2d, %1.2d[%2]"
1603+
[(set_attr "simd_type" "simd_fmla_elt")
1604+
(set_attr "simd_mode" "V2DF")]
1605+
)
1606+
1607+
(define_insn "fnma<mode>4"
1608+
[(set (match_operand:VDQF 0 "register_operand" "=w")
1609+
(fma:VDQF
1610+
(match_operand:VDQF 1 "register_operand" "w")
1611+
(neg:VDQF
1612+
(match_operand:VDQF 2 "register_operand" "w"))
1613+
(match_operand:VDQF 3 "register_operand" "0")))]
1614+
"TARGET_SIMD"
1615+
"fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1616+
[(set_attr "simd_type" "simd_fmla")
1617+
(set_attr "simd_mode" "<MODE>")]
1618+
)
1619+
1620+
(define_insn "*aarch64_fnma4_elt<mode>"
1621+
[(set (match_operand:VDQF 0 "register_operand" "=w")
1622+
(fma:VDQF
1623+
(neg:VDQF
1624+
(match_operand:VDQF 3 "register_operand" "w"))
1625+
(vec_duplicate:VDQF
1626+
(vec_select:<VEL>
1627+
(match_operand:VDQF 1 "register_operand" "<h_con>")
1628+
(parallel [(match_operand:SI 2 "immediate_operand")])))
1629+
(match_operand:VDQF 4 "register_operand" "0")))]
1630+
"TARGET_SIMD"
1631+
"fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"
1632+
[(set_attr "simd_type" "simd_fmla_elt")
1633+
(set_attr "simd_mode" "<MODE>")]
1634+
)
1635+
1636+
(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1637+
[(set (match_operand:VDQSF 0 "register_operand" "=w")
1638+
(fma:VDQSF
1639+
(neg:VDQSF
1640+
(match_operand:VDQSF 3 "register_operand" "w"))
1641+
(vec_duplicate:VDQSF
1642+
(vec_select:<VEL>
1643+
(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1644+
(parallel [(match_operand:SI 2 "immediate_operand")])))
1645+
(match_operand:VDQSF 4 "register_operand" "0")))]
1646+
"TARGET_SIMD"
1647+
"fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"
1648+
[(set_attr "simd_type" "simd_fmla_elt")
1649+
(set_attr "simd_mode" "<MODE>")]
1650+
)
1651+
1652+
(define_insn "*aarch64_fnma4_elt_to_128df"
1653+
[(set (match_operand:V2DF 0 "register_operand" "=w")
1654+
(fma:V2DF
1655+
(neg:V2DF
1656+
(match_operand:V2DF 2 "register_operand" "w"))
1657+
(vec_duplicate:V2DF
1658+
(match_operand:DF 1 "register_operand" "w"))
1659+
(match_operand:V2DF 3 "register_operand" "0")))]
1660+
"TARGET_SIMD"
1661+
"fmls\\t%0.2d, %2.2d, %1.2d[0]"
1662+
[(set_attr "simd_type" "simd_fmla_elt")
1663+
(set_attr "simd_mode" "V2DF")]
1664+
)
1665+
1666+
(define_insn "*aarch64_fnma4_elt_to_64v2df"
1667+
[(set (match_operand:DF 0 "register_operand" "=w")
1668+
(fma:DF
1669+
(vec_select:DF
1670+
(match_operand:V2DF 1 "register_operand" "w")
1671+
(parallel [(match_operand:SI 2 "immediate_operand")]))
1672+
(neg:DF
1673+
(match_operand:DF 3 "register_operand" "w"))
1674+
(match_operand:DF 4 "register_operand" "0")))]
1675+
"TARGET_SIMD"
1676+
"fmls\\t%0.2d, %3.2d, %1.2d[%2]"
1677+
[(set_attr "simd_type" "simd_fmla_elt")
1678+
(set_attr "simd_mode" "V2DF")]
1679+
)
1680+
14861681
;; Vector versions of the floating-point frint patterns.
14871682
;; Expands to btrunc, ceil, floor, nearbyint, rint, round.
14881683
(define_insn "<frint_pattern><mode>2"

0 commit comments

Comments
 (0)