|
154 | 154 | ;; ---- [FP<-INT] Packs
|
155 | 155 | ;; ---- [FP<-INT] Unpacks
|
156 | 156 | ;; ---- [FP<-FP] Packs
|
| 157 | +;; ---- [FP<-FP] Truncating conversions |
157 | 158 | ;; ---- [FP<-FP] Packs (bfloat16)
|
158 | 159 | ;; ---- [FP<-FP] Unpacks
|
| 160 | +;; ---- [FP<-FP] Extending conversions |
159 | 161 | ;; ---- [PRED<-PRED] Packs
|
160 | 162 | ;; ---- [PRED<-PRED] Unpacks
|
161 | 163 | ;;
|
|
9524 | 9526 | ;; - FCVTZU
|
9525 | 9527 | ;; -------------------------------------------------------------------------
|
9526 | 9528 |
|
9527 |
| -;; Unpredicated conversion of floats to integers of the same size (HF to HI, |
9528 |
| -;; SF to SI or DF to DI). |
9529 |
| -(define_expand "<optab><mode><v_int_equiv>2" |
9530 |
| - [(set (match_operand:<V_INT_EQUIV> 0 "register_operand") |
9531 |
| - (unspec:<V_INT_EQUIV> |
| 9529 | +;; Unpredicated conversion of floats to integers of the same size or wider, |
| 9530 | +;; excluding conversions from DF (see below). |
| 9531 | +(define_expand "<optab><SVE_HSF:mode><SVE_HSDI:mode>2" |
| 9532 | + [(set (match_operand:SVE_HSDI 0 "register_operand") |
| 9533 | + (unspec:SVE_HSDI |
| 9534 | + [(match_dup 2) |
| 9535 | + (match_dup 3) |
| 9536 | + (match_operand:SVE_HSF 1 "register_operand")] |
| 9537 | + SVE_COND_FCVTI))] |
| 9538 | + "TARGET_SVE |
| 9539 | + && (~(<SVE_HSDI:self_mask> | <SVE_HSDI:narrower_mask>) & <SVE_HSF:self_mask>) == 0" |
| 9540 | + { |
| 9541 | + operands[2] = aarch64_sve_fp_pred (<SVE_HSDI:MODE>mode, &operands[3]); |
| 9542 | + } |
| 9543 | +) |
| 9544 | + |
| 9545 | +;; SI <- DF can't use SI <- trunc (DI <- DF) without -ffast-math, so this |
| 9546 | +;; truncating variant of FCVTZ{S,U} is useful for auto-vectorization. |
| 9547 | +;; |
| 9548 | +;; DF is the only source mode for which the mask used above doesn't apply, |
| 9549 | +;; so we define a separate pattern for it here. |
| 9550 | +(define_expand "<optab><VNx2DF_ONLY:mode><SVE_2SDI:mode>2" |
| 9551 | + [(set (match_operand:SVE_2SDI 0 "register_operand") |
| 9552 | + (unspec:SVE_2SDI |
9532 | 9553 | [(match_dup 2)
|
9533 | 9554 | (const_int SVE_RELAXED_GP)
|
9534 |
| - (match_operand:SVE_FULL_F 1 "register_operand")] |
| 9555 | + (match_operand:VNx2DF_ONLY 1 "register_operand")] |
9535 | 9556 | SVE_COND_FCVTI))]
|
9536 | 9557 | "TARGET_SVE"
|
9537 | 9558 | {
|
9538 |
| - operands[2] = aarch64_ptrue_reg (<VPRED>mode); |
| 9559 | + operands[2] = aarch64_ptrue_reg (VNx2BImode); |
9539 | 9560 | }
|
9540 | 9561 | )
|
9541 | 9562 |
|
|
9554 | 9575 | }
|
9555 | 9576 | )
|
9556 | 9577 |
|
9557 |
| -;; Predicated narrowing float-to-integer conversion. |
9558 |
| -(define_insn "@aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>" |
9559 |
| - [(set (match_operand:VNx4SI_ONLY 0 "register_operand") |
9560 |
| - (unspec:VNx4SI_ONLY |
| 9578 | +;; As above, for pairs used by the auto-vectorizer only. |
| 9579 | +(define_insn "*aarch64_sve_<optab>_nontrunc<SVE_PARTIAL_F:mode><SVE_HSDI:mode>" |
| 9580 | + [(set (match_operand:SVE_HSDI 0 "register_operand") |
| 9581 | + (unspec:SVE_HSDI |
| 9582 | + [(match_operand:<SVE_HSDI:VPRED> 1 "aarch64_predicate_operand") |
| 9583 | + (match_operand:SI 3 "aarch64_sve_gp_strictness") |
| 9584 | + (match_operand:SVE_PARTIAL_F 2 "register_operand")] |
| 9585 | + SVE_COND_FCVTI))] |
| 9586 | + "TARGET_SVE |
| 9587 | + && (~(<SVE_HSDI:self_mask> | <SVE_HSDI:narrower_mask>) & <SVE_PARTIAL_F:self_mask>) == 0" |
| 9588 | + {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ] |
| 9589 | + [ w , Upl , 0 ; * ] fcvtz<su>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_PARTIAL_F:Vetype> |
| 9590 | + [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;fcvtz<su>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_PARTIAL_F:Vetype> |
| 9591 | + } |
| 9592 | +) |
| 9593 | + |
| 9594 | +;; Predicated narrowing float-to-integer conversion. The VNx2DF->VNx4SI |
| 9595 | +;; variant is provided for the ACLE, where the zeroed odd-indexed lanes are |
| 9596 | +;; significant. The VNx2DF->VNx2SI variant is provided for auto-vectorization, |
| 9597 | +;; where the upper 32 bits of each container are ignored. |
| 9598 | +(define_insn "@aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><SVE_SI:mode>" |
| 9599 | + [(set (match_operand:SVE_SI 0 "register_operand") |
| 9600 | + (unspec:SVE_SI |
9561 | 9601 | [(match_operand:VNx2BI 1 "register_operand")
|
9562 | 9602 | (match_operand:SI 3 "aarch64_sve_gp_strictness")
|
9563 | 9603 | (match_operand:VNx2DF_ONLY 2 "register_operand")]
|
9564 | 9604 | SVE_COND_FCVTI))]
|
9565 | 9605 | "TARGET_SVE"
|
9566 | 9606 | {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
|
9567 |
| - [ w , Upl , 0 ; * ] fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype> |
9568 |
| - [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype> |
| 9607 | + [ w , Upl , 0 ; * ] fcvtz<su>\t%0.<SVE_SI:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype> |
| 9608 | + [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;fcvtz<su>\t%0.<SVE_SI:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype> |
9569 | 9609 | }
|
9570 | 9610 | )
|
9571 | 9611 |
|
|
9710 | 9750 | ;; - UCVTF
|
9711 | 9751 | ;; -------------------------------------------------------------------------
|
9712 | 9752 |
|
9713 |
| -;; Unpredicated conversion of integers to floats of the same size |
9714 |
| -;; (HI to HF, SI to SF or DI to DF). |
9715 |
| -(define_expand "<optab><v_int_equiv><mode>2" |
9716 |
| - [(set (match_operand:SVE_FULL_F 0 "register_operand") |
9717 |
| - (unspec:SVE_FULL_F |
| 9753 | +;; Unpredicated conversion of integers to floats of the same size or |
| 9754 | +;; narrower. |
| 9755 | +(define_expand "<optab><SVE_HSDI:mode><SVE_F:mode>2" |
| 9756 | + [(set (match_operand:SVE_F 0 "register_operand") |
| 9757 | + (unspec:SVE_F |
9718 | 9758 | [(match_dup 2)
|
9719 |
| - (const_int SVE_RELAXED_GP) |
9720 |
| - (match_operand:<V_INT_EQUIV> 1 "register_operand")] |
| 9759 | + (match_dup 3) |
| 9760 | + (match_operand:SVE_HSDI 1 "register_operand")] |
9721 | 9761 | SVE_COND_ICVTF))]
|
9722 |
| - "TARGET_SVE" |
| 9762 | + "TARGET_SVE |
| 9763 | + && (~(<SVE_HSDI:self_mask> | <SVE_HSDI:narrower_mask>) & <SVE_F:self_mask>) == 0" |
9723 | 9764 | {
|
9724 |
| - operands[2] = aarch64_ptrue_reg (<VPRED>mode); |
| 9765 | + operands[2] = aarch64_sve_fp_pred (<SVE_HSDI:MODE>mode, &operands[3]); |
9725 | 9766 | }
|
9726 | 9767 | )
|
9727 | 9768 |
|
|
9741 | 9782 | }
|
9742 | 9783 | )
|
9743 | 9784 |
|
| 9785 | +;; As above, for pairs that are used by the auto-vectorizer only. |
| 9786 | +(define_insn "*aarch64_sve_<optab>_nonextend<SVE_HSDI:mode><SVE_PARTIAL_F:mode>" |
| 9787 | + [(set (match_operand:SVE_PARTIAL_F 0 "register_operand") |
| 9788 | + (unspec:SVE_PARTIAL_F |
| 9789 | + [(match_operand:<SVE_HSDI:VPRED> 1 "aarch64_predicate_operand") |
| 9790 | + (match_operand:SI 3 "aarch64_sve_gp_strictness") |
| 9791 | + (match_operand:SVE_HSDI 2 "register_operand")] |
| 9792 | + SVE_COND_ICVTF))] |
| 9793 | + "TARGET_SVE |
| 9794 | + && (~(<SVE_HSDI:self_mask> | <SVE_HSDI:narrower_mask>) & <SVE_PARTIAL_F:self_mask>) == 0" |
| 9795 | + {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ] |
| 9796 | + [ w , Upl , 0 ; * ] <su>cvtf\t%0.<SVE_PARTIAL_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype> |
| 9797 | + [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<su>cvtf\t%0.<SVE_PARTIAL_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype> |
| 9798 | + } |
| 9799 | +) |
| 9800 | + |
9744 | 9801 | ;; Predicated widening integer-to-float conversion.
|
9745 | 9802 | (define_insn "@aarch64_sve_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
|
9746 | 9803 | [(set (match_operand:VNx2DF_ONLY 0 "register_operand")
|
|
9924 | 9981 | }
|
9925 | 9982 | )
|
9926 | 9983 |
|
| 9984 | +;; ------------------------------------------------------------------------- |
| 9985 | +;; ---- [FP<-FP] Truncating conversions |
| 9986 | +;; ------------------------------------------------------------------------- |
| 9987 | +;; Includes: |
| 9988 | +;; - FCVT |
| 9989 | +;; ------------------------------------------------------------------------- |
| 9990 | + |
| 9991 | +;; Unpredicated float-to-float truncation. |
| 9992 | +(define_expand "trunc<SVE_SDF:mode><SVE_PARTIAL_HSF:mode>2" |
| 9993 | + [(set (match_operand:SVE_PARTIAL_HSF 0 "register_operand") |
| 9994 | + (unspec:SVE_PARTIAL_HSF |
| 9995 | + [(match_dup 2) |
| 9996 | + (match_dup 3) |
| 9997 | + (match_operand:SVE_SDF 1 "register_operand")] |
| 9998 | + SVE_COND_FCVT))] |
| 9999 | + "TARGET_SVE && (~<SVE_SDF:narrower_mask> & <SVE_PARTIAL_HSF:self_mask>) == 0" |
| 10000 | + { |
| 10001 | + operands[2] = aarch64_sve_fp_pred (<SVE_SDF:MODE>mode, &operands[3]); |
| 10002 | + } |
| 10003 | +) |
| 10004 | + |
9927 | 10005 | ;; Predicated float-to-float truncation.
|
9928 | 10006 | (define_insn "@aarch64_sve_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
|
9929 | 10007 | [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
|
|
9939 | 10017 | }
|
9940 | 10018 | )
|
9941 | 10019 |
|
| 10020 | +;; As above, for pairs that are used by the auto-vectorizer only. |
| 10021 | +(define_insn "*aarch64_sve_<optab>_trunc<SVE_SDF:mode><SVE_PARTIAL_HSF:mode>" |
| 10022 | + [(set (match_operand:SVE_PARTIAL_HSF 0 "register_operand") |
| 10023 | + (unspec:SVE_PARTIAL_HSF |
| 10024 | + [(match_operand:<SVE_SDF:VPRED> 1 "aarch64_predicate_operand") |
| 10025 | + (match_operand:SI 3 "aarch64_sve_gp_strictness") |
| 10026 | + (match_operand:SVE_SDF 2 "register_operand")] |
| 10027 | + SVE_COND_FCVT))] |
| 10028 | + "TARGET_SVE && (~<SVE_SDF:narrower_mask> & <SVE_PARTIAL_HSF:self_mask>) == 0" |
| 10029 | + {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ] |
| 10030 | + [ w , Upl , 0 ; * ] fcvt\t%0.<SVE_PARTIAL_HSF:Vetype>, %1/m, %2.<SVE_SDF:Vetype> |
| 10031 | + [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;fcvt\t%0.<SVE_PARTIAL_HSF:Vetype>, %1/m, %2.<SVE_SDF:Vetype> |
| 10032 | + } |
| 10033 | +) |
| 10034 | + |
9942 | 10035 | ;; Predicated float-to-float truncation with merging.
|
9943 | 10036 | (define_expand "@cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
|
9944 | 10037 | [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
|
|
10081 | 10174 | }
|
10082 | 10175 | )
|
10083 | 10176 |
|
| 10177 | +;; ------------------------------------------------------------------------- |
| 10178 | +;; ---- [FP<-FP] Extending conversions |
| 10179 | +;; ------------------------------------------------------------------------- |
| 10180 | +;; Includes: |
| 10181 | +;; - FCVT |
| 10182 | +;; ------------------------------------------------------------------------- |
| 10183 | + |
| 10184 | +;; Unpredicated float-to-float extension. |
| 10185 | +(define_expand "extend<SVE_PARTIAL_HSF:mode><SVE_SDF:mode>2" |
| 10186 | + [(set (match_operand:SVE_SDF 0 "register_operand") |
| 10187 | + (unspec:SVE_SDF |
| 10188 | + [(match_dup 2) |
| 10189 | + (match_dup 3) |
| 10190 | + (match_operand:SVE_PARTIAL_HSF 1 "register_operand")] |
| 10191 | + SVE_COND_FCVT))] |
| 10192 | + "TARGET_SVE && (~<SVE_SDF:narrower_mask> & <SVE_PARTIAL_HSF:self_mask>) == 0" |
| 10193 | + { |
| 10194 | + operands[2] = aarch64_sve_fp_pred (<SVE_SDF:MODE>mode, &operands[3]); |
| 10195 | + } |
| 10196 | +) |
| 10197 | + |
10084 | 10198 | ;; Predicated float-to-float extension.
|
10085 | 10199 | (define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
|
10086 | 10200 | [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
|
|
10096 | 10210 | }
|
10097 | 10211 | )
|
10098 | 10212 |
|
| 10213 | +;; As above, for pairs that are used by the auto-vectorizer only. |
| 10214 | +(define_insn "*aarch64_sve_<optab>_nontrunc<SVE_PARTIAL_HSF:mode><SVE_SDF:mode>" |
| 10215 | + [(set (match_operand:SVE_SDF 0 "register_operand") |
| 10216 | + (unspec:SVE_SDF |
| 10217 | + [(match_operand:<SVE_SDF:VPRED> 1 "aarch64_predicate_operand") |
| 10218 | + (match_operand:SI 3 "aarch64_sve_gp_strictness") |
| 10219 | + (match_operand:SVE_PARTIAL_HSF 2 "register_operand")] |
| 10220 | + SVE_COND_FCVT))] |
| 10221 | + "TARGET_SVE && (~<SVE_SDF:narrower_mask> & <SVE_PARTIAL_HSF:self_mask>) == 0" |
| 10222 | + {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ] |
| 10223 | + [ w , Upl , 0 ; * ] fcvt\t%0.<SVE_SDF:Vetype>, %1/m, %2.<SVE_PARTIAL_HSF:Vetype> |
| 10224 | + [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;fcvt\t%0.<SVE_SDF:Vetype>, %1/m, %2.<SVE_PARTIAL_HSF:Vetype> |
| 10225 | + } |
| 10226 | +) |
| 10227 | + |
10099 | 10228 | ;; Predicated float-to-float extension with merging.
|
10100 | 10229 | (define_expand "@cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
|
10101 | 10230 | [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
|
|
0 commit comments