Skip to content

Commit 9617da8

Browse files
authored
[RISCV] Use a ta vslideup if inserting over end of InterSubVT (#83230)
The description in #83146 is slightly inaccurate: that change relaxes a tail-undisturbed vslideup to tail-agnostic only if we are inserting over the entire tail of the vector **and** we didn't shrink the LMUL of the vector being inserted into. This commit additionally handles the case where we did shrink down the LMUL via InterSubVT, by checking whether we inserted over the entire tail of InterSubVT — the actual type on which the vslideup is performed — rather than of VecVT.
1 parent f81d5e5 commit 9617da8

File tree

4 files changed

+14
-14
lines changed

4 files changed

+14
-14
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9732,9 +9732,9 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
97329732
ElementCount::getScalable(RemIdx) + SubVecVT.getVectorElementCount();
97339733
VL = computeVLMax(SubVecVT, DL, DAG);
97349734

9735-
// Use tail agnostic policy if we're inserting over Vec's tail.
9735+
// Use tail agnostic policy if we're inserting over InterSubVT's tail.
97369736
unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9737-
if (EndIndex == VecVT.getVectorElementCount())
9737+
if (EndIndex == InterSubVT.getVectorElementCount())
97389738
Policy = RISCVII::TAIL_AGNOSTIC;
97399739

97409740
// If we're inserting into the lowest elements, use a tail undisturbed

llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -474,7 +474,7 @@ define <vscale x 6 x half> @extract_nxv6f16_nxv12f16_6(<vscale x 12 x half> %in)
474474
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
475475
; CHECK-NEXT: vslidedown.vx v12, v9, a0
476476
; CHECK-NEXT: add a1, a0, a0
477-
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
477+
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
478478
; CHECK-NEXT: vslideup.vx v12, v10, a0
479479
; CHECK-NEXT: vmv2r.v v8, v12
480480
; CHECK-NEXT: ret

llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,7 @@ define <vscale x 16 x i32> @insert_nxv16i32_nxv1i32_1(<vscale x 16 x i32> %vec,
227227
; CHECK-NEXT: csrr a0, vlenb
228228
; CHECK-NEXT: srli a0, a0, 3
229229
; CHECK-NEXT: add a1, a0, a0
230-
; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma
230+
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
231231
; CHECK-NEXT: vslideup.vx v8, v16, a0
232232
; CHECK-NEXT: ret
233233
%v = call <vscale x 16 x i32> @llvm.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec, i64 1)
@@ -306,7 +306,7 @@ define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_7(<vscale x 16 x i8> %vec, <vsc
306306
; CHECK-NEXT: csrr a0, vlenb
307307
; CHECK-NEXT: srli a1, a0, 3
308308
; CHECK-NEXT: sub a1, a0, a1
309-
; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma
309+
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
310310
; CHECK-NEXT: vslideup.vx v8, v10, a1
311311
; CHECK-NEXT: ret
312312
%v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 7)
@@ -319,7 +319,7 @@ define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_15(<vscale x 16 x i8> %vec, <vs
319319
; CHECK-NEXT: csrr a0, vlenb
320320
; CHECK-NEXT: srli a1, a0, 3
321321
; CHECK-NEXT: sub a1, a0, a1
322-
; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma
322+
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
323323
; CHECK-NEXT: vslideup.vx v9, v10, a1
324324
; CHECK-NEXT: ret
325325
%v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 15)
@@ -344,7 +344,7 @@ define <vscale x 32 x half> @insert_nxv32f16_nxv2f16_2(<vscale x 32 x half> %vec
344344
; CHECK-NEXT: csrr a0, vlenb
345345
; CHECK-NEXT: srli a0, a0, 2
346346
; CHECK-NEXT: add a1, a0, a0
347-
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
347+
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
348348
; CHECK-NEXT: vslideup.vx v8, v16, a0
349349
; CHECK-NEXT: ret
350350
%v = call <vscale x 32 x half> @llvm.vector.insert.nxv2f16.nxv32f16(<vscale x 32 x half> %vec, <vscale x 2 x half> %subvec, i64 2)
@@ -357,7 +357,7 @@ define <vscale x 32 x half> @insert_nxv32f16_nxv2f16_26(<vscale x 32 x half> %ve
357357
; CHECK-NEXT: csrr a0, vlenb
358358
; CHECK-NEXT: srli a0, a0, 2
359359
; CHECK-NEXT: add a1, a0, a0
360-
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
360+
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
361361
; CHECK-NEXT: vslideup.vx v14, v16, a0
362362
; CHECK-NEXT: ret
363363
%v = call <vscale x 32 x half> @llvm.vector.insert.nxv2f16.nxv32f16(<vscale x 32 x half> %vec, <vscale x 2 x half> %subvec, i64 26)

llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -916,7 +916,7 @@ define half @vreduce_ord_fadd_nxv6f16(<vscale x 6 x half> %v, half %s) {
916916
; CHECK-NEXT: csrr a0, vlenb
917917
; CHECK-NEXT: srli a0, a0, 2
918918
; CHECK-NEXT: add a1, a0, a0
919-
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
919+
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
920920
; CHECK-NEXT: vslideup.vx v9, v10, a0
921921
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
922922
; CHECK-NEXT: vfmv.s.f v10, fa0
@@ -938,11 +938,11 @@ define half @vreduce_ord_fadd_nxv10f16(<vscale x 10 x half> %v, half %s) {
938938
; CHECK-NEXT: csrr a0, vlenb
939939
; CHECK-NEXT: srli a0, a0, 2
940940
; CHECK-NEXT: add a1, a0, a0
941-
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
941+
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
942942
; CHECK-NEXT: vslideup.vx v10, v12, a0
943943
; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
944944
; CHECK-NEXT: vmv.v.v v11, v12
945-
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
945+
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
946946
; CHECK-NEXT: vslideup.vx v11, v12, a0
947947
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
948948
; CHECK-NEXT: vfmv.s.f v12, fa0
@@ -1002,7 +1002,7 @@ define half @vreduce_fadd_nxv6f16(<vscale x 6 x half> %v, half %s) {
10021002
; CHECK-NEXT: csrr a0, vlenb
10031003
; CHECK-NEXT: srli a0, a0, 2
10041004
; CHECK-NEXT: add a1, a0, a0
1005-
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
1005+
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
10061006
; CHECK-NEXT: vslideup.vx v9, v10, a0
10071007
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
10081008
; CHECK-NEXT: vfmv.s.f v10, fa0
@@ -1025,11 +1025,11 @@ define half @vreduce_fmin_nxv10f16(<vscale x 10 x half> %v) {
10251025
; CHECK-NEXT: vlse16.v v12, (a1), zero
10261026
; CHECK-NEXT: srli a0, a0, 2
10271027
; CHECK-NEXT: add a1, a0, a0
1028-
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
1028+
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
10291029
; CHECK-NEXT: vslideup.vx v10, v12, a0
10301030
; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
10311031
; CHECK-NEXT: vmv.v.v v11, v12
1032-
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
1032+
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
10331033
; CHECK-NEXT: vslideup.vx v11, v12, a0
10341034
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
10351035
; CHECK-NEXT: vfredmin.vs v8, v8, v8

0 commit comments

Comments
 (0)