@@ -1285,10 +1285,11 @@ define <vscale x 4 x i32> @reverse_nxv4i32(<vscale x 4 x i32> %a) {
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vid.v v10
; CHECK-NEXT: vrsub.vx v12, v10, a0
-; CHECK-NEXT: vrgather.vv v10, v8, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
%res = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
@@ -1300,10 +1301,11 @@ define <vscale x 8 x i32> @reverse_nxv8i32(<vscale x 8 x i32> %a) {
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT: vid.v v12
; CHECK-NEXT: vrsub.vx v16, v12, a0
-; CHECK-NEXT: vrgather.vv v12, v8, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%res = call <vscale x 8 x i32> @llvm.vector.reverse.nxv8i32(<vscale x 8 x i32> %a)
@@ -1316,10 +1318,11 @@ define <vscale x 16 x i32> @reverse_nxv16i32(<vscale x 16 x i32> %a) {
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; CHECK-NEXT: vid.v v16
; CHECK-NEXT: vrsub.vx v24, v16, a0
-; CHECK-NEXT: vrgather.vv v16, v8, v24
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v16, v8, v24
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
%res = call <vscale x 16 x i32> @llvm.vector.reverse.nxv16i32(<vscale x 16 x i32> %a)
@@ -1348,10 +1351,11 @@ define <vscale x 2 x i64> @reverse_nxv2i64(<vscale x 2 x i64> %a) {
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
; CHECK-NEXT: vid.v v10
; CHECK-NEXT: vrsub.vx v12, v10, a0
-; CHECK-NEXT: vrgather.vv v10, v8, v12
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
%res = call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> %a)
@@ -1364,10 +1368,11 @@ define <vscale x 4 x i64> @reverse_nxv4i64(<vscale x 4 x i64> %a) {
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vid.v v12
; CHECK-NEXT: vrsub.vx v16, v12, a0
-; CHECK-NEXT: vrgather.vv v12, v8, v16
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%res = call <vscale x 4 x i64> @llvm.vector.reverse.nxv4i64(<vscale x 4 x i64> %a)
@@ -1379,10 +1384,11 @@ define <vscale x 8 x i64> @reverse_nxv8i64(<vscale x 8 x i64> %a) {
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT: vid.v v16
; CHECK-NEXT: vrsub.vx v24, v16, a0
-; CHECK-NEXT: vrgather.vv v16, v8, v24
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v16, v8, v24
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
%res = call <vscale x 8 x i64> @llvm.vector.reverse.nxv8i64(<vscale x 8 x i64> %a)
@@ -1526,10 +1532,11 @@ define <vscale x 4 x float> @reverse_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vid.v v10
; CHECK-NEXT: vrsub.vx v12, v10, a0
-; CHECK-NEXT: vrgather.vv v10, v8, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
%res = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
@@ -1541,10 +1548,11 @@ define <vscale x 8 x float> @reverse_nxv8f32(<vscale x 8 x float> %a) {
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT: vid.v v12
; CHECK-NEXT: vrsub.vx v16, v12, a0
-; CHECK-NEXT: vrgather.vv v12, v8, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%res = call <vscale x 8 x float> @llvm.vector.reverse.nxv8f32(<vscale x 8 x float> %a)
@@ -1557,10 +1565,11 @@ define <vscale x 16 x float> @reverse_nxv16f32(<vscale x 16 x float> %a) {
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; CHECK-NEXT: vid.v v16
; CHECK-NEXT: vrsub.vx v24, v16, a0
-; CHECK-NEXT: vrgather.vv v16, v8, v24
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v16, v8, v24
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
%res = call <vscale x 16 x float> @llvm.vector.reverse.nxv16f32(<vscale x 16 x float> %a)
@@ -1589,10 +1598,11 @@ define <vscale x 2 x double> @reverse_nxv2f64(<vscale x 2 x double> %a) {
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
; CHECK-NEXT: vid.v v10
; CHECK-NEXT: vrsub.vx v12, v10, a0
-; CHECK-NEXT: vrgather.vv v10, v8, v12
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
%res = call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> %a)
@@ -1605,10 +1615,11 @@ define <vscale x 4 x double> @reverse_nxv4f64(<vscale x 4 x double> %a) {
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vid.v v12
; CHECK-NEXT: vrsub.vx v16, v12, a0
-; CHECK-NEXT: vrgather.vv v12, v8, v16
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%res = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> %a)
@@ -1620,10 +1631,11 @@ define <vscale x 8 x double> @reverse_nxv8f64(<vscale x 8 x double> %a) {
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT: vid.v v16
; CHECK-NEXT: vrsub.vx v24, v16, a0
-; CHECK-NEXT: vrgather.vv v16, v8, v24
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v16, v8, v24
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
%res = call <vscale x 8 x double> @llvm.vector.reverse.nxv8f64(<vscale x 8 x double> %a)
@@ -1638,10 +1650,11 @@ define <vscale x 3 x i64> @reverse_nxv3i64(<vscale x 3 x i64> %a) {
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vid.v v12
; CHECK-NEXT: vrsub.vx v12, v12, a0
-; CHECK-NEXT: vrgather.vv v16, v8, v12
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v16, v8, v12
; CHECK-NEXT: vmv1r.v v8, v17
; CHECK-NEXT: vmv1r.v v9, v18
; CHECK-NEXT: vmv1r.v v10, v19
@@ -1655,10 +1668,11 @@ define <vscale x 6 x i64> @reverse_nxv6i64(<vscale x 6 x i64> %a) {
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT: vid.v v16
; CHECK-NEXT: vrsub.vx v16, v16, a0
-; CHECK-NEXT: vrgather.vv v24, v8, v16
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v24, v8, v16
; CHECK-NEXT: vmv2r.v v8, v26
; CHECK-NEXT: vmv2r.v v10, v28
; CHECK-NEXT: vmv2r.v v12, v30
@@ -1684,12 +1698,13 @@ define <vscale x 12 x i64> @reverse_nxv12i64(<vscale x 12 x i64> %a) {
; RV32-NEXT: andi sp, sp, -64
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: addi a1, a0, -1
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
+; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
; RV32-NEXT: vid.v v24
; RV32-NEXT: vrsub.vx v24, v24, a1
-; RV32-NEXT: vrgather.vv v0, v16, v24
+; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT: vrgatherei16.vv v0, v16, v24
; RV32-NEXT: vmv4r.v v16, v4
-; RV32-NEXT: vrgather.vv v0, v8, v24
+; RV32-NEXT: vrgatherei16.vv v0, v8, v24
; RV32-NEXT: vmv4r.v v20, v0
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: addi a1, sp, 64
@@ -1720,12 +1735,13 @@ define <vscale x 12 x i64> @reverse_nxv12i64(<vscale x 12 x i64> %a) {
; RV64-NEXT: andi sp, sp, -64
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: addi a1, a0, -1
-; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma
; RV64-NEXT: vid.v v24
; RV64-NEXT: vrsub.vx v24, v24, a1
-; RV64-NEXT: vrgather.vv v0, v16, v24
+; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT: vrgatherei16.vv v0, v16, v24
; RV64-NEXT: vmv4r.v v16, v4
-; RV64-NEXT: vrgather.vv v0, v8, v24
+; RV64-NEXT: vrgatherei16.vv v0, v8, v24
; RV64-NEXT: vmv4r.v v20, v0
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: addi a1, sp, 64