Skip to content

Commit 470145b

Browse files
authored
[mono][jit] Adding Vector128.GetElement as intrinsic in arm64. (#84929)
* Adding Vector128.GetElement. * Cases with 32-bit and smaller elements are now handled by spill + load. * Addressed review comments. * Code cleanup.
1 parent 6887fc4 commit 470145b

File tree

3 files changed

+127
-3
lines changed

3 files changed

+127
-3
lines changed

src/mono/mono/mini/cpu-arm64.mdesc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,8 @@ create_scalar_unsafe_int: dest:x src1:i len:4
533533
create_scalar_unsafe_float: dest:x src1:f len:4
534534
arm64_bic: dest:x src1:x src2:x len:4
535535
bitwise_select: dest:x src1:x src2:x src3:x len:12
536+
xextract_i8: dest:i src1:x src2:i len:16
537+
xextract_r8: dest:f src1:x src2:i len:16
536538
arm64_xtn: dest:x src1:x len:4
537539
arm64_xtn2: dest:x src1:x src2:x len:4 clob:1
538540
arm64_fcvtn: dest:x src1:x len:4

src/mono/mono/mini/mini-arm64.c

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -926,6 +926,42 @@ mono_arm_emit_ldrx (guint8 *code, int rt, int rn, int imm)
926926
return emit_ldrx (code, rt, rn, imm);
927927
}
928928

929+
/*
 * emit_xextract_i8:
 *
 *   Emit a short branching sequence that moves one of the two 64-bit lanes
 * of vector register SREG1 into DREG, selected by the index held in SREG2:
 * lane 0 when SREG2 is zero, lane 1 otherwise.
 *
 *   The branch targets are fixed byte offsets from CODE, so the sequence
 * relies on every emitter macro below producing exactly one 4-byte
 * instruction (four instructions, 16 bytes in total).
 *
 * Returns the code pointer advanced past the emitted sequence.
 */
static guint8*
emit_xextract_i8 (guint8* code, int dreg, int sreg1, int sreg2)
{
	guint8 *cursor = code;
	/* Labels inside the emitted sequence, as byte offsets from its start. */
	guint8 *upper_label = code + 12;
	guint8 *done_label = code + 16;

	/* Non-zero index: skip ahead to the lane 1 move. */
	arm_cbnzw (cursor, sreg2, upper_label);
	/* Index zero: take lane 0, then jump over the lane 1 move. */
	arm_neon_umov (cursor, TYPE_I64, dreg, sreg1, 0);
	arm_b (cursor, done_label);
	/* upper_label: */
	arm_neon_umov (cursor, TYPE_I64, dreg, sreg1, 1);
	/* done_label: */
	return cursor;
}
942+
943+
/*
 * emit_xextract_r8:
 *
 *   Emit a short branching sequence that places one of the two 64-bit
 * floating point lanes of vector register SREG1 into DREG, selected by the
 * index held in SREG2: lane 0 when SREG2 is zero, lane 1 otherwise.
 *
 *   Branch targets are fixed byte offsets from CODE, so the sequence relies
 * on every emitter macro below producing exactly one 4-byte instruction.
 *
 * Returns the code pointer advanced past the emitted sequence.
 */
static guint8*
emit_xextract_r8 (guint8* code, int dreg, int sreg1, int sreg2)
{
	guint8 *cursor = code;

	if (dreg == sreg1) {
		/*
		 * Source and destination coincide, so nothing is emitted for index
		 * zero beyond the branch; only a non-zero index needs lane 1 copied.
		 * NOTE(review): presumably the consumer reads the result from the
		 * low lane of DREG - confirm.
		 */
		guint8 *done_label = code + 8;

		arm_cbzw (cursor, sreg2, done_label);
		arm_neon_fdup_e (cursor, VREG_FULL, TYPE_F64, dreg, sreg1, 1);
		/* done_label: */
		return cursor;
	}

	/* Distinct registers: pick the lane with a two-way branch. */
	guint8 *upper_label = code + 12;
	guint8 *done_label = code + 16;

	arm_cbnzw (cursor, sreg2, upper_label);
	arm_neon_fdup_e (cursor, VREG_FULL, TYPE_F64, dreg, sreg1, 0);
	arm_b (cursor, done_label);
	/* upper_label: */
	arm_neon_fdup_e (cursor, VREG_FULL, TYPE_F64, dreg, sreg1, 1);
	/* done_label: */
	return cursor;
}
964+
929965
static guint8*
930966
emit_call (MonoCompile *cfg, guint8* code, MonoJumpInfoType patch_type, gconstpointer data)
931967
{
@@ -3829,6 +3865,22 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
38293865
arm_neon_fdup_e (code, VREG_FULL, t, dreg, sreg1, ins->inst_c0);
38303866
}
38313867
break;
3868+
3869+
case OP_XEXTRACT_I8:
3870+
code = emit_xextract_i8 (code, dreg, sreg1, sreg2);
3871+
break;
3872+
3873+
case OP_XEXTRACT_R8:
3874+
code = emit_xextract_r8 (code, dreg, sreg1, sreg2);
3875+
break;
3876+
3877+
case OP_XEXTRACT_I1:
3878+
case OP_XEXTRACT_I2:
3879+
case OP_XEXTRACT_I4:
3880+
case OP_XEXTRACT_R4:
3881+
g_assert_not_reached ();
3882+
break;
3883+
38323884
case OP_INSERT_I1:
38333885
case OP_INSERT_I2:
38343886
case OP_INSERT_I4:

src/mono/mono/mini/simd-intrinsics.c

Lines changed: 73 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -874,6 +874,38 @@ type_to_insert_op (MonoTypeEnum type)
874874
}
875875
}
876876

877+
static int
878+
type_to_width_log2 (MonoTypeEnum type)
879+
{
880+
switch (type) {
881+
case MONO_TYPE_I1:
882+
case MONO_TYPE_U1:
883+
return 0;
884+
case MONO_TYPE_I2:
885+
case MONO_TYPE_U2:
886+
return 1;
887+
case MONO_TYPE_I4:
888+
case MONO_TYPE_U4:
889+
return 2;
890+
case MONO_TYPE_I8:
891+
case MONO_TYPE_U8:
892+
return 3;
893+
case MONO_TYPE_R4:
894+
return 2;
895+
case MONO_TYPE_R8:
896+
return 3;
897+
case MONO_TYPE_I:
898+
case MONO_TYPE_U:
899+
#if TARGET_SIZEOF_VOID_P == 8
900+
return 3;
901+
#else
902+
return 2;
903+
#endif
904+
default:
905+
g_assert_not_reached ();
906+
}
907+
}
908+
877909
typedef struct {
878910
const char *name;
879911
MonoCPUFeatures feature;
@@ -1324,7 +1356,6 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
13241356
case SN_ConvertToUInt32:
13251357
case SN_ConvertToUInt64:
13261358
case SN_Create:
1327-
case SN_GetElement:
13281359
case SN_GetLower:
13291360
case SN_GetUpper:
13301361
case SN_Shuffle:
@@ -1702,10 +1733,49 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
17021733
elems = 4;
17031734
}
17041735

1736+
if (args [1]->opcode == OP_ICONST) {
1737+
// If the index is provably a constant, we can generate vastly better code.
1738+
int index = args[1]->inst_c0;
1739+
1740+
if (index < 0 || index >= elems) {
1741+
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, elems);
1742+
MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException");
1743+
}
1744+
1745+
// Bounds check is elided if we know the index is safe.
1746+
int extract_op = type_to_extract_op (arg0_type);
1747+
MonoInst* ret = emit_simd_ins (cfg, args [0]->klass, extract_op, args [0]->dreg, -1);
1748+
ret->inst_c0 = index;
1749+
ret->inst_c1 = fsig->ret->type;
1750+
return ret;
1751+
}
1752+
1753+
// Bounds check needed in non-const case.
17051754
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, elems);
17061755
MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException");
1707-
int extract_op = type_to_xextract_op (arg0_type);
1708-
return emit_simd_ins_for_sig (cfg, klass, extract_op, -1, arg0_type, fsig, args);
1756+
1757+
if (COMPILE_LLVM(cfg) || type_to_width_log2 (arg0_type) == 3) {
1758+
// Use optimized paths for 64-bit extractions or whatever LLVM yields if enabled.
1759+
int extract_op = type_to_xextract_op (arg0_type);
1760+
return emit_simd_ins_for_sig (cfg, klass, extract_op, -1, arg0_type, fsig, args);
1761+
} else {
1762+
// Spill the vector reg.
1763+
// Load back from spilled + index << elem_size_log2
1764+
// TODO: on x86, use a LEA
1765+
MonoInst* spilled;
1766+
NEW_VARLOADA_VREG (cfg, spilled, args [0]->dreg, fsig->params [0]);
1767+
MONO_ADD_INS (cfg->cbb, spilled);
1768+
int offset_reg = alloc_lreg (cfg);
1769+
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SHL_IMM, offset_reg, args [1]->dreg, type_to_width_log2 (arg0_type));
1770+
int addr_reg = alloc_preg (cfg);
1771+
MONO_EMIT_NEW_BIALU(cfg, OP_PADD, addr_reg, spilled->dreg, offset_reg);
1772+
MonoInst* ret;
1773+
int dreg = arg0_type == MONO_TYPE_R4 ? alloc_freg (cfg) : alloc_ireg (cfg);
1774+
NEW_LOAD_MEMBASE (cfg, ret, mono_type_to_load_membase (cfg, fsig->ret), dreg, addr_reg, 0);
1775+
MONO_ADD_INS (cfg->cbb, ret);
1776+
return ret;
1777+
}
1778+
break;
17091779
}
17101780
case SN_GetLower:
17111781
case SN_GetUpper: {

0 commit comments

Comments
 (0)