@@ -874,6 +874,38 @@ type_to_insert_op (MonoTypeEnum type)
 	}
 }
 
+static int
+type_to_width_log2 (MonoTypeEnum type)
+{
+	switch (type) {
+	case MONO_TYPE_I1:
+	case MONO_TYPE_U1:
+		return 0;
+	case MONO_TYPE_I2:
+	case MONO_TYPE_U2:
+		return 1;
+	case MONO_TYPE_I4:
+	case MONO_TYPE_U4:
+		return 2;
+	case MONO_TYPE_I8:
+	case MONO_TYPE_U8:
+		return 3;
+	case MONO_TYPE_R4:
+		return 2;
+	case MONO_TYPE_R8:
+		return 3;
+	case MONO_TYPE_I:
+	case MONO_TYPE_U:
+#if TARGET_SIZEOF_VOID_P == 8
+		return 3;
+#else
+		return 2;
+#endif
+	default:
+		g_assert_not_reached ();
+	}
+}
+
 typedef struct {
 	const char *name;
 	MonoCPUFeatures feature;
@@ -1324,7 +1356,6 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 	case SN_ConvertToUInt32:
 	case SN_ConvertToUInt64:
 	case SN_Create:
-	case SN_GetElement:
 	case SN_GetLower:
 	case SN_GetUpper:
 	case SN_Shuffle:
@@ -1702,10 +1733,49 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 			elems = 4;
 		}
 
+		if (args [1]->opcode == OP_ICONST) {
+			// If the index is provably a constant, we can generate vastly better code.
+			int index = args [1]->inst_c0;
+
+			if (index < 0 || index >= elems) {
+				MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, elems);
+				MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException");
+			}
+
+			// Bounds check is elided if we know the index is safe.
+			int extract_op = type_to_extract_op (arg0_type);
+			MonoInst *ret = emit_simd_ins (cfg, args [0]->klass, extract_op, args [0]->dreg, -1);
+			ret->inst_c0 = index;
+			ret->inst_c1 = fsig->ret->type;
+			return ret;
+		}
+
+		// Bounds check needed in non-const case.
 		MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, elems);
 		MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException");
-		int extract_op = type_to_xextract_op (arg0_type);
-		return emit_simd_ins_for_sig (cfg, klass, extract_op, -1, arg0_type, fsig, args);
+
+		if (COMPILE_LLVM (cfg) || type_to_width_log2 (arg0_type) == 3) {
+			// Use optimized paths for 64-bit extractions or whatever LLVM yields if enabled.
+			int extract_op = type_to_xextract_op (arg0_type);
+			return emit_simd_ins_for_sig (cfg, klass, extract_op, -1, arg0_type, fsig, args);
+		} else {
+			// Spill the vector reg.
+			// Load back from spilled + index << elem_size_log2
+			// TODO: on x86, use a LEA
+			MonoInst *spilled;
+			NEW_VARLOADA_VREG (cfg, spilled, args [0]->dreg, fsig->params [0]);
+			MONO_ADD_INS (cfg->cbb, spilled);
+			int offset_reg = alloc_lreg (cfg);
+			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SHL_IMM, offset_reg, args [1]->dreg, type_to_width_log2 (arg0_type));
+			int addr_reg = alloc_preg (cfg);
+			MONO_EMIT_NEW_BIALU (cfg, OP_PADD, addr_reg, spilled->dreg, offset_reg);
+			MonoInst *ret;
+			int dreg = arg0_type == MONO_TYPE_R4 ? alloc_freg (cfg) : alloc_ireg (cfg);
+			NEW_LOAD_MEMBASE (cfg, ret, mono_type_to_load_membase (cfg, fsig->ret), dreg, addr_reg, 0);
+			MONO_ADD_INS (cfg->cbb, ret);
+			return ret;
+		}
+		break;
 	}
 	case SN_GetLower:
 	case SN_GetUpper: {
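
For intuition, here is a small self-contained C sketch (not Mono code; `get_element_i4` and `I4_WIDTH_LOG2` are illustrative stand-ins) of what the non-LLVM `GetElement` path above does at runtime: the vector is spilled to memory, the index is turned into a byte offset by shifting it left by the value `type_to_width_log2` returns, and the element is loaded back from that address.

```c
/* Illustrative sketch only, assuming 32-bit (MONO_TYPE_I4) elements. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Mirrors what type_to_width_log2 returns for a 4-byte element. */
#define I4_WIDTH_LOG2 2

static int32_t
get_element_i4 (const uint8_t *vec, int index)
{
	/* addr = base + (index << width_log2), then load the element back. */
	int32_t elem;
	memcpy (&elem, vec + ((size_t)index << I4_WIDTH_LOG2), sizeof (elem));
	return elem;
}

int
main (void)
{
	int32_t v [4] = { 10, 20, 30, 40 };
	printf ("%d\n", (int) get_element_i4 ((const uint8_t *)v, 2)); /* prints 30 */
	return 0;
}
```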