@@ -761,7 +761,7 @@ let TargetPrefix = "aarch64" in {
761
761
// Intrinsic class with an empty argument list and two results (i64, i1).
// Properties: IntrNoMem and IntrHasSideEffects. Unchanged context in this diff.
class RNDR_Intrinsic
762
762
: DefaultAttrsIntrinsic<[llvm_i64_ty, llvm_i1_ty], [], [IntrNoMem, IntrHasSideEffects]>;
763
763
// Intrinsic class with a single i64 argument and no result.
// Diff: the [IntrWriteMem, IntrInaccessibleMemOnly] pair is replaced by the
// single property IntrInaccessibleWriteMemOnly<AArch64_FPMR>.
class FPMR_Set_Intrinsic
764
- : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrWriteMem, IntrInaccessibleMemOnly ]>;
764
+ : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrInaccessibleWriteMemOnly<AArch64_FPMR> ]>;
765
765
}
766
766
767
767
// FP environment registers.
@@ -999,7 +999,7 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat
999
999
1000
1000
// Conversions
1001
1001
// Intrinsic class taking one overloaded vector argument and returning an
// overloaded vector.
// Diff: [IntrReadMem, IntrInaccessibleMemOnly] becomes
// IntrInaccessibleReadMemOnly<AArch64_FPMR>.
class AdvSIMD_FP8_1VectorArg_Long_Intrinsic
1002
- : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrReadMem, IntrInaccessibleMemOnly ]>;
1002
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrInaccessibleReadMemOnly<AArch64_FPMR> ]>;
1003
1003
1004
1004
// Two intrinsic records that share AdvSIMD_FP8_1VectorArg_Long_Intrinsic.
def int_aarch64_neon_fp8_cvtl1 : AdvSIMD_FP8_1VectorArg_Long_Intrinsic;
1005
1005
def int_aarch64_neon_fp8_cvtl2 : AdvSIMD_FP8_1VectorArg_Long_Intrinsic;
@@ -1008,28 +1008,28 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat
1008
1008
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1009
1009
[llvm_anyvector_ty,
1010
1010
LLVMMatchType<1>],
1011
- [IntrReadMem, IntrInaccessibleMemOnly ]>;
1011
+ [IntrInaccessibleReadMemOnly<AArch64_FPMR> ]>;
1012
1012
// Intrinsic record with three vector arguments: the first matches the result
// type, the second is separately overloaded, and the third matches the second.
// Diff: [IntrReadMem, IntrInaccessibleMemOnly] becomes
// IntrInaccessibleReadMemOnly<AArch64_FPMR>.
def int_aarch64_neon_fp8_fcvtn2
1013
1013
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1014
1014
[LLVMMatchType<0>,
1015
1015
llvm_anyvector_ty,
1016
1016
LLVMMatchType<1>],
1017
- [IntrReadMem, IntrInaccessibleMemOnly ]>;
1017
+ [IntrInaccessibleReadMemOnly<AArch64_FPMR> ]>;
1018
1018
1019
1019
// Dot-product
1020
1020
// Intrinsic class with three vector arguments: the first matches the result
// type, the second is separately overloaded, and the third matches the second.
// Diff: [IntrReadMem, IntrInaccessibleMemOnly] becomes
// IntrInaccessibleReadMemOnly<AArch64_FPMR>.
class AdvSIMD_FP8_DOT_Intrinsic
1021
1021
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1022
1022
[LLVMMatchType<0>,
1023
1023
llvm_anyvector_ty,
1024
1024
LLVMMatchType<1>],
1025
- [IntrReadMem, IntrInaccessibleMemOnly ]>;
1025
+ [IntrInaccessibleReadMemOnly<AArch64_FPMR> ]>;
1026
1026
// Intrinsic class with four arguments: a vector matching the result type, an
// overloaded vector, a v16i8 vector, and an i32 that must be an immediate
// (ImmArg on argument index 3).
// Diff: [IntrReadMem, IntrInaccessibleMemOnly] becomes
// IntrInaccessibleReadMemOnly<AArch64_FPMR>; the ImmArg is kept.
class AdvSIMD_FP8_DOT_LANE_Intrinsic
1027
1027
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1028
1028
[LLVMMatchType<0>,
1029
1029
llvm_anyvector_ty,
1030
1030
llvm_v16i8_ty,
1031
1031
llvm_i32_ty],
1032
- [IntrReadMem, IntrInaccessibleMemOnly , ImmArg<ArgIndex<3>>]>;
1032
+ [IntrInaccessibleReadMemOnly<AArch64_FPMR> , ImmArg<ArgIndex<3>>]>;
1033
1033
1034
1034
// Non-lane and lane variants, instantiated from AdvSIMD_FP8_DOT_Intrinsic and
// AdvSIMD_FP8_DOT_LANE_Intrinsic respectively.
def int_aarch64_neon_fp8_fdot2 : AdvSIMD_FP8_DOT_Intrinsic;
1035
1035
def int_aarch64_neon_fp8_fdot2_lane : AdvSIMD_FP8_DOT_LANE_Intrinsic;
@@ -1044,15 +1044,15 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat
1044
1044
[LLVMMatchType<0>,
1045
1045
llvm_v16i8_ty,
1046
1046
llvm_v16i8_ty],
1047
- [IntrReadMem, IntrInaccessibleMemOnly ]>;
1047
+ [IntrInaccessibleReadMemOnly<AArch64_FPMR> ]>;
1048
1048
1049
1049
// Intrinsic class with four arguments: a vector matching the result type, two
// v16i8 vectors, and an i32 that must be an immediate (ImmArg on index 3).
// Diff: [IntrReadMem, IntrInaccessibleMemOnly] becomes
// IntrInaccessibleReadMemOnly<AArch64_FPMR>; the ImmArg is kept.
class AdvSIMD_FP8_FMLA_LANE_Intrinsic
1050
1050
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1051
1051
[LLVMMatchType<0>,
1052
1052
llvm_v16i8_ty,
1053
1053
llvm_v16i8_ty,
1054
1054
llvm_i32_ty],
1055
- [IntrReadMem, IntrInaccessibleMemOnly , ImmArg<ArgIndex<3>>]>;
1055
+ [IntrInaccessibleReadMemOnly<AArch64_FPMR> , ImmArg<ArgIndex<3>>]>;
1056
1056
1057
1057
// Two intrinsic records that share AdvSIMD_FP8_FMLA_Intrinsic.
def int_aarch64_neon_fp8_fmlalb : AdvSIMD_FP8_FMLA_Intrinsic;
1058
1058
def int_aarch64_neon_fp8_fmlalt : AdvSIMD_FP8_FMLA_Intrinsic;
@@ -3070,13 +3070,26 @@ let TargetPrefix = "aarch64" in {
3070
3070
llvm_anyvector_ty,
3071
3071
LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>;
3072
3072
3073
// Class added by this diff (every line is '+'). No result; arguments are an
// i32 immediate (ImmArg on index 0), an overloaded vector, and a vector of the
// same type. Properties name two locations separately:
// IntrInaccessibleReadMemOnly<AArch64_FPMR> and
// IntrInaccessibleReadWriteMem<AArch64_ZA>, plus IntrHasSideEffects.
+ class SME_FP8_OuterProduct_QuarterTile_Single_Single
3074
+ : DefaultAttrsIntrinsic<[],
3075
+ [llvm_i32_ty,
3076
+ llvm_anyvector_ty,
3077
+ LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA>, IntrHasSideEffects]>;
3078
+
3073
3079
// Existing class, unchanged by this diff apart from line renumbering. No
// result; arguments are an i32 immediate (ImmArg on index 0), an overloaded
// vector, and two further vectors of that same type.
// Properties: IntrInaccessibleMemOnly and IntrHasSideEffects.
class SME_OuterProduct_QuarterTile_Single_Multi
3074
3080
: DefaultAttrsIntrinsic<[],
3075
3081
[llvm_i32_ty,
3076
3082
llvm_anyvector_ty,
3077
3083
LLVMMatchType<0>,
3078
3084
LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>;
3079
3085
3086
// Class added by this diff (every line is '+'). Same argument list as
// SME_OuterProduct_QuarterTile_Single_Multi (i32 immediate plus three vectors
// of one overloaded type). IntrInaccessibleMemOnly is replaced here by
// IntrInaccessibleReadMemOnly<AArch64_FPMR> and
// IntrInaccessibleReadWriteMem<AArch64_ZA>.
+ class SME_FP8_OuterProduct_QuarterTile_Single_Multi
3087
+ : DefaultAttrsIntrinsic<[],
3088
+ [llvm_i32_ty,
3089
+ llvm_anyvector_ty,
3090
+ LLVMMatchType<0>,
3091
+ LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA>, IntrHasSideEffects]>;
3092
+
3080
3093
class SME_OuterProduct_QuarterTile_Multi_Multi
3081
3094
: DefaultAttrsIntrinsic<[],
3082
3095
[llvm_i32_ty,
@@ -3085,6 +3098,14 @@ let TargetPrefix = "aarch64" in {
3085
3098
LLVMMatchType<0>,
3086
3099
LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>;
3087
3100
3101
// Class added by this diff (every line is '+'). No result; arguments are an
// i32 immediate (ImmArg on index 0) followed by four vectors of one overloaded
// type. Properties: IntrInaccessibleReadMemOnly<AArch64_FPMR>,
// IntrInaccessibleReadWriteMem<AArch64_ZA> and IntrHasSideEffects.
+ class SME_FP8_OuterProduct_QuarterTile_Multi_Multi
3102
+ : DefaultAttrsIntrinsic<[],
3103
+ [llvm_i32_ty,
3104
+ llvm_anyvector_ty,
3105
+ LLVMMatchType<0>,
3106
+ LLVMMatchType<0>,
3107
+ LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA>, IntrHasSideEffects]>;
3108
+
3088
3109
// 2-way and 4-way multi-vector signed/unsigned Quarter Tile Quarter Product A/S
3089
3110
foreach mode = ["s", "a"] in {
3090
3111
foreach za = ["", "_za64"] in {
@@ -3127,10 +3148,10 @@ let TargetPrefix = "aarch64" in {
3127
3148
3128
3149
// 16 and 32 bit multi-vector floating point 8 Quarter Tile Quarter Product
3129
3150
// Generates int_aarch64_sme_fp8_fmop4a_<za>_{1x1,1x2,2x1,2x2} for za16 and za32.
// Diff: each def switches from the SME_OuterProduct_QuarterTile_* class to the
// matching SME_FP8_OuterProduct_QuarterTile_* class. Both 1x2 and 2x1 use the
// Single_Multi class, before and after the change.
foreach za = ["za16", "za32"] in {
3130
- def int_aarch64_sme_fp8_fmop4a_ # za # "_1x1" : SME_OuterProduct_QuarterTile_Single_Single ;
3131
- def int_aarch64_sme_fp8_fmop4a_ # za # "_1x2" : SME_OuterProduct_QuarterTile_Single_Multi ;
3132
- def int_aarch64_sme_fp8_fmop4a_ # za # "_2x1" : SME_OuterProduct_QuarterTile_Single_Multi ;
3133
- def int_aarch64_sme_fp8_fmop4a_ # za # "_2x2" : SME_OuterProduct_QuarterTile_Multi_Multi ;
3151
+ def int_aarch64_sme_fp8_fmop4a_ # za # "_1x1" : SME_FP8_OuterProduct_QuarterTile_Single_Single ;
3152
+ def int_aarch64_sme_fp8_fmop4a_ # za # "_1x2" : SME_FP8_OuterProduct_QuarterTile_Single_Multi ;
3153
+ def int_aarch64_sme_fp8_fmop4a_ # za # "_2x1" : SME_FP8_OuterProduct_QuarterTile_Single_Multi ;
3154
+ def int_aarch64_sme_fp8_fmop4a_ # za # "_2x2" : SME_FP8_OuterProduct_QuarterTile_Multi_Multi ;
3134
3155
}
3135
3156
3136
3157
class SME_AddVectorToTile_Intrinsic
@@ -4027,7 +4048,7 @@ let TargetPrefix = "aarch64" in {
4027
4048
// Intrinsic class taking one nxv16i8 argument and returning an overloaded vector.
// Diff: [IntrReadMem, IntrInaccessibleMemOnly] becomes
// IntrInaccessibleReadMemOnly<AArch64_FPMR>.
class SVE2_FP8_Cvt
4028
4049
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
4029
4050
[llvm_nxv16i8_ty],
4030
- [IntrReadMem, IntrInaccessibleMemOnly ]>;
4051
+ [IntrInaccessibleReadMemOnly<AArch64_FPMR> ]>;
4031
4052
4032
4053
// Two intrinsic records that share SVE2_FP8_Cvt.
def int_aarch64_sve_fp8_cvt1 : SVE2_FP8_Cvt;
4033
4054
def int_aarch64_sve_fp8_cvt2 : SVE2_FP8_Cvt;
@@ -4038,28 +4059,28 @@ let TargetPrefix = "aarch64" in {
4038
4059
// Intrinsic class taking two vectors of one overloaded type and returning nxv16i8.
// Diff: [IntrReadMem, IntrInaccessibleMemOnly] becomes
// IntrInaccessibleReadMemOnly<AArch64_FPMR>.
class SVE2_FP8_Narrow_Cvt
4039
4060
: DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
4040
4061
[llvm_anyvector_ty, LLVMMatchType<0>],
4041
- [IntrReadMem, IntrInaccessibleMemOnly ]>;
4062
+ [IntrInaccessibleReadMemOnly<AArch64_FPMR> ]>;
4042
4063
4043
4064
// Two intrinsic records that share SVE2_FP8_Narrow_Cvt.
def int_aarch64_sve_fp8_cvtn : SVE2_FP8_Narrow_Cvt;
4044
4065
def int_aarch64_sve_fp8_cvtnb : SVE2_FP8_Narrow_Cvt;
4045
4066
4046
4067
// Intrinsic record returning nxv16i8. Arguments are an nxv16i8 vector followed
// by two vectors of one overloaded type.
// Diff: [IntrReadMem, IntrInaccessibleMemOnly] becomes
// IntrInaccessibleReadMemOnly<AArch64_FPMR>.
def int_aarch64_sve_fp8_cvtnt
4047
4068
: DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
4048
4069
[llvm_nxv16i8_ty, llvm_anyvector_ty, LLVMMatchType<0>],
4049
- [IntrReadMem, IntrInaccessibleMemOnly ]>;
4070
+ [IntrInaccessibleReadMemOnly<AArch64_FPMR> ]>;
4050
4071
4051
4072
// Dot product
4052
4073
// Intrinsic class returning an overloaded vector. Arguments are a vector of the
// result type followed by two nxv16i8 vectors.
// Diff: [IntrReadMem, IntrInaccessibleMemOnly] becomes
// IntrInaccessibleReadMemOnly<AArch64_FPMR>.
class SVE2_FP8_FMLA_FDOT
4053
4074
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
4054
4075
[LLVMMatchType<0>,
4055
4076
llvm_nxv16i8_ty, llvm_nxv16i8_ty],
4056
- [IntrReadMem, IntrInaccessibleMemOnly ]>;
4077
+ [IntrInaccessibleReadMemOnly<AArch64_FPMR> ]>;
4057
4078
4058
4079
// Lane form: a vector of the result type, two nxv16i8 vectors, and an i32 that
// must be an immediate (ImmArg on index 3).
// Diff: [IntrReadMem, IntrInaccessibleMemOnly] becomes
// IntrInaccessibleReadMemOnly<AArch64_FPMR>; the ImmArg is kept.
class SVE2_FP8_FMLA_FDOT_Lane
4059
4080
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
4060
4081
[LLVMMatchType<0>,
4061
4082
llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty],
4062
- [IntrReadMem, IntrInaccessibleMemOnly , ImmArg<ArgIndex<3>>]>;
4083
+ [IntrInaccessibleReadMemOnly<AArch64_FPMR> , ImmArg<ArgIndex<3>>]>;
4063
4084
4064
4085
// Non-lane and lane variants, instantiated from SVE2_FP8_FMLA_FDOT and
// SVE2_FP8_FMLA_FDOT_Lane respectively.
def int_aarch64_sve_fp8_fdot : SVE2_FP8_FMLA_FDOT;
4065
4086
def int_aarch64_sve_fp8_fdot_lane : SVE2_FP8_FMLA_FDOT_Lane;
@@ -4086,69 +4107,69 @@ let TargetPrefix = "aarch64" in {
4086
4107
// Intrinsic class taking one nxv16i8 argument and returning two vectors of the
// same overloaded type.
// Diff: [IntrReadMem, IntrInaccessibleMemOnly] becomes
// IntrInaccessibleReadMemOnly<AArch64_FPMR>.
class SVE2_FP8_CVT_X2_Single_Intrinsic
4087
4108
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
4088
4109
[llvm_nxv16i8_ty],
4089
- [IntrReadMem, IntrInaccessibleMemOnly ]>;
4110
+ [IntrInaccessibleReadMemOnly<AArch64_FPMR> ]>;
4090
4111
4091
4112
// Intrinsic class taking four nxv4f32 arguments and returning one nxv16i8 vector.
// Diff: [IntrReadMem, IntrInaccessibleMemOnly] becomes
// IntrInaccessibleReadMemOnly<AArch64_FPMR>.
class SVE2_FP8_CVT_Single_X4_Intrinsic
4092
4113
: DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
4093
4114
[llvm_nxv4f32_ty, llvm_nxv4f32_ty, llvm_nxv4f32_ty, llvm_nxv4f32_ty],
4094
- [IntrReadMem, IntrInaccessibleMemOnly ]>;
4115
+ [IntrInaccessibleReadMemOnly<AArch64_FPMR> ]>;
4095
4116
4096
4117
// Intrinsic class with no result. Arguments are an i32 immediate (ImmArg on
// index 0), two nxv16i1 vectors, and two nxv16i8 vectors.
// Diff: the bare IntrInaccessibleMemOnly is split into
// IntrInaccessibleReadMemOnly<AArch64_FPMR> and
// IntrInaccessibleReadWriteMem<AArch64_ZA>.
class SME_FP8_OuterProduct_Intrinsic
4097
4118
: DefaultAttrsIntrinsic<[],
4098
4119
[llvm_i32_ty,
4099
4120
llvm_nxv16i1_ty, llvm_nxv16i1_ty,
4100
4121
llvm_nxv16i8_ty, llvm_nxv16i8_ty],
4101
- [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly ]>;
4122
+ [ImmArg<ArgIndex<0>>, IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA> ]>;
4102
4123
4103
4124
// Intrinsic class with no result. Arguments are an i32, two nxv16i8 vectors,
// and a trailing i32 that must be an immediate (ImmArg on index 3).
// Diff: IntrInaccessibleMemOnly is split into
// IntrInaccessibleReadMemOnly<AArch64_FPMR> and
// IntrInaccessibleReadWriteMem<AArch64_ZA>.
class SME_FP8_ZA_LANE_VGx1_Intrinsic
4104
4125
: DefaultAttrsIntrinsic<[], [llvm_i32_ty,
4105
4126
llvm_nxv16i8_ty,
4106
4127
llvm_nxv16i8_ty,
4107
4128
llvm_i32_ty],
4108
- [IntrInaccessibleMemOnly , ImmArg<ArgIndex<3>>]>;
4129
+ [IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA> , ImmArg<ArgIndex<3>>]>;
4109
4130
4110
4131
// Intrinsic class with no result. Arguments are an i32, three nxv16i8 vectors,
// and a trailing i32 that must be an immediate (ImmArg on index 4).
// Diff: IntrInaccessibleMemOnly is split into
// IntrInaccessibleReadMemOnly<AArch64_FPMR> and
// IntrInaccessibleReadWriteMem<AArch64_ZA>.
class SME_FP8_ZA_LANE_VGx2_Intrinsic
4111
4132
: DefaultAttrsIntrinsic<[], [llvm_i32_ty,
4112
4133
llvm_nxv16i8_ty, llvm_nxv16i8_ty,
4113
4134
llvm_nxv16i8_ty,
4114
4135
llvm_i32_ty],
4115
- [IntrInaccessibleMemOnly , ImmArg<ArgIndex<4>>]>;
4136
+ [IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA> , ImmArg<ArgIndex<4>>]>;
4116
4137
4117
4138
// Intrinsic class with no result. Arguments are an i32, five nxv16i8 vectors,
// and a trailing i32 that must be an immediate (ImmArg on index 6).
// Diff: IntrInaccessibleMemOnly is split into
// IntrInaccessibleReadMemOnly<AArch64_FPMR> and
// IntrInaccessibleReadWriteMem<AArch64_ZA>.
class SME_FP8_ZA_LANE_VGx4_Intrinsic
4118
4139
: DefaultAttrsIntrinsic<[], [llvm_i32_ty,
4119
4140
llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty,
4120
4141
llvm_nxv16i8_ty,
4121
4142
llvm_i32_ty],
4122
- [IntrInaccessibleMemOnly , ImmArg<ArgIndex<6>>]>;
4143
+ [IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA> , ImmArg<ArgIndex<6>>]>;
4123
4144
// Intrinsic class with no result. Arguments are an i32 and two nxv16i8 vectors.
// Diff: IntrInaccessibleMemOnly is split into
// IntrInaccessibleReadMemOnly<AArch64_FPMR> and
// IntrInaccessibleReadWriteMem<AArch64_ZA>.
class SME_FP8_ZA_SINGLE_VGx1_Intrinsic
4124
4145
: DefaultAttrsIntrinsic<[], [llvm_i32_ty,
4125
4146
llvm_nxv16i8_ty,
4126
4147
llvm_nxv16i8_ty],
4127
- [IntrInaccessibleMemOnly ]>;
4148
+ [IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA> ]>;
4128
4149
4129
4150
// Intrinsic class with no result. Arguments are an i32 and three nxv16i8 vectors.
// Diff: IntrInaccessibleMemOnly is split into
// IntrInaccessibleReadMemOnly<AArch64_FPMR> and
// IntrInaccessibleReadWriteMem<AArch64_ZA>.
class SME_FP8_ZA_SINGLE_VGx2_Intrinsic
4130
4151
: DefaultAttrsIntrinsic<[], [llvm_i32_ty,
4131
4152
llvm_nxv16i8_ty, llvm_nxv16i8_ty,
4132
4153
llvm_nxv16i8_ty],
4133
- [IntrInaccessibleMemOnly ]>;
4154
+ [IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA> ]>;
4134
4155
4135
4156
// Intrinsic class with no result. Arguments are an i32 and five nxv16i8 vectors.
// Diff: IntrInaccessibleMemOnly is split into
// IntrInaccessibleReadMemOnly<AArch64_FPMR> and
// IntrInaccessibleReadWriteMem<AArch64_ZA>.
class SME_FP8_ZA_SINGLE_VGx4_Intrinsic
4136
4157
: DefaultAttrsIntrinsic<[], [llvm_i32_ty,
4137
4158
llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty,
4138
4159
llvm_nxv16i8_ty],
4139
- [IntrInaccessibleMemOnly ]>;
4160
+ [IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA> ]>;
4140
4161
4141
4162
// Intrinsic class with no result. Arguments are an i32 and four nxv16i8
// vectors, written as two pairs.
// Diff: IntrInaccessibleMemOnly is split into
// IntrInaccessibleReadMemOnly<AArch64_FPMR> and
// IntrInaccessibleReadWriteMem<AArch64_ZA>.
class SME_FP8_ZA_MULTI_VGx2_Intrinsic
4142
4163
: DefaultAttrsIntrinsic<[], [llvm_i32_ty,
4143
4164
llvm_nxv16i8_ty, llvm_nxv16i8_ty,
4144
4165
llvm_nxv16i8_ty, llvm_nxv16i8_ty],
4145
- [IntrInaccessibleMemOnly ]>;
4166
+ [IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA> ]>;
4146
4167
4147
4168
// Intrinsic class with no result. Arguments are an i32 and eight nxv16i8
// vectors, written as two groups of four.
// Diff: IntrInaccessibleMemOnly is split into
// IntrInaccessibleReadMemOnly<AArch64_FPMR> and
// IntrInaccessibleReadWriteMem<AArch64_ZA>.
class SME_FP8_ZA_MULTI_VGx4_Intrinsic
4148
4169
: DefaultAttrsIntrinsic<[], [llvm_i32_ty,
4149
4170
llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty,
4150
4171
llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty],
4151
- [IntrInaccessibleMemOnly ]>;
4172
+ [IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA> ]>;
4152
4173
//
4153
4174
// CVT from FP8 to half-precision/BFloat16 multi-vector
4154
4175
//
@@ -4167,7 +4188,7 @@ let TargetPrefix = "aarch64" in {
4167
4188
// Intrinsic record taking two vectors of one overloaded type and returning nxv16i8.
// Diff: [IntrReadMem, IntrInaccessibleMemOnly] becomes
// IntrInaccessibleReadMemOnly<AArch64_FPMR>.
def int_aarch64_sve_fp8_cvt_x2
4168
4189
: DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
4169
4190
[llvm_anyvector_ty, LLVMMatchType<0>],
4170
- [IntrReadMem, IntrInaccessibleMemOnly ]>;
4191
+ [IntrInaccessibleReadMemOnly<AArch64_FPMR> ]>;
4171
4192
4172
4193
// Two intrinsic records that share SVE2_FP8_CVT_Single_X4_Intrinsic.
def int_aarch64_sve_fp8_cvt_x4 : SVE2_FP8_CVT_Single_X4_Intrinsic;
4173
4194
def int_aarch64_sve_fp8_cvtn_x4 : SVE2_FP8_CVT_Single_X4_Intrinsic;
0 commit comments