Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 17 additions & 8 deletions clang/test/CodeGen/AArch64/attr-fp8-function.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,29 @@ svfloat16_t test_svcvtlt2_f16_mf8(svmfloat8_t zn, fpm_t fpm) __arm_streaming {
return svcvtlt2_f16_mf8_fpm(zn, fpm);
}

// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR3:#.*]]
// CHECK: declare <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvtlt2.nxv8f16(<vscale x 16 x i8>) [[ATTR4:#.*]]
// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR2:#.*]]
// CHECK: declare <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvtlt2.nxv8f16(<vscale x 16 x i8>) [[ATTR3:#.*]]


// SME
// With only fprm as inaccessible memory
svfloat32_t test_svmlalltt_lane_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t zm, fpm_t fpm) __arm_streaming {
return svmlalltt_lane_f32_mf8_fpm(zda, zn, zm, 7, fpm);
}

// CHECK: declare <vscale x 4 x float> @llvm.aarch64.sve.fp8.fmlalltt.lane.nxv4f32(<vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32 immarg) [[ATTR4]]
// CHECK: declare <vscale x 4 x float> @llvm.aarch64.sve.fp8.fmlalltt.lane.nxv4f32(<vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32 immarg) [[ATTR3:#.*]]

// CHECK: declare <16 x i8> @llvm.aarch64.neon.fp8.fcvtn.v16i8.v8f16(<8 x half>, <8 x half>) [[ATTR4]]
// With fpmr and za as incaccessible memory
void test_svdot_lane_za32_f8_vg1x2(uint32_t slice, svmfloat8x2_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") {
svdot_lane_za32_mf8_vg1x2_fpm(slice, zn, zm, 3, fpmr);
}

// CHECK: declare void @llvm.aarch64.sme.fp8.fdot.lane.za32.vg1x2(i32, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32 immarg) [[ATTR5:#.*]]
// CHECK: declare <16 x i8> @llvm.aarch64.neon.fp8.fcvtn.v16i8.v8f16(<8 x half>, <8 x half>) [[ATTR3]]

// CHECK: attributes [[ATTR1:#.*]] = {{{.*}}}
// CHECK: attributes [[ATTR2:#.*]] = {{{.*}}}
// CHECK: attributes [[ATTR3]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
// CHECK: attributes [[ATTR4]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: read) }
// CHECK: attributes [[ATTR0:#.*]] = {{{.*}}}
// CHECK: attributes [[ATTR1:#.*]] = {{{.*}}}
// CHECK: attributes [[ATTR2]] = { nocallback nofree nosync nounwind willreturn memory(aarch64_fpmr: write) }
// CHECK: attributes [[ATTR3]] = { nocallback nofree nosync nounwind willreturn memory(aarch64_fpmr: read) }
// CHECK: attributes [[ATTR4:#.*]] = {{{.*}}}
// CHECK: attributes [[ATTR5:#.*]] = { nocallback nofree nosync nounwind willreturn memory(aarch64_fpmr: read, aarch64_za: readwrite) }
2 changes: 2 additions & 0 deletions llvm/include/llvm/AsmParser/LLToken.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,8 @@ enum Kind {
kw_readwrite,
kw_argmem,
kw_inaccessiblemem,
kw_aarch64_fpmr,
kw_aarch64_za,
kw_errnomem,

// Legacy attributes:
Expand Down
12 changes: 12 additions & 0 deletions llvm/include/llvm/IR/Intrinsics.td
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,18 @@ def IntrArgMemOnly : IntrinsicProperty;
// accessible by the module being compiled. This is a weaker form of IntrNoMem.
def IntrInaccessibleMemOnly : IntrinsicProperty;



class IntrinsicMemoryLocation;
// This should be added in the Target, but once in IntrinsicsAArch64.td
// It complains error: "Variable not defined: 'AArch64_FPMR'"
def AArch64_FPMR : IntrinsicMemoryLocation;
def AArch64_ZA: IntrinsicMemoryLocation;
// IntrInaccessible{Read|Write}MemOnly needs to set Location
class IntrInaccessibleReadMemOnly<IntrinsicMemoryLocation idx> : IntrinsicProperty{IntrinsicMemoryLocation Loc=idx;}
class IntrInaccessibleWriteMemOnly<IntrinsicMemoryLocation idx> : IntrinsicProperty{IntrinsicMemoryLocation Loc=idx;}
class IntrInaccessibleReadWriteMem<IntrinsicMemoryLocation idx> : IntrinsicProperty{IntrinsicMemoryLocation Loc=idx;}

// IntrInaccessibleMemOrArgMemOnly -- This intrinsic only accesses memory that
// its pointer-typed arguments point to or memory that is not accessible
// by the module being compiled. This is a weaker form of IntrArgMemOnly.
Expand Down
79 changes: 50 additions & 29 deletions llvm/include/llvm/IR/IntrinsicsAArch64.td
Original file line number Diff line number Diff line change
Expand Up @@ -761,7 +761,7 @@ let TargetPrefix = "aarch64" in {
class RNDR_Intrinsic
: DefaultAttrsIntrinsic<[llvm_i64_ty, llvm_i1_ty], [], [IntrNoMem, IntrHasSideEffects]>;
class FPMR_Set_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrWriteMem, IntrInaccessibleMemOnly]>;
: DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrInaccessibleWriteMemOnly<AArch64_FPMR>]>;
}

// FP environment registers.
Expand Down Expand Up @@ -999,7 +999,7 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat

// Conversions
class AdvSIMD_FP8_1VectorArg_Long_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrReadMem, IntrInaccessibleMemOnly]>;
: DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrInaccessibleReadMemOnly<AArch64_FPMR>]>;

def int_aarch64_neon_fp8_cvtl1 : AdvSIMD_FP8_1VectorArg_Long_Intrinsic;
def int_aarch64_neon_fp8_cvtl2 : AdvSIMD_FP8_1VectorArg_Long_Intrinsic;
Expand All @@ -1008,28 +1008,28 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[llvm_anyvector_ty,
LLVMMatchType<1>],
[IntrReadMem, IntrInaccessibleMemOnly]>;
[IntrInaccessibleReadMemOnly<AArch64_FPMR>]>;
def int_aarch64_neon_fp8_fcvtn2
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
llvm_anyvector_ty,
LLVMMatchType<1>],
[IntrReadMem, IntrInaccessibleMemOnly]>;
[IntrInaccessibleReadMemOnly<AArch64_FPMR>]>;

// Dot-product
class AdvSIMD_FP8_DOT_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
llvm_anyvector_ty,
LLVMMatchType<1>],
[IntrReadMem, IntrInaccessibleMemOnly]>;
[IntrInaccessibleReadMemOnly<AArch64_FPMR>]>;
class AdvSIMD_FP8_DOT_LANE_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
llvm_anyvector_ty,
llvm_v16i8_ty,
llvm_i32_ty],
[IntrReadMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>;
[IntrInaccessibleReadMemOnly<AArch64_FPMR>, ImmArg<ArgIndex<3>>]>;

def int_aarch64_neon_fp8_fdot2 : AdvSIMD_FP8_DOT_Intrinsic;
def int_aarch64_neon_fp8_fdot2_lane : AdvSIMD_FP8_DOT_LANE_Intrinsic;
Expand All @@ -1044,15 +1044,15 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat
[LLVMMatchType<0>,
llvm_v16i8_ty,
llvm_v16i8_ty],
[IntrReadMem, IntrInaccessibleMemOnly]>;
[IntrInaccessibleReadMemOnly<AArch64_FPMR>]>;

class AdvSIMD_FP8_FMLA_LANE_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
llvm_v16i8_ty,
llvm_v16i8_ty,
llvm_i32_ty],
[IntrReadMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>;
[IntrInaccessibleReadMemOnly<AArch64_FPMR>, ImmArg<ArgIndex<3>>]>;

def int_aarch64_neon_fp8_fmlalb : AdvSIMD_FP8_FMLA_Intrinsic;
def int_aarch64_neon_fp8_fmlalt : AdvSIMD_FP8_FMLA_Intrinsic;
Expand Down Expand Up @@ -3070,13 +3070,26 @@ let TargetPrefix = "aarch64" in {
llvm_anyvector_ty,
LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>;

class SME_FP8_OuterProduct_QuarterTile_Single_Single
: DefaultAttrsIntrinsic<[],
[llvm_i32_ty,
llvm_anyvector_ty,
LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA>, IntrHasSideEffects]>;

class SME_OuterProduct_QuarterTile_Single_Multi
: DefaultAttrsIntrinsic<[],
[llvm_i32_ty,
llvm_anyvector_ty,
LLVMMatchType<0>,
LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>;

class SME_FP8_OuterProduct_QuarterTile_Single_Multi
: DefaultAttrsIntrinsic<[],
[llvm_i32_ty,
llvm_anyvector_ty,
LLVMMatchType<0>,
LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA>, IntrHasSideEffects]>;

class SME_OuterProduct_QuarterTile_Multi_Multi
: DefaultAttrsIntrinsic<[],
[llvm_i32_ty,
Expand All @@ -3085,6 +3098,14 @@ let TargetPrefix = "aarch64" in {
LLVMMatchType<0>,
LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>;

class SME_FP8_OuterProduct_QuarterTile_Multi_Multi
: DefaultAttrsIntrinsic<[],
[llvm_i32_ty,
llvm_anyvector_ty,
LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA>, IntrHasSideEffects]>;

// 2-way and 4-way multi-vector signed/unsigned Quarter Tile Quarter Product A/S
foreach mode = ["s", "a"] in {
foreach za = ["", "_za64"] in {
Expand Down Expand Up @@ -3127,10 +3148,10 @@ let TargetPrefix = "aarch64" in {

// 16 and 32 bit multi-vector floating point 8 Quarter Tile Quarter Product
foreach za = ["za16", "za32"] in {
def int_aarch64_sme_fp8_fmop4a_ # za # "_1x1" : SME_OuterProduct_QuarterTile_Single_Single;
def int_aarch64_sme_fp8_fmop4a_ # za # "_1x2" : SME_OuterProduct_QuarterTile_Single_Multi;
def int_aarch64_sme_fp8_fmop4a_ # za # "_2x1" : SME_OuterProduct_QuarterTile_Single_Multi;
def int_aarch64_sme_fp8_fmop4a_ # za # "_2x2" : SME_OuterProduct_QuarterTile_Multi_Multi;
def int_aarch64_sme_fp8_fmop4a_ # za # "_1x1" : SME_FP8_OuterProduct_QuarterTile_Single_Single;
def int_aarch64_sme_fp8_fmop4a_ # za # "_1x2" : SME_FP8_OuterProduct_QuarterTile_Single_Multi;
def int_aarch64_sme_fp8_fmop4a_ # za # "_2x1" : SME_FP8_OuterProduct_QuarterTile_Single_Multi;
def int_aarch64_sme_fp8_fmop4a_ # za # "_2x2" : SME_FP8_OuterProduct_QuarterTile_Multi_Multi;
}

class SME_AddVectorToTile_Intrinsic
Expand Down Expand Up @@ -4027,7 +4048,7 @@ let TargetPrefix = "aarch64" in {
class SVE2_FP8_Cvt
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[llvm_nxv16i8_ty],
[IntrReadMem, IntrInaccessibleMemOnly]>;
[IntrInaccessibleReadMemOnly<AArch64_FPMR>]>;

def int_aarch64_sve_fp8_cvt1 : SVE2_FP8_Cvt;
def int_aarch64_sve_fp8_cvt2 : SVE2_FP8_Cvt;
Expand All @@ -4038,28 +4059,28 @@ let TargetPrefix = "aarch64" in {
class SVE2_FP8_Narrow_Cvt
: DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
[llvm_anyvector_ty, LLVMMatchType<0>],
[IntrReadMem, IntrInaccessibleMemOnly]>;
[IntrInaccessibleReadMemOnly<AArch64_FPMR>]>;

def int_aarch64_sve_fp8_cvtn : SVE2_FP8_Narrow_Cvt;
def int_aarch64_sve_fp8_cvtnb : SVE2_FP8_Narrow_Cvt;

def int_aarch64_sve_fp8_cvtnt
: DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
[llvm_nxv16i8_ty, llvm_anyvector_ty, LLVMMatchType<0>],
[IntrReadMem, IntrInaccessibleMemOnly]>;
[IntrInaccessibleReadMemOnly<AArch64_FPMR>]>;

// Dot product
class SVE2_FP8_FMLA_FDOT
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
llvm_nxv16i8_ty, llvm_nxv16i8_ty],
[IntrReadMem, IntrInaccessibleMemOnly]>;
[IntrInaccessibleReadMemOnly<AArch64_FPMR>]>;

class SVE2_FP8_FMLA_FDOT_Lane
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty],
[IntrReadMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>;
[IntrInaccessibleReadMemOnly<AArch64_FPMR>, ImmArg<ArgIndex<3>>]>;

def int_aarch64_sve_fp8_fdot : SVE2_FP8_FMLA_FDOT;
def int_aarch64_sve_fp8_fdot_lane : SVE2_FP8_FMLA_FDOT_Lane;
Expand All @@ -4086,69 +4107,69 @@ let TargetPrefix = "aarch64" in {
class SVE2_FP8_CVT_X2_Single_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
[llvm_nxv16i8_ty],
[IntrReadMem, IntrInaccessibleMemOnly]>;
[IntrInaccessibleReadMemOnly<AArch64_FPMR>]>;

class SVE2_FP8_CVT_Single_X4_Intrinsic
: DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
[llvm_nxv4f32_ty, llvm_nxv4f32_ty, llvm_nxv4f32_ty, llvm_nxv4f32_ty],
[IntrReadMem, IntrInaccessibleMemOnly]>;
[IntrInaccessibleReadMemOnly<AArch64_FPMR>]>;

class SME_FP8_OuterProduct_Intrinsic
: DefaultAttrsIntrinsic<[],
[llvm_i32_ty,
llvm_nxv16i1_ty, llvm_nxv16i1_ty,
llvm_nxv16i8_ty, llvm_nxv16i8_ty],
[ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly]>;
[ImmArg<ArgIndex<0>>, IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA>]>;

class SME_FP8_ZA_LANE_VGx1_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_i32_ty,
llvm_nxv16i8_ty,
llvm_nxv16i8_ty,
llvm_i32_ty],
[IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>;
[IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA>, ImmArg<ArgIndex<3>>]>;

class SME_FP8_ZA_LANE_VGx2_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_i32_ty,
llvm_nxv16i8_ty, llvm_nxv16i8_ty,
llvm_nxv16i8_ty,
llvm_i32_ty],
[IntrInaccessibleMemOnly, ImmArg<ArgIndex<4>>]>;
[IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA>, ImmArg<ArgIndex<4>>]>;

class SME_FP8_ZA_LANE_VGx4_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_i32_ty,
llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty,
llvm_nxv16i8_ty,
llvm_i32_ty],
[IntrInaccessibleMemOnly, ImmArg<ArgIndex<6>>]>;
[IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA>, ImmArg<ArgIndex<6>>]>;
class SME_FP8_ZA_SINGLE_VGx1_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_i32_ty,
llvm_nxv16i8_ty,
llvm_nxv16i8_ty],
[IntrInaccessibleMemOnly]>;
[IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA>]>;

class SME_FP8_ZA_SINGLE_VGx2_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_i32_ty,
llvm_nxv16i8_ty, llvm_nxv16i8_ty,
llvm_nxv16i8_ty],
[IntrInaccessibleMemOnly]>;
[IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA>]>;

class SME_FP8_ZA_SINGLE_VGx4_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_i32_ty,
llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty,
llvm_nxv16i8_ty],
[IntrInaccessibleMemOnly]>;
[IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA>]>;

class SME_FP8_ZA_MULTI_VGx2_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_i32_ty,
llvm_nxv16i8_ty, llvm_nxv16i8_ty,
llvm_nxv16i8_ty, llvm_nxv16i8_ty],
[IntrInaccessibleMemOnly]>;
[IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA>]>;

class SME_FP8_ZA_MULTI_VGx4_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_i32_ty,
llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty,
llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty],
[IntrInaccessibleMemOnly]>;
[IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA>]>;
//
// CVT from FP8 to half-precision/BFloat16 multi-vector
//
Expand All @@ -4167,7 +4188,7 @@ let TargetPrefix = "aarch64" in {
def int_aarch64_sve_fp8_cvt_x2
: DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
[llvm_anyvector_ty, LLVMMatchType<0>],
[IntrReadMem, IntrInaccessibleMemOnly]>;
[IntrInaccessibleReadMemOnly<AArch64_FPMR>]>;

def int_aarch64_sve_fp8_cvt_x4 : SVE2_FP8_CVT_Single_X4_Intrinsic;
def int_aarch64_sve_fp8_cvtn_x4 : SVE2_FP8_CVT_Single_X4_Intrinsic;
Expand Down
Loading