Skip to content

[CLANG][AArch64] Add mfloat8_t support for more SVE load intrinsics #145383

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions clang/include/clang/Basic/arm_sve.td
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def SVLD1UW_GATHER_INDEX_S : MInst<"svld1uw_gather[_{2}base]_index_{d}", "dPul


// First-faulting load one vector (scalar base)
def SVLDFF1 : MInst<"svldff1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad], MemEltTyDefault, "aarch64_sve_ldff1">;
def SVLDFF1 : MInst<"svldff1[_{2}]", "dPc", "csilUcUsUiUlhfdm", [IsLoad], MemEltTyDefault, "aarch64_sve_ldff1">;
def SVLDFF1SB : MInst<"svldff1sb_{d}", "dPS", "silUsUiUl", [IsLoad], MemEltTyInt8, "aarch64_sve_ldff1">;
def SVLDFF1UB : MInst<"svldff1ub_{d}", "dPW", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8, "aarch64_sve_ldff1">;
def SVLDFF1SH : MInst<"svldff1sh_{d}", "dPT", "ilUiUl", [IsLoad], MemEltTyInt16, "aarch64_sve_ldff1">;
Expand All @@ -130,7 +130,7 @@ def SVLDFF1SW : MInst<"svldff1sw_{d}", "dPU", "lUl", [IsLoad],
def SVLDFF1UW : MInst<"svldff1uw_{d}", "dPY", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ldff1">;

// First-faulting load one vector (scalar base, VL displacement)
def SVLDFF1_VNUM : MInst<"svldff1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad], MemEltTyDefault, "aarch64_sve_ldff1">;
def SVLDFF1_VNUM : MInst<"svldff1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfdm", [IsLoad], MemEltTyDefault, "aarch64_sve_ldff1">;
def SVLDFF1SB_VNUM : MInst<"svldff1sb_vnum_{d}", "dPSl", "silUsUiUl", [IsLoad], MemEltTyInt8, "aarch64_sve_ldff1">;
def SVLDFF1UB_VNUM : MInst<"svldff1ub_vnum_{d}", "dPWl", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8, "aarch64_sve_ldff1">;
def SVLDFF1SH_VNUM : MInst<"svldff1sh_vnum_{d}", "dPTl", "ilUiUl", [IsLoad], MemEltTyInt16, "aarch64_sve_ldff1">;
Expand Down Expand Up @@ -223,7 +223,7 @@ def SVLDFF1SW_GATHER_INDEX_S : MInst<"svldff1sw_gather[_{2}base]_index_{d}", "dP
def SVLDFF1UW_GATHER_INDEX_S : MInst<"svldff1uw_gather[_{2}base]_index_{d}", "dPul", "lUl", [IsGatherLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ldff1_gather_scalar_offset">;

// Non-faulting load one vector (scalar base)
def SVLDNF1 : MInst<"svldnf1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnf1">;
def SVLDNF1 : MInst<"svldnf1[_{2}]", "dPc", "csilUcUsUiUlhfdm", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnf1">;
def SVLDNF1SB : MInst<"svldnf1sb_{d}", "dPS", "silUsUiUl", [IsLoad], MemEltTyInt8, "aarch64_sve_ldnf1">;
def SVLDNF1UB : MInst<"svldnf1ub_{d}", "dPW", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8, "aarch64_sve_ldnf1">;
def SVLDNF1SH : MInst<"svldnf1sh_{d}", "dPT", "ilUiUl", [IsLoad], MemEltTyInt16, "aarch64_sve_ldnf1">;
Expand All @@ -232,7 +232,7 @@ def SVLDNF1SW : MInst<"svldnf1sw_{d}", "dPU", "lUl", [IsLoad],
def SVLDNF1UW : MInst<"svldnf1uw_{d}", "dPY", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ldnf1">;

// Non-faulting load one vector (scalar base, VL displacement)
def SVLDNF1_VNUM : MInst<"svldnf1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnf1">;
def SVLDNF1_VNUM : MInst<"svldnf1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfdm", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnf1">;
def SVLDNF1SB_VNUM : MInst<"svldnf1sb_vnum_{d}", "dPSl", "silUsUiUl", [IsLoad], MemEltTyInt8, "aarch64_sve_ldnf1">;
def SVLDNF1UB_VNUM : MInst<"svldnf1ub_vnum_{d}", "dPWl", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8, "aarch64_sve_ldnf1">;
def SVLDNF1SH_VNUM : MInst<"svldnf1sh_vnum_{d}", "dPTl", "ilUiUl", [IsLoad], MemEltTyInt16, "aarch64_sve_ldnf1">;
Expand All @@ -258,7 +258,7 @@ let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
}

// Load one quadword and replicate (scalar base)
def SVLD1RQ : SInst<"svld1rq[_{2}]", "dPc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld1rq", [VerifyRuntimeMode]>;
def SVLD1RQ : SInst<"svld1rq[_{2}]", "dPc", "csilUcUsUiUlhfdm", MergeNone, "aarch64_sve_ld1rq", [VerifyRuntimeMode]>;

let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
def SVLD1RQ_BF : SInst<"svld1rq[_{2}]", "dPc", "b", MergeNone, "aarch64_sve_ld1rq", [VerifyRuntimeMode]>;
Expand All @@ -283,7 +283,7 @@ defm SVLD4_VNUM : StructLoad<"svld4_vnum[_{2}]", "4Pcl", "aarch64_sve_ld4_sret">

// Load one octoword and replicate (scalar base)
let SVETargetGuard = "sve,f64mm", SMETargetGuard = InvalidMode in {
def SVLD1RO : SInst<"svld1ro[_{2}]", "dPc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld1ro">;
def SVLD1RO : SInst<"svld1ro[_{2}]", "dPc", "csilUcUsUiUlhfdm", MergeNone, "aarch64_sve_ld1ro">;
}
let SVETargetGuard = "sve,f64mm,bf16", SMETargetGuard = InvalidMode in {
def SVLD1RO_BF16 : SInst<"svld1ro[_{2}]", "dPc", "b", MergeNone, "aarch64_sve_ld1ro">;
Expand Down
14 changes: 14 additions & 0 deletions clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1ro.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,20 @@
#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
#endif

// CHECK-LABEL: @test_svld1ro_mf8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ld1ro.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z16test_svld1ro_mf8u10__SVBool_tPKu6__mfp8(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ld1ro.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svmfloat8_t test_svld1ro_mf8(svbool_t pg, mfloat8_t const *base) {
return SVE_ACLE_FUNC(svld1ro, _mf8, , )(pg, base);
}

// CHECK-LABEL: @test_svld1ro_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ld1ro.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
Expand Down
15 changes: 15 additions & 0 deletions clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1rq.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,21 @@
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif

// CHECK-LABEL: @test_svld1rq_mf8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z16test_svld1rq_mf8u10__SVBool_tPKu6__mfp8(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svmfloat8_t test_svld1rq_mf8(svbool_t pg, mfloat8_t const *base) MODE_ATTR
{
return SVE_ACLE_FUNC(svld1rq,_mf8,,)(pg, base);
}

// CHECK-LABEL: @test_svld1rq_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
Expand Down
32 changes: 32 additions & 0 deletions clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldff1.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,21 @@
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif

// CHECK-LABEL: @test_svldff1_mf8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z16test_svldff1_mf8u10__SVBool_tPKu6__mfp8(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svmfloat8_t test_svldff1_mf8(svbool_t pg, const mfloat8_t *base)
{
return SVE_ACLE_FUNC(svldff1,_mf8,,)(pg, base);
}

// CHECK-LABEL: @test_svldff1_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
Expand Down Expand Up @@ -197,6 +212,23 @@ svfloat64_t test_svldff1_f64(svbool_t pg, const float64_t *base)
return SVE_ACLE_FUNC(svldff1,_f64,,)(pg, base);
}

// CHECK-LABEL: @test_svldff1_vnum_mf8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z21test_svldff1_vnum_mf8u10__SVBool_tPKu6__mfp8l(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
//
svmfloat8_t test_svldff1_vnum_mf8(svbool_t pg, mfloat8_t const *base, int64_t vnum)
{
return SVE_ACLE_FUNC(svldff1_vnum,_mf8,,)(pg, base, vnum);
}

// CHECK-LABEL: @test_svldff1_vnum_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
Expand Down
32 changes: 32 additions & 0 deletions clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnf1.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,21 @@
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif

// CHECK-LABEL: @test_svldnf1_mf8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z16test_svldnf1_mf8u10__SVBool_tPKu6__mfp8(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svmfloat8_t test_svldnf1_mf8(svbool_t pg, mfloat8_t const *base)
{
return SVE_ACLE_FUNC(svldnf1,_mf8,,)(pg, base);
}

// CHECK-LABEL: @test_svldnf1_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
Expand Down Expand Up @@ -197,6 +212,23 @@ svfloat64_t test_svldnf1_f64(svbool_t pg, const float64_t *base)
return SVE_ACLE_FUNC(svldnf1,_f64,,)(pg, base);
}

// CHECK-LABEL: @test_svldnf1_vnum_mf8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z21test_svldnf1_vnum_mf8u10__SVBool_tPKu6__mfp8l(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
//
svmfloat8_t test_svldnf1_vnum_mf8(svbool_t pg, mfloat8_t const *base, int64_t vnum)
{
return SVE_ACLE_FUNC(svldnf1_vnum,_mf8,,)(pg, base, vnum);
}

// CHECK-LABEL: @test_svldnf1_vnum_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
Expand Down
Loading