forked from intel/llvm
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgroup_shuffle.cpp
More file actions
151 lines (144 loc) · 12.7 KB
/
group_shuffle.cpp
File metadata and controls
151 lines (144 loc) · 12.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clangxx -O3 -fsycl -fsycl-device-only -fno-discard-value-names -S -emit-llvm -fno-sycl-instrument-device-code -o - %s | FileCheck %s
// REQUIRES: linux
#include <sycl/sycl.hpp>
using namespace sycl;
using namespace sycl::ext::oneapi;
// CHECK-LABEL: @_Z13test_shuffle1RN4sycl3_V19sub_groupEPNS0_3vecINS0_3ext6oneapi8bfloat16ELi4EEEm(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[AGG_TMP14_I:%.*]] = alloca %"class.sycl::_V1::vec", align 8
// CHECK-NEXT: [[REF_TMP:%.*]] = alloca %"class.sycl::_V1::vec", align 8
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw %"class.sycl::_V1::vec", ptr addrspace(4) [[BUF:%.*]], i64 [[ID:%.*]]
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) [[ARRAYIDX]], align 8
// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[REF_TMP]]) #[[ATTR5:[0-9]+]]
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]])
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]])
// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[AGG_TMP14_I]]), !noalias [[META11]]
// CHECK-NEXT: store i64 [[TMP0]], ptr [[AGG_TMP14_I]], align 8, !noalias [[META11]]
// CHECK-NEXT: br label [[FOR_COND_I_I:%.*]]
// CHECK: for.cond.i.i:
// CHECK-NEXT: [[S_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ]
// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[S_0_I_I]], 4
// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V117SELECT_FROM_GROUPINS0_9SUB_GROUPENS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI4EEEEENST9ENABLE_IFIXAAOOSR3STDE9IS_SAME_VINST5DECAYIT_E4TYPEES2_ESR4SYCL3EXT6ONEAPI12EXPERIMENTALE27IS_USER_CONSTRUCTED_GROUP_VISC_EOOSR3STDE23IS_TRIVIALLY_COPYABLE_VIT0_ESR6DETAIL6IS_VECISD_EE5VALUEESD_E4TYPEESA_SD_NSA_7ID_TYPEE_EXIT:%.*]]
// CHECK: for.body.i.i:
// CHECK-NEXT: [[IDXPROM_I_I_I:%.*]] = zext nneg i32 [[S_0_I_I]] to i64
// CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr [[AGG_TMP14_I]], i64 [[IDXPROM_I_I_I]]
// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA17:![0-9]+]], !noalias [[META19:![0-9]+]]
// CHECK-NEXT: [[CALL3_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @_Z28__spirv_SubgroupShuffleINTELtj(i16 noundef zeroext [[TMP1]], i32 noundef 1) #[[ATTR6:[0-9]+]], !noalias [[META20:![0-9]+]]
// CHECK-NEXT: [[ARRAYIDX_I12_I_I:%.*]] = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr [[REF_TMP]], i64 [[IDXPROM_I_I_I]]
// CHECK-NEXT: store i16 [[CALL3_I_I_I_I]], ptr [[ARRAYIDX_I12_I_I]], align 2, !tbaa [[TBAA17]], !alias.scope [[META19]]
// CHECK-NEXT: [[INC_I_I]] = add nuw nsw i32 [[S_0_I_I]], 1
// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP23:![0-9]+]]
// CHECK: _ZN4sycl3_V117select_from_groupINS0_9sub_groupENS0_3vecINS0_3ext6oneapi8bfloat16ELi4EEEEENSt9enable_ifIXaaoosr3stdE9is_same_vINSt5decayIT_E4typeES2_Esr4sycl3ext6oneapi12experimentalE27is_user_constructed_group_vISC_Eoosr3stdE23is_trivially_copyable_vIT0_Esr6detail6is_vecISD_EE5valueESD_E4typeESA_SD_NSA_7id_typeE.exit:
// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[AGG_TMP14_I]]), !noalias [[META11]]
// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[REF_TMP]], align 8
// CHECK-NEXT: store i64 [[TMP2]], ptr addrspace(4) [[ARRAYIDX]], align 8
// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[REF_TMP]]) #[[ATTR5]]
// CHECK-NEXT: ret void
//
SYCL_EXTERNAL void test_shuffle1(sycl::sub_group &sg, vec<bfloat16, 4> *buf,
size_t id) {
vec<bfloat16, 4> ItemVal = buf[id];
buf[id] = sycl::select_from_group(sg, ItemVal, 1);
}
// CHECK-LABEL: @_Z13test_shuffle2RN4sycl3_V19sub_groupEPNS0_6marrayINS0_3ext6oneapi8bfloat16ELm4EEEm(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[AGG_TMP14_I:%.*]] = alloca %"class.sycl::_V1::marray", align 8
// CHECK-NEXT: [[REF_TMP:%.*]] = alloca %"class.sycl::_V1::marray", align 2
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw %"class.sycl::_V1::marray", ptr addrspace(4) [[BUF:%.*]], i64 [[ID:%.*]]
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) [[ARRAYIDX]], align 2, !tbaa [[TBAA25:![0-9]+]]
// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[REF_TMP]]) #[[ATTR5]]
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]])
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]])
// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[AGG_TMP14_I]]), !noalias [[META26]]
// CHECK-NEXT: store i64 [[TMP0]], ptr [[AGG_TMP14_I]], align 8, !noalias [[META26]]
// CHECK-NEXT: br label [[FOR_COND_I_I:%.*]]
// CHECK: for.cond.i.i:
// CHECK-NEXT: [[S_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ]
// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[S_0_I_I]], 4
// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V117SELECT_FROM_GROUPINS0_9SUB_GROUPENS0_6MARRAYINS0_3EXT6ONEAPI8BFLOAT16ELM4EEEEENST9ENABLE_IFIXAAOOSR3STDE9IS_SAME_VINST5DECAYIT_E4TYPEES2_ESR4SYCL3EXT6ONEAPI12EXPERIMENTALE27IS_USER_CONSTRUCTED_GROUP_VISC_EOOSR3STDE23IS_TRIVIALLY_COPYABLE_VIT0_ESR6DETAIL6IS_VECISD_EE5VALUEESD_E4TYPEESA_SD_NSA_7ID_TYPEE_EXIT:%.*]]
// CHECK: for.body.i.i:
// CHECK-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[S_0_I_I]] to i64
// CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr [[AGG_TMP14_I]], i64 [[CONV_I_I]]
// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA17]], !noalias [[META32:![0-9]+]]
// CHECK-NEXT: [[CALL3_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @_Z28__spirv_SubgroupShuffleINTELtj(i16 noundef zeroext [[TMP1]], i32 noundef 1) #[[ATTR6]], !noalias [[META33:![0-9]+]]
// CHECK-NEXT: [[ARRAYIDX_I13_I_I:%.*]] = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr [[REF_TMP]], i64 [[CONV_I_I]]
// CHECK-NEXT: store i16 [[CALL3_I_I_I_I]], ptr [[ARRAYIDX_I13_I_I]], align 2, !tbaa [[TBAA17]], !alias.scope [[META32]]
// CHECK-NEXT: [[INC_I_I]] = add nuw nsw i32 [[S_0_I_I]], 1
// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP36:![0-9]+]]
// CHECK: _ZN4sycl3_V117select_from_groupINS0_9sub_groupENS0_6marrayINS0_3ext6oneapi8bfloat16ELm4EEEEENSt9enable_ifIXaaoosr3stdE9is_same_vINSt5decayIT_E4typeES2_Esr4sycl3ext6oneapi12experimentalE27is_user_constructed_group_vISC_Eoosr3stdE23is_trivially_copyable_vIT0_Esr6detail6is_vecISD_EE5valueESD_E4typeESA_SD_NSA_7id_typeE.exit:
// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[AGG_TMP14_I]]), !noalias [[META26]]
// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[REF_TMP]], align 2, !tbaa [[TBAA25]]
// CHECK-NEXT: store i64 [[TMP2]], ptr addrspace(4) [[ARRAYIDX]], align 2, !tbaa [[TBAA25]]
// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[REF_TMP]]) #[[ATTR5]]
// CHECK-NEXT: ret void
//
SYCL_EXTERNAL void test_shuffle2(sycl::sub_group &sg, marray<bfloat16, 4> *buf,
size_t id) {
marray<bfloat16, 4> ItemVal = buf[id];
buf[id] = sycl::select_from_group(sg, ItemVal, 1);
}
// CHECK-LABEL: @_Z13test_shuffle3RN4sycl3_V19sub_groupEPNS0_6marrayINS0_3ext6oneapi8bfloat16ELm5EEEm(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[AGG_TMP14_I:%.*]] = alloca %"class.sycl::_V1::marray.32", align 8
// CHECK-NEXT: [[REF_TMP:%.*]] = alloca %"class.sycl::_V1::marray.32", align 2
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw %"class.sycl::_V1::marray.32", ptr addrspace(4) [[BUF:%.*]], i64 [[ID:%.*]]
// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[REF_TMP]]) #[[ATTR5]]
// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[AGG_TMP14_I]]), !noalias [[META37:![0-9]+]]
// CHECK-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr noundef nonnull align 8 dereferenceable(10) [[AGG_TMP14_I]], ptr addrspace(4) noundef align 2 dereferenceable(10) [[ARRAYIDX]], i64 10, i1 false)
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META37]])
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META40:![0-9]+]])
// CHECK-NEXT: br label [[FOR_COND_I_I:%.*]]
// CHECK: for.cond.i.i:
// CHECK-NEXT: [[S_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ]
// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[S_0_I_I]], 5
// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V117SELECT_FROM_GROUPINS0_9SUB_GROUPENS0_6MARRAYINS0_3EXT6ONEAPI8BFLOAT16ELM5EEEEENST9ENABLE_IFIXAAOOSR3STDE9IS_SAME_VINST5DECAYIT_E4TYPEES2_ESR4SYCL3EXT6ONEAPI12EXPERIMENTALE27IS_USER_CONSTRUCTED_GROUP_VISC_EOOSR3STDE23IS_TRIVIALLY_COPYABLE_VIT0_ESR6DETAIL6IS_VECISD_EE5VALUEESD_E4TYPEESA_SD_NSA_7ID_TYPEE_EXIT:%.*]]
// CHECK: for.body.i.i:
// CHECK-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[S_0_I_I]] to i64
// CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr [[AGG_TMP14_I]], i64 [[CONV_I_I]]
// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA17]], !noalias [[META43:![0-9]+]]
// CHECK-NEXT: [[CALL3_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @_Z28__spirv_SubgroupShuffleINTELtj(i16 noundef zeroext [[TMP0]], i32 noundef 1) #[[ATTR6]], !noalias [[META44:![0-9]+]]
// CHECK-NEXT: [[ARRAYIDX_I13_I_I:%.*]] = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr [[REF_TMP]], i64 [[CONV_I_I]]
// CHECK-NEXT: store i16 [[CALL3_I_I_I_I]], ptr [[ARRAYIDX_I13_I_I]], align 2, !tbaa [[TBAA17]], !alias.scope [[META43]]
// CHECK-NEXT: [[INC_I_I]] = add nuw nsw i32 [[S_0_I_I]], 1
// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP47:![0-9]+]]
// CHECK: _ZN4sycl3_V117select_from_groupINS0_9sub_groupENS0_6marrayINS0_3ext6oneapi8bfloat16ELm5EEEEENSt9enable_ifIXaaoosr3stdE9is_same_vINSt5decayIT_E4typeES2_Esr4sycl3ext6oneapi12experimentalE27is_user_constructed_group_vISC_Eoosr3stdE23is_trivially_copyable_vIT0_Esr6detail6is_vecISD_EE5valueESD_E4typeESA_SD_NSA_7id_typeE.exit:
// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[AGG_TMP14_I]]), !noalias [[META37]]
// CHECK-NEXT: call void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) align 2 [[ARRAYIDX]], ptr align 2 [[REF_TMP]], i64 10, i1 false), !tbaa.struct [[TBAA_STRUCT48:![0-9]+]]
// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[REF_TMP]]) #[[ATTR5]]
// CHECK-NEXT: ret void
//
SYCL_EXTERNAL void test_shuffle3(sycl::sub_group &sg, marray<bfloat16, 5> *buf,
size_t id) {
marray<bfloat16, 5> ItemVal = buf[id];
buf[id] = sycl::select_from_group(sg, ItemVal, 1);
}
// CHECK-LABEL: @_Z13test_shuffle4RN4sycl3_V19sub_groupEPPim(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr addrspace(4), ptr addrspace(4) [[BUF:%.*]], i64 [[ID:%.*]]
// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ARRAYIDX]], align 8, !tbaa [[TBAA49:![0-9]+]]
// CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(4) [[TMP0]] to i64
// CHECK-NEXT: [[CALL3_I_I_I:%.*]] = tail call spir_func noundef i64 @_Z28__spirv_SubgroupShuffleINTELmj(i64 noundef [[TMP1]], i32 noundef 1) #[[ATTR6]]
// CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[CALL3_I_I_I]] to ptr addrspace(4)
// CHECK-NEXT: store ptr addrspace(4) [[TMP2]], ptr addrspace(4) [[ARRAYIDX]], align 8, !tbaa [[TBAA49]]
// CHECK-NEXT: ret void
//
SYCL_EXTERNAL void test_shuffle4(sycl::sub_group &sg, int **buf, size_t id) {
int *ItemVal = buf[id];
buf[id] = sycl::select_from_group(sg, ItemVal, 1);
}
// CHECK-LABEL: @_Z13test_shuffle5RN4sycl3_V19sub_groupEPPVim(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr addrspace(4), ptr addrspace(4) [[BUF:%.*]], i64 [[ID:%.*]]
// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ARRAYIDX]], align 8, !tbaa [[TBAA49]]
// CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(4) [[TMP0]] to i64
// CHECK-NEXT: [[CALL3_I_I_I:%.*]] = tail call spir_func noundef i64 @_Z28__spirv_SubgroupShuffleINTELmj(i64 noundef [[TMP1]], i32 noundef 1) #[[ATTR6]]
// CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[CALL3_I_I_I]] to ptr addrspace(4)
// CHECK-NEXT: store ptr addrspace(4) [[TMP2]], ptr addrspace(4) [[ARRAYIDX]], align 8, !tbaa [[TBAA49]]
// CHECK-NEXT: ret void
//
SYCL_EXTERNAL void test_shuffle5(sycl::sub_group &sg, volatile int **buf,
size_t id) {
volatile int *ItemVal = buf[id];
buf[id] = sycl::select_from_group(sg, ItemVal, 1);
}