@@ -15,8 +15,8 @@ gpu.module @kernels {
15
15
16
16
// CHECK: %[[MASK:.+]] = spirv.Constant 8 : i32
17
17
// CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32
18
- // CHECK: %{{.+}} = spirv.Constant true
19
18
// CHECK: %{{.+}} = spirv.GroupNonUniformShuffleXor <Subgroup> %[[VAL]], %[[MASK]] : f32, i32
19
+ // CHECK: %{{.+}} = spirv.Constant true
20
20
%result , %valid = gpu.shuffle xor %val , %mask , %width : f32
21
21
gpu.return
22
22
}
@@ -64,8 +64,8 @@ gpu.module @kernels {
64
64
65
65
// CHECK: %[[MASK:.+]] = spirv.Constant 8 : i32
66
66
// CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32
67
- // CHECK: %{{.+}} = spirv.Constant true
68
67
// CHECK: %{{.+}} = spirv.GroupNonUniformShuffle <Subgroup> %[[VAL]], %[[MASK]] : f32, i32
68
+ // CHECK: %{{.+}} = spirv.Constant true
69
69
%result , %valid = gpu.shuffle idx %val , %mask , %width : f32
70
70
gpu.return
71
71
}
@@ -92,24 +92,10 @@ gpu.module @kernels {
92
92
// CHECK: %[[OFFSET:.+]] = spirv.Constant 4 : i32
93
93
// CHECK: %[[WIDTH:.+]] = spirv.Constant 16 : i32
94
94
// CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32
95
- // CHECK: %{{.+}} = spirv.Constant true
96
95
// CHECK: %{{.+}} = spirv.GroupNonUniformShuffleDown <Subgroup> %[[VAL]], %[[OFFSET]] : f32, i32
97
96
98
- // CHECK: %[[BLOCK_SIZE_X:.+]] = spirv.Constant 16 : i32
99
- // CHECK: %[[BLOCK_SIZE_Y:.+]] = spirv.Constant 1 : i32
100
- // CHECK: %__builtin__LocalInvocationId___addr = spirv.mlir.addressof @__builtin__LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
101
- // CHECK: %[[WORKGROUP:.+]] = spirv.Load "Input" %__builtin__LocalInvocationId___addr : vector<3xi32>
102
- // CHECK: %[[THREAD_X:.+]] = spirv.CompositeExtract %[[WORKGROUP]][0 : i32] : vector<3xi32>
103
- // CHECK: %__builtin__LocalInvocationId___addr_1 = spirv.mlir.addressof @__builtin__LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
104
- // CHECK: %[[WORKGROUP_1:.+]] = spirv.Load "Input" %__builtin__LocalInvocationId___addr_1 : vector<3xi32>
105
- // CHECK: %[[THREAD_Y:.+]] = spirv.CompositeExtract %[[WORKGROUP_1]][1 : i32] : vector<3xi32>
106
- // CHECK: %__builtin__LocalInvocationId___addr_2 = spirv.mlir.addressof @__builtin__LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
107
- // CHECK: %[[WORKGROUP_2:.+]] = spirv.Load "Input" %__builtin__LocalInvocationId___addr_2 : vector<3xi32>
108
- // CHECK: %[[THREAD_Z:.+]] = spirv.CompositeExtract %[[WORKGROUP_2]][2 : i32] : vector<3xi32>
109
- // CHECK: %[[S0:.+]] = spirv.IMul %[[THREAD_Z]], %[[BLOCK_SIZE_Y]] : i32
110
- // CHECK: %[[S1:.+]] = spirv.IAdd %[[S0]], %[[THREAD_Y]] : i32
111
- // CHECK: %[[S2:.+]] = spirv.IMul %[[S1]], %[[BLOCK_SIZE_X]] : i32
112
- // CHECK: %[[LANE_ID:.+]] = spirv.IAdd %[[S2]], %[[THREAD_X]] : i32
97
+ // CHECK: %[[INVOCATION_ID_ADDR:.+]] = spirv.mlir.addressof @__builtin__SubgroupLocalInvocationId__ : !spirv.ptr<i32, Input>
98
+ // CHECK: %[[LANE_ID:.+]] = spirv.Load "Input" %[[INVOCATION_ID_ADDR]] : i32
113
99
// CHECK: %[[VAL_LANE_ID:.+]] = spirv.IAdd %[[LANE_ID]], %[[OFFSET]] : i32
114
100
// CHECK: %[[VALID:.+]] = spirv.ULessThan %[[VAL_LANE_ID]], %[[WIDTH]] : i32
115
101
@@ -139,24 +125,10 @@ gpu.module @kernels {
139
125
// CHECK: %[[OFFSET:.+]] = spirv.Constant 4 : i32
140
126
// CHECK: %[[WIDTH:.+]] = spirv.Constant 16 : i32
141
127
// CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32
142
- // CHECK: %{{.+}} = spirv.Constant true
143
128
// CHECK: %{{.+}} = spirv.GroupNonUniformShuffleUp <Subgroup> %[[VAL]], %[[OFFSET]] : f32, i32
144
129
145
- // CHECK: %[[BLOCK_SIZE_X:.+]] = spirv.Constant 16 : i32
146
- // CHECK: %[[BLOCK_SIZE_Y:.+]] = spirv.Constant 1 : i32
147
- // CHECK: %__builtin__LocalInvocationId___addr = spirv.mlir.addressof @__builtin__LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
148
- // CHECK: %[[WORKGROUP:.+]] = spirv.Load "Input" %__builtin__LocalInvocationId___addr : vector<3xi32>
149
- // CHECK: %[[THREAD_X:.+]] = spirv.CompositeExtract %[[WORKGROUP]][0 : i32] : vector<3xi32>
150
- // CHECK: %__builtin__LocalInvocationId___addr_1 = spirv.mlir.addressof @__builtin__LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
151
- // CHECK: %[[WORKGROUP_1:.+]] = spirv.Load "Input" %__builtin__LocalInvocationId___addr_1 : vector<3xi32>
152
- // CHECK: %[[THREAD_Y:.+]] = spirv.CompositeExtract %[[WORKGROUP_1]][1 : i32] : vector<3xi32>
153
- // CHECK: %__builtin__LocalInvocationId___addr_2 = spirv.mlir.addressof @__builtin__LocalInvocationId__ : !spirv.ptr<vector<3xi32>, Input>
154
- // CHECK: %[[WORKGROUP_2:.+]] = spirv.Load "Input" %__builtin__LocalInvocationId___addr_2 : vector<3xi32>
155
- // CHECK: %[[THREAD_Z:.+]] = spirv.CompositeExtract %[[WORKGROUP_2]][2 : i32] : vector<3xi32>
156
- // CHECK: %[[S0:.+]] = spirv.IMul %[[THREAD_Z]], %[[BLOCK_SIZE_Y]] : i32
157
- // CHECK: %[[S1:.+]] = spirv.IAdd %[[S0]], %[[THREAD_Y]] : i32
158
- // CHECK: %[[S2:.+]] = spirv.IMul %[[S1]], %[[BLOCK_SIZE_X]] : i32
159
- // CHECK: %[[LANE_ID:.+]] = spirv.IAdd %[[S2]], %[[THREAD_X]] : i32
130
+ // CHECK: %[[INVOCATION_ID_ADDR:.+]] = spirv.mlir.addressof @__builtin__SubgroupLocalInvocationId__ : !spirv.ptr<i32, Input>
131
+ // CHECK: %[[LANE_ID:.+]] = spirv.Load "Input" %[[INVOCATION_ID_ADDR]] : i32
160
132
// CHECK: %[[VAL_LANE_ID:.+]] = spirv.ISub %[[LANE_ID]], %[[OFFSET]] : i32
161
133
// CHECK: %[[CST0:.+]] = spirv.Constant 0 : i32
162
134
// CHECK: %[[VALID:.+]] = spirv.SGreaterThanEqual %[[VAL_LANE_ID]], %[[CST0]] : i32
0 commit comments