Skip to content

Commit 8f37daf

Browse files
fix loop issues
1 parent de2d885 commit 8f37daf

File tree

82 files changed

+26775
-413
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

82 files changed

+26775
-413
lines changed
Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
#--- source.hlsl
2+
// Output log written by main(): every recorded wave operation stores three
// consecutive uints here (a tag, then ballot.x and ballot.y).
RWStructuredBuffer<uint> _participant_bit : register(u0);
3+
// Write cursor into _participant_bit. Only element 0 is used; main() advances
// it by 3 with InterlockedAdd each time it records a wave operation.
RWStructuredBuffer<uint> _wave_op_index : register(u1);
4+
5+
// Compute entry point for a wave-participation tracking test (4 threads per group).
//
// All branching is driven by WaveGetLaneIndex(); the tid parameter is never read.
// Every "record" site below follows the same pattern:
//   1. run a wave intrinsic and add its value to `result`;
//   2. reserve three slots in _participant_bit by atomically adding 3 to
//      _wave_op_index[0] (the previous counter value comes back in `temp`);
//   3. store a tag at [temp]: a per-site constant shifted left by 6, OR-ed with
//      (loop counter << 4) when the site is inside a loop;
//   4. store ballot.x and ballot.y of WaveActiveBallot(1) at [temp + 1] and [temp + 2].
// `result` is only accumulated; nothing in this function writes it to a buffer.
// NOTE(review): the per-site constants (28, 39, 50, ...) look like identifiers
// assigned by whatever generated this test - confirm against the generator.
[numthreads(4, 1, 1)]
6+
void main(uint3 tid : SV_DispatchThreadID) {
7+
uint result = 0;
// First phase: split by lane parity (even lanes -> case 0, odd lanes -> case 1).
8+
switch ((WaveGetLaneIndex() % 2)) {
9+
case 0: {
// Three iterations; i0 is folded into bits 4-5 of every tag written in this loop.
10+
for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) {
11+
if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) {
// Unreachable: nested under (lane == 1 || lane == 2), so the lane index
// can be neither 0 nor 3 here.
12+
if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) {
13+
result = (result + WaveActiveMax(result));
14+
uint temp = 0;
15+
InterlockedAdd(_wave_op_index[0], 3, temp);
16+
_participant_bit[temp] = ((28 << 6) | (i0 << 4));
17+
uint4 ballot = WaveActiveBallot(1);
18+
_participant_bit[(temp + 1)] = ballot.x;
19+
_participant_bit[(temp + 2)] = ballot.y;
20+
}
21+
if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) {
22+
result = (result + WaveActiveMax(WaveGetLaneIndex()));
23+
uint temp = 0;
24+
InterlockedAdd(_wave_op_index[0], 3, temp);
25+
_participant_bit[temp] = ((39 << 6) | (i0 << 4));
26+
uint4 ballot = WaveActiveBallot(1);
27+
_participant_bit[(temp + 1)] = ballot.x;
28+
_participant_bit[(temp + 2)] = ballot.y;
29+
}
30+
} else {
31+
if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) {
32+
result = (result + WaveActiveMax(WaveGetLaneIndex()));
33+
uint temp = 0;
34+
InterlockedAdd(_wave_op_index[0], 3, temp);
35+
_participant_bit[temp] = ((50 << 6) | (i0 << 4));
36+
uint4 ballot = WaveActiveBallot(1);
37+
_participant_bit[(temp + 1)] = ballot.x;
38+
_participant_bit[(temp + 2)] = ballot.y;
39+
}
40+
}
// Both tests sit at the very end of the loop body: the continue at i0 == 1
// only skips the (false) i0 == 2 check, and the break at i0 == 2 leaves on
// what would have been the final iteration anyway. Neither changes which
// iterations run; they only add extra control-flow edges to the loop.
41+
if ((i0 == 1)) {
42+
continue;
43+
}
44+
if ((i0 == 2)) {
45+
break;
46+
}
47+
}
48+
break;
49+
}
50+
case 1: {
// Odd lanes are split again, this time by lane % 3.
51+
switch ((WaveGetLaneIndex() % 3)) {
52+
case 0: {
53+
if ((WaveGetLaneIndex() < 8)) {
54+
result = (result + WaveActiveSum(1));
55+
uint temp = 0;
56+
InterlockedAdd(_wave_op_index[0], 3, temp);
57+
_participant_bit[temp] = (66 << 6);
58+
uint4 ballot = WaveActiveBallot(1);
59+
_participant_bit[(temp + 1)] = ballot.x;
60+
_participant_bit[(temp + 2)] = ballot.y;
61+
}
62+
break;
63+
}
64+
case 1: {
// Unreachable: the enclosing parity case only admits odd lanes, so
// (lane % 2) == 0 is always false here.
65+
if (((WaveGetLaneIndex() % 2) == 0)) {
66+
result = (result + WaveActiveSum(2));
67+
uint temp = 0;
68+
InterlockedAdd(_wave_op_index[0], 3, temp);
69+
_participant_bit[temp] = (75 << 6);
70+
uint4 ballot = WaveActiveBallot(1);
71+
_participant_bit[(temp + 1)] = ballot.x;
72+
_participant_bit[(temp + 2)] = ballot.y;
73+
}
74+
break;
75+
}
76+
case 2: {
77+
if (true) {
78+
result = (result + WaveActiveSum(3));
79+
uint temp = 0;
80+
InterlockedAdd(_wave_op_index[0], 3, temp);
81+
_participant_bit[temp] = (80 << 6);
82+
uint4 ballot = WaveActiveBallot(1);
83+
_participant_bit[(temp + 1)] = ballot.x;
84+
_participant_bit[(temp + 2)] = ballot.y;
85+
}
86+
break;
87+
}
88+
}
89+
break;
90+
}
91+
}
// Second phase, reached by every lane once the parity switch is done:
// split by lane % 3.
92+
switch ((WaveGetLaneIndex() % 3)) {
93+
case 0: {
94+
if ((WaveGetLaneIndex() < 8)) {
95+
result = (result + WaveActiveSum(1));
96+
uint temp = 0;
97+
InterlockedAdd(_wave_op_index[0], 3, temp);
98+
_participant_bit[temp] = (90 << 6);
99+
uint4 ballot = WaveActiveBallot(1);
100+
_participant_bit[(temp + 1)] = ballot.x;
101+
_participant_bit[(temp + 2)] = ballot.y;
102+
}
103+
break;
104+
}
105+
case 1: {
// Two iterations; i1 is folded into bits 4-5 of every tag written in this loop.
106+
for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) {
107+
if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) {
// Repeats the enclosing condition, so it is always true at this point.
108+
if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) {
109+
result = (result + WaveActiveMin((WaveGetLaneIndex() + 3)));
110+
uint temp = 0;
111+
InterlockedAdd(_wave_op_index[0], 3, temp);
112+
_participant_bit[temp] = ((118 << 6) | (i1 << 4));
113+
uint4 ballot = WaveActiveBallot(1);
114+
_participant_bit[(temp + 1)] = ballot.x;
115+
_participant_bit[(temp + 2)] = ballot.y;
116+
}
// Unreachable: this case requires lane % 3 == 1, which excludes
// both lane 0 and lane 2.
117+
if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) {
118+
result = (result + WaveActiveMin((WaveGetLaneIndex() + 4)));
119+
uint temp = 0;
120+
InterlockedAdd(_wave_op_index[0], 3, temp);
121+
_participant_bit[temp] = ((131 << 6) | (i1 << 4));
122+
uint4 ballot = WaveActiveBallot(1);
123+
_participant_bit[(temp + 1)] = ballot.x;
124+
_participant_bit[(temp + 2)] = ballot.y;
125+
}
126+
} else {
127+
if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) {
128+
result = (result + WaveActiveMax(result));
129+
uint temp = 0;
130+
InterlockedAdd(_wave_op_index[0], 3, temp);
131+
_participant_bit[temp] = ((142 << 6) | (i1 << 4));
132+
uint4 ballot = WaveActiveBallot(1);
133+
_participant_bit[(temp + 1)] = ballot.x;
134+
_participant_bit[(temp + 2)] = ballot.y;
135+
}
136+
}
137+
if (((WaveGetLaneIndex() & 1) == 1)) {
138+
result = (result + WaveActiveMax(result));
139+
uint temp = 0;
140+
InterlockedAdd(_wave_op_index[0], 3, temp);
141+
_participant_bit[temp] = ((151 << 6) | (i1 << 4));
142+
uint4 ballot = WaveActiveBallot(1);
143+
_participant_bit[(temp + 1)] = ballot.x;
144+
_participant_bit[(temp + 2)] = ballot.y;
145+
}
146+
}
147+
break;
148+
}
149+
case 2: {
150+
if (true) {
151+
result = (result + WaveActiveSum(3));
152+
uint temp = 0;
153+
InterlockedAdd(_wave_op_index[0], 3, temp);
154+
_participant_bit[temp] = (156 << 6);
155+
uint4 ballot = WaveActiveBallot(1);
156+
_participant_bit[(temp + 1)] = ballot.x;
157+
_participant_bit[(temp + 2)] = ballot.y;
158+
}
159+
break;
160+
}
161+
}
162+
}
163+
164+
#--- pipeline.yaml
165+
---
# Pipeline description passed to the offloader by the RUN lines at the end of
# this file: one compute shader (entry point main), the buffers it uses, the
# validation rule, and the resource bindings for u0 / u1.
166+
Shaders:
167+
- Stage: Compute
168+
Entry: main
169+
DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
170+
Buffers:
171+
- Name: _participant_bit
172+
Format: UInt32
173+
Stride: 4
174+
Fill: 0
# 42 = 14 records x 3 uints, the same number of values as the
# expected_bit_patterns data below.
175+
Size: 42
176+
- Name: expected_bit_patterns
177+
Format: UInt32
178+
Stride: 4
# Triples in the order the shader writes them per record: tag, ballot.x, ballot.y.
# Each tag is (per-site constant << 6) | (loop counter << 4), for example
# 2496 = 39 << 6 and 2512 = (39 << 6) | (1 << 4).
179+
Data: [2496, 4, 0, 2512, 4, 0, 2528, 4, 0, 3200, 1, 0, 3216, 1, 0, 3232, 1, 0, 4224, 8, 0, 5760, 9, 0, 5760, 9, 0, 7552, 2, 0, 7568, 2, 0, 9664, 2, 0, 9680, 2, 0, 9984, 4, 0]
180+
- Name: _wave_op_index
181+
Format: UInt32
182+
Stride: 4
# The counter starts at 0, so the first record lands at _participant_bit[0].
183+
Data: [0]
184+
Results:
185+
- Result: BitTrackingValidation
186+
Rule: BufferParticipantPattern
# Matches the three uints the shader stores per record.
# NOTE(review): presumably the rule compares the buffers in groups of this
# size - confirm against the offloader's BufferParticipantPattern rule.
187+
GroupSize: 3
188+
Actual: _participant_bit
189+
Expected: expected_bit_patterns
190+
DescriptorSets:
191+
- Resources:
192+
- Name: _participant_bit
193+
Kind: RWStructuredBuffer
194+
DirectXBinding:
195+
Register: 0
196+
Space: 0
197+
VulkanBinding:
198+
Binding: 0
199+
- Name: _wave_op_index
200+
Kind: RWStructuredBuffer
201+
DirectXBinding:
202+
Register: 1
203+
Space: 0
204+
VulkanBinding:
205+
Binding: 1
206+
...
207+
#--- end
208+
209+
# RUN: split-file %s %t
210+
# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
211+
# RUN: %offloader %t/pipeline.yaml %t.o

0 commit comments

Comments
 (0)