
Commit 32d681c

refactoring pingpong test
1 parent 2aeccd7

File tree: 1 file changed, +129 -110 lines


pingpong/pingpong.cpp (+129 -110)
@@ -35,14 +35,17 @@ int main(int argc, char* argv[]) {
   // initial value of the counter
   unsigned int initValue = 1234;
 
+  enum P2PTest {
+    LOCKFREE_ATOMIC_COUNTER = 0
+    ,LOCK_ATOMIC_COUNTER_SAME_CACHELINE
 
-  // use lock implementation of counter
-  bool useLock = true;
-
+    ,INVALID_P2P_TEST   // last entry
+  };
+  P2PTest test = LOCKFREE_ATOMIC_COUNTER;
 
   // process the command line arguments
   {
-    const char* options = "h:i:p:";
+    const char* options = "h:i:p:t:";
     int opt;
     while ((opt = getopt(argc, argv, options))!=-1) {
       switch(opt) {
@@ -55,6 +58,10 @@ int main(int argc, char* argv[]) {
         case 'p':
           maxPlayers = atoi(optarg);
           break;
+        case 't':
+          test = (P2PTest) atoi(optarg);
+          assert(test < INVALID_P2P_TEST);
+          break;
         default:
           abort();
       }
@@ -63,6 +70,7 @@ int main(int argc, char* argv[]) {
     printf("Max players: %d\n", maxPlayers);
     printf("# of hits: %d\n", hits);
     printf("Counter initial value: %d\n", initValue);
+    printf("test: %d\n", test);
   }
 
   am_status_t amStatus;
@@ -91,50 +99,28 @@ int main(int argc, char* argv[]) {
   unsigned int numGPUs = std::min((unsigned int)gpus.size(), maxPlayers);
 
   char* hostPinned = nullptr;
+
+  std::atomic<unsigned int>* shared_counter = nullptr;
+  std::atomic<unsigned int>* lock = nullptr;
 
+  switch(test) {
 
+    case LOCKFREE_ATOMIC_COUNTER:
+      hostPinned = (char*) allocate_shared_mem(sizeof(std::atomic<unsigned int>), currentAccelerator);
+      shared_counter = new(hostPinned) std::atomic<unsigned int>(initValue);
+      break;
 
+    case LOCK_ATOMIC_COUNTER_SAME_CACHELINE:
+      // create the counter and the lock on the same cacheline
+      hostPinned = (char*) allocate_shared_mem(sizeof(std::atomic<unsigned int>)*2, currentAccelerator);
+      shared_counter = new(hostPinned) std::atomic<unsigned int>(initValue);
+      lock = new(hostPinned + sizeof(std::atomic<unsigned int>)) std::atomic<unsigned int>(0);
+      break;
 
-#if 1
-  hostPinned = (char*) allocate_shared_mem(sizeof(std::atomic<unsigned int>), currentAccelerator);
-
-#else
-#if USE_HC_AM
-  hostPinned = hc::am_alloc(sizeof(std::atomic<unsigned int>), currentAccelerator
-                            , amHostCoherent
-                            );
-  printf("shared memory address: %p\n",hostPinned);
-  assert(hostPinned != nullptr);
-#else
-  hsa_amd_memory_pool_t* alloc_region = static_cast<hsa_amd_memory_pool_t*>(currentAccelerator.get_hsa_am_finegrained_system_region());
-  assert(alloc_region->handle != -1);
-
-  hsa_status_t hs;
-  hs = hsa_amd_memory_pool_allocate(*alloc_region, sizeof(std::atomic<unsigned int>), 0, (void**)&hostPinned);
-  assert(hs == HSA_STATUS_SUCCESS);
-
-
-  hsa_agent_t agents[numGPUs];
-  for (int i = 0; i < numGPUs; i++) {
-    agents[i] = *(static_cast<hsa_agent_t*> (gpus[i].get_default_view().get_hsa_agent()));
-  }
-  hs = hsa_amd_agents_allow_access(numGPUs, agents, nullptr, hostPinned);
-  assert(hs == HSA_STATUS_SUCCESS);
-#endif
-#endif
-
-
-
-  std::atomic<unsigned int>* shared_counter = new(hostPinned) std::atomic<unsigned int>(initValue);
-
-
-  std::atomic<unsigned int>* lock = nullptr;
-  if (useLock) {
-    hostPinned = (char*) allocate_shared_mem(sizeof(std::atomic<unsigned int>), currentAccelerator);
-    lock = new(hostPinned) std::atomic<unsigned int>(0);
+    default:
+      abort();
   }
 
-
  std::vector<hc::completion_future> futures;
  std::vector<hc::array_view<unsigned int,1>> finalValues;
 
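Note on the LOCK_ATOMIC_COUNTER_SAME_CACHELINE case above: the buffer is sized for two atomics and the lock is placement-constructed immediately after the counter, so the pair ends up on one cacheline. A minimal host-only sketch of that layout, with plain malloc standing in for allocate_shared_mem (which is specific to this test), could look like:

    #include <atomic>
    #include <cstdlib>
    #include <new>

    int main() {
      // one buffer big enough for two atomics; the lock is placed right
      // after the counter so both live on the same cacheline
      char* buf = static_cast<char*>(std::malloc(2 * sizeof(std::atomic<unsigned int>)));
      auto* counter = new (buf) std::atomic<unsigned int>(1234);
      auto* lock    = new (buf + sizeof(std::atomic<unsigned int>)) std::atomic<unsigned int>(0);

      // ... hand counter/lock to the players ...

      lock->~atomic();
      counter->~atomic();
      std::free(buf);
      return 0;
    }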
@@ -143,69 +129,110 @@ int main(int argc, char* argv[]) {
     hc::array_view<unsigned int,1> finalValue(1);
     finalValues.push_back(finalValue);
 
-    futures.push_back(
-      hc::parallel_for_each(gpus[i].get_default_view()
+
+    switch (test) {
+      case LOCKFREE_ATOMIC_COUNTER:
+
+
+      futures.push_back(
+        hc::parallel_for_each(gpus[i].get_default_view()
                             , hc::extent<1>(1)
                             , [=](hc::index<1> idx) [[hc]] {
 
-        // spin for a while here to ensure that all GPUs have started
-        // and that each of them have loaded the inital value of
-        // "shared_counter" into their cache
-        #pragma nounroll
-        for (int j = 0; j < (1024 * 1024 * 16); ++j) {
-          if (shared_counter->load(std::memory_order_relaxed) == 0xFFFFFFFF)
-            break;
-        }
-
-        // counts how many times this GPU has updated the shared_counter
-        unsigned int count = 0;
-
-        unsigned int gpuID = i;
-        unsigned int next = initValue + gpuID;
-
-        // last known value of shared_counter observed by this GPU
-        unsigned int last = shared_counter->load(std::memory_order_relaxed);
-
-
-        // each GPU waits for its turn (according to the gpuID) to increment the shared_counter
-        #pragma nounroll
-        while (count < hits) {
-          unsigned int expected = next;
-          if (useLock) {
-            unsigned int unlocked = 0;
-            if (std::atomic_compare_exchange_weak_explicit(lock
-                                                          , &unlocked
-                                                          , (unsigned int)1
-                                                          , std::memory_order_seq_cst
-                                                          , std::memory_order_relaxed
-                                                          )) {
-
-              if (shared_counter->load(std::memory_order_relaxed) == expected) {
+          // spin for a while here to ensure that all GPUs have started
+          // and that each of them have loaded the inital value of
+          // "shared_counter" into their cache
+          #pragma nounroll
+          for (int j = 0; j < (1024 * 1024 * 16); ++j) {
+            if (shared_counter->load(std::memory_order_relaxed) == 0xFFFFFFFF)
+              break;
+          }
+
+          // counts how many times this GPU has updated the shared_counter
+          unsigned int count = 0;
+
+          unsigned int gpuID = i;
+          unsigned int next = initValue + gpuID;
+
+          // last known value of shared_counter observed by this GPU
+          unsigned int last = shared_counter->load(std::memory_order_relaxed);
+
+
+          // each GPU waits for its turn (according to the gpuID) to increment the shared_counter
+          #pragma nounroll
+          while (count < hits) {
+            unsigned int expected = next;
+            if (std::atomic_compare_exchange_weak_explicit(shared_counter
+                                                          , &expected
+                                                          , expected + 1
+                                                          , std::memory_order_seq_cst
+                                                          , std::memory_order_relaxed
+                                                          )) {
                last = expected;
                next+=numGPUs;
                count++;
-
-                shared_counter->store(expected + 1, std::memory_order_relaxed);
-                lock->store(0, std::memory_order_release);
              }
+          } // while(count < hits)
+          finalValue[0] = last;
+        })
+      );
+      break;
+
+
+      case LOCK_ATOMIC_COUNTER_SAME_CACHELINE:
+
+      futures.push_back(
+        hc::parallel_for_each(gpus[i].get_default_view()
+                            , hc::extent<1>(1)
+                            , [=](hc::index<1> idx) [[hc]] {
+
+          // spin for a while here to ensure that all GPUs have started
+          // and that each of them have loaded the inital value of
+          // "shared_counter" into their cache
+          #pragma nounroll
+          for (int j = 0; j < (1024 * 1024 * 16); ++j) {
+            if (shared_counter->load(std::memory_order_relaxed) == 0xFFFFFFFF)
+              break;
            }
-          }
-          else {
-            if (std::atomic_compare_exchange_weak_explicit(shared_counter
-                                                          , &expected
-                                                          , expected + 1
-                                                          , std::memory_order_seq_cst
-                                                          , std::memory_order_relaxed
-                                                          )) {
-              last = expected;
-              next+=numGPUs;
-              count++;
+
+          // counts how many times this GPU has updated the shared_counter
+          unsigned int count = 0;
+
+          unsigned int gpuID = i;
+          unsigned int next = initValue + gpuID;
+
+          // last known value of shared_counter observed by this GPU
+          unsigned int last = shared_counter->load(std::memory_order_relaxed);
+
+
+          // each GPU waits for its turn (according to the gpuID) to increment the shared_counter
+          #pragma nounroll
+          while (count < hits) {
+            unsigned int expected = next;
+            unsigned int unlocked = 0;
+            if (std::atomic_compare_exchange_weak_explicit(lock
+                                                          , &unlocked
+                                                          , (unsigned int)1
+                                                          , std::memory_order_seq_cst
+                                                          , std::memory_order_relaxed
+                                                          )) {
+
+              if (shared_counter->load(std::memory_order_relaxed) == expected) {
+                last = expected;
+                next+=numGPUs;
+                count++;
+                shared_counter->store(expected + 1, std::memory_order_relaxed);
+              }
+              lock->store(0, std::memory_order_release);
+            }
            }
-          }
-        }
-        finalValue[0] = last;
-      })
-    );
+          finalValue[0] = last;
+        })
+      );
+      break;
+      default:
+        abort();
+    }
 
    std::cout << "GPU %" << i << "(" ;
    std::wcout<< gpus[i].get_description();
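Aside (not part of the diff): the LOCKFREE_ATOMIC_COUNTER kernel above is the usual turn-taking CAS loop; a player's compare-exchange only succeeds when the counter already holds that player's expected value (initValue + gpuID, advancing by numGPUs per turn). A rough host-only sketch of the same protocol, with std::thread standing in for the GPUs and the HC kernels, might look like:

    #include <atomic>
    #include <cstdio>
    #include <thread>
    #include <vector>

    int main() {
      const unsigned int initValue = 1234, hits = 10, numPlayers = 2;
      std::atomic<unsigned int> counter(initValue);

      std::vector<std::thread> players;
      for (unsigned int id = 0; id < numPlayers; ++id) {
        players.emplace_back([&, id] {
          unsigned int count = 0;
          unsigned int next = initValue + id;   // values this player is allowed to bump
          while (count < hits) {
            unsigned int expected = next;
            // succeeds only on this player's turn
            if (counter.compare_exchange_weak(expected, expected + 1,
                                              std::memory_order_seq_cst,
                                              std::memory_order_relaxed)) {
              next += numPlayers;
              ++count;
            }
          }
        });
      }
      for (auto& t : players) t.join();
      std::printf("final counter: %u (expected %u)\n",
                  counter.load(), initValue + hits * numPlayers);
      return 0;
    }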
@@ -220,20 +247,12 @@ int main(int argc, char* argv[]) {
            , i, finalValues[i][0], initValue + (hits-1) * numGPUs + i);
   }
 
-  if (hostPinned) {
-#if 1
-    hc::am_free(shared_counter);
-    if (useLock) {
-      hc::am_free(lock);
-    }
-#else
-#if USE_HC_AM
-    hc::am_free(hostPinned);
-#else
-    hs = hsa_amd_memory_pool_free(hostPinned);
-    assert(hs == HSA_STATUS_SUCCESS);
-#endif
-#endif
+  switch(test) {
+    case LOCKFREE_ATOMIC_COUNTER:
+    case LOCK_ATOMIC_COUNTER_SAME_CACHELINE:
+      hc::am_free(shared_counter);
+      break;
+    default: ;
   }
 
   return 0;
