Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

[SYCL][L0] Add tests for reuse of l0 events in plugin #967

Draft
wants to merge 4 commits into
base: intel
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions SYCL/Plugin/level-zero-event-leak.cpp
Original file line number Diff line number Diff line change
@@ -3,7 +3,12 @@
// UNSUPPORTED: windows
//
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %level_zero_options %s -o %t.out
// RUN: env SYCL_DEVICE_FILTER=level_zero ZE_DEBUG=4 %GPU_RUN_PLACEHOLDER %t.out 2>&1 %GPU_CHECK_PLACEHOLDER
// RUN: env SYCL_DEVICE_FILTER=level_zero ZE_DEBUG=4 %GPU_RUN_PLACEHOLDER %t.out wait 2>&1 %GPU_CHECK_PLACEHOLDER
// RUN: env SYCL_DEVICE_FILTER=level_zero ZE_DEBUG=4 %GPU_RUN_PLACEHOLDER %t.out nowait 2>&1 %GPU_CHECK_PLACEHOLDER
//
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %level_zero_options %s -DCHECK_INORDER -o %t.inorder.out
// RUN: env SYCL_DEVICE_FILTER=level_zero ZE_DEBUG=4 %GPU_RUN_PLACEHOLDER %t.inorder.out wait 2>&1 %GPU_CHECK_PLACEHOLDER
// RUN: env SYCL_DEVICE_FILTER=level_zero ZE_DEBUG=4 %GPU_RUN_PLACEHOLDER %t.inorder.out nowait 2>&1 %GPU_CHECK_PLACEHOLDER
//
// CHECK-NOT: LEAK

@@ -36,10 +41,31 @@
#include <CL/sycl.hpp>
using namespace cl;
int main(int argc, char **argv) {
assert(argc == 2 && "Invalid number of arguments");
std::string use_queue_finish(argv[1]);

bool Use = false;
if (use_queue_finish == "wait") {
Use = true;
std::cerr << "Use queue::wait" << std::endl;
} else if (use_queue_finish == "nowait") {
std::cerr << "No wait. Ensure resources are released anyway" << std::endl;
} else {
assert(0 && "Unsupported parameter value");
}

#ifdef CHECK_INORDER
sycl::queue Q({sycl::property::queue::in_order{}});
#else
sycl::queue Q;
#endif

const unsigned n_chunk = 1000;
for (int i = 0; i < n_chunk; i++)
Q.single_task([=]() {});
Q.wait();

if (Use)
Q.wait();

return 0;
}
128 changes: 128 additions & 0 deletions SYCL/Plugin/level_zero_inorder.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
// REQUIRES: level_zero
//
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
//
// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=0 SYCL_DEVICE_FILTER=level_zero %GPU_RUN_PLACEHOLDER %t.out
// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=1 SYCL_DEVICE_FILTER=level_zero %GPU_RUN_PLACEHOLDER %t.out
// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=2 SYCL_DEVICE_FILTER=level_zero %GPU_RUN_PLACEHOLDER %t.out
// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=3 SYCL_DEVICE_FILTER=level_zero %GPU_RUN_PLACEHOLDER %t.out
//
// The test checks that the kernels are executed in-order, regardless of
// batching. IMPORTANT NOTE: this is a critical test, double-check if your
// changes are related to L0 events and links between commands.

#include <CL/sycl.hpp>
#include <cassert>
#include <iostream>
#include <numeric>

static constexpr int MAGIC_NUM1 = 2;
static constexpr int buffer_size = 100;
sycl::usm::alloc AllocType = sycl::usm::alloc::shared;

const size_t PartSize = 5;
const bool PartiallyPrint = buffer_size > 2 * PartSize;

void ValidationPrint(const std::string &vectName, int *vect,
const std::function<int(size_t)> &ExpectedVal) {
std::cerr << vectName;
if (!PartiallyPrint) {
for (size_t i = 0u; i < buffer_size; ++i) {
std::cerr << " " << vect[i];
}
} else {
for (size_t i = 0u; i < PartSize; ++i) {
std::cerr << " " << vect[i];
}
std::cerr << " ... ";
for (size_t i = buffer_size - PartSize; i < buffer_size; ++i) {
std::cerr << " " << vect[i];
}
}

std::cerr << std::endl << "expected[] = ";
if (!PartiallyPrint) {
for (size_t i = 0u; i < buffer_size; ++i) {
std::cerr << " " << ExpectedVal(i);
}
} else {
for (size_t i = 0u; i < PartSize; ++i) {
std::cerr << " " << ExpectedVal(i);
}
std::cerr << " ... ";
for (size_t i = buffer_size - PartSize; i < buffer_size; ++i) {
std::cerr << " " << ExpectedVal(i);
}
}
std::cerr << std::endl;
for (int i = 0; i < buffer_size; ++i) {
if (vect[i] != ExpectedVal(i)) {
std::cerr << "i = " << i << " is wrong!!! " << std::endl;
break;
}
}
std::cerr << std::endl;
}

void IfTrueIncrementByValue(sycl::queue Q, sycl::range<1> Range, int *Harray,
int ValueToCheck, int ValueToIncrement) {
Q.submit([&](sycl::handler &CGH) {
CGH.parallel_for<class increment_usm>(Range, [=](sycl::item<1> itemID) {
size_t i = itemID.get_id(0);
if (Harray[i] == ValueToCheck) {
Harray[i] += ValueToIncrement;
}
});
});
}

void RunCalculation(sycl::queue Q) {
sycl::range<1> Range(buffer_size);
auto Dev = Q.get_device();
if (!Dev.has(sycl::aspect::usm_shared_allocations))
return;

int *values = sycl::malloc<int>(buffer_size, Dev, Q.get_context(), AllocType);

try {
Q.submit([&](sycl::handler &cgh) {
cgh.parallel_for(Range, [=](sycl::item<1> itemID) {
size_t i = itemID.get_id(0);
values[i] = 1;
});
});

IfTrueIncrementByValue(Q, Range, values, 1, 10);

IfTrueIncrementByValue(Q, Range, values, 11, 100);

IfTrueIncrementByValue(Q, Range, values, 111, 1000);

IfTrueIncrementByValue(Q, Range, values, 1111, 10000);

IfTrueIncrementByValue(Q, Range, values, 11111, 100000);

Q.wait();

ValidationPrint("vector[] = ", values, [&](size_t i) { return 111111; });

for (int i = 0; i < buffer_size; ++i) {
int expected = 111111;
assert(values[i] == expected);
}

} catch (sycl::exception &e) {
std::cout << "Exception: " << std::string(e.what()) << std::endl;
}

free(values, Q);
}

int main(int argc, char *argv[]) {
sycl::queue Q({sycl::property::queue::in_order{}});

RunCalculation(Q);

std::cout << "The test passed." << std::endl;
return 0;
}
159 changes: 159 additions & 0 deletions SYCL/Plugin/level_zero_inorder_interleaving_kernel_copy.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
// REQUIRES: level_zero
//
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
//
// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=0 SYCL_DEVICE_FILTER=level_zero %GPU_RUN_PLACEHOLDER %t.out
// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=1 SYCL_DEVICE_FILTER=level_zero %GPU_RUN_PLACEHOLDER %t.out
// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=2 SYCL_DEVICE_FILTER=level_zero %GPU_RUN_PLACEHOLDER %t.out
// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=3 SYCL_DEVICE_FILTER=level_zero %GPU_RUN_PLACEHOLDER %t.out
//
// The test checks that interleaving using copy and kernel operations are
// performed in-order, regardless of batching. IMPORTANT NOTE: this is a
// critical test, double-check if your changes are related to L0 events and
// links between commands.

#include <CL/sycl.hpp>
#include <cassert>
#include <iostream>
#include <numeric>

static constexpr int MAGIC_NUM1 = 2;
static constexpr int buffer_size = 100;
sycl::usm::alloc AllocType = sycl::usm::alloc::device;

const size_t PartSize = 5;
const bool PartiallyPrint = buffer_size > 2 * PartSize;

void ValidationPrint(const std::string &vectName, const std::vector<int> &vect,
const std::function<int(size_t)> &ExpectedVal) {
std::cerr << vectName;
if (!PartiallyPrint) {
for (size_t i = 0u; i < buffer_size; ++i) {
std::cerr << " " << vect[i];
}
} else {
for (size_t i = 0u; i < PartSize; ++i) {
std::cerr << " " << vect[i];
}
std::cerr << " ... ";
for (size_t i = buffer_size - PartSize; i < buffer_size; ++i) {
std::cerr << " " << vect[i];
}
}

std::cerr << std::endl << "expected[] = ";
if (!PartiallyPrint) {
for (size_t i = 0u; i < buffer_size; ++i) {
std::cerr << " " << ExpectedVal(i);
}
} else {
for (size_t i = 0u; i < PartSize; ++i) {
std::cerr << " " << ExpectedVal(i);
}
std::cerr << " ... ";
for (size_t i = buffer_size - PartSize; i < buffer_size; ++i) {
std::cerr << " " << ExpectedVal(i);
}
}
std::cerr << std::endl;
for (int i = 0; i < buffer_size; ++i) {
if (vect[i] != ExpectedVal(i)) {
std::cerr << "i = " << i << " is wrong!!! " << std::endl;
break;
}
}
std::cerr << std::endl;
}

void RunCalculation(sycl::queue Q) {
sycl::range<1> Range(buffer_size);
auto Dev = Q.get_device();
if (!Dev.has(sycl::aspect::usm_device_allocations))
return;

int *Dvalues =
sycl::malloc<int>(buffer_size, Dev, Q.get_context(), AllocType);
int *DvaluesTmp =
sycl::malloc<int>(buffer_size, Dev, Q.get_context(), AllocType);

std::vector<int> Hvalues1(buffer_size, 0);
std::vector<int> HvaluesTmp(buffer_size, 0);
std::iota(Hvalues1.begin(), Hvalues1.end(), 0);

try {
Q.memcpy(Dvalues, Hvalues1.data(), buffer_size * sizeof(int));

Q.submit([&](sycl::handler &cgh) {
cgh.parallel_for(Range, [=](sycl::item<1> itemID) {
size_t i = itemID.get_id(0);
if (Dvalues[i] == i)
Dvalues[i] += 1;
});
});

Q.submit([&](sycl::handler &cgh) {
cgh.parallel_for(Range, [=](sycl::item<1> itemID) {
size_t i = itemID.get_id(0);
if (Dvalues[i] == i + 1)
Dvalues[i] += 10;
});
});

Q.memcpy(Hvalues1.data(), Dvalues, buffer_size * sizeof(int));
Q.memcpy(DvaluesTmp, Hvalues1.data(), buffer_size * sizeof(int));

Q.submit([&](sycl::handler &cgh) {
cgh.parallel_for(Range, [=](sycl::item<1> itemID) {
size_t i = itemID.get_id(0);
if (Dvalues[i] == i + 11)
if (DvaluesTmp[i] == i + 11)
Dvalues[i] += 100;
});
});

Q.submit([&](sycl::handler &cgh) {
cgh.parallel_for(Range, [=](sycl::item<1> itemID) {
size_t i = itemID.get_id(0);
if (Dvalues[i] == i + 111)
Dvalues[i] += 1000;
});
});

Q.submit([&](sycl::handler &cgh) {
cgh.parallel_for(Range, [=](sycl::item<1> itemID) {
size_t i = itemID.get_id(0);
if (Dvalues[i] == i + 1111)
Dvalues[i] += 10000;
});
});

Q.memcpy(Hvalues1.data(), Dvalues, buffer_size * sizeof(int));
Q.memcpy(HvaluesTmp.data(), DvaluesTmp, buffer_size * sizeof(int));
Q.wait();

ValidationPrint("vector1[] = ", Hvalues1,
[&](size_t i) { return i + 11111; });
ValidationPrint("vector2[] = ", HvaluesTmp,
[&](size_t i) { return i + 11; });

for (int i = 0; i < buffer_size; ++i) {
int expected = i + 11111;
assert(Hvalues1[i] == expected);
}

} catch (sycl::exception &e) {
std::cout << "Exception: " << std::string(e.what()) << std::endl;
}

free(Dvalues, Q);
free(DvaluesTmp, Q);
}

int main(int argc, char *argv[]) {
sycl::queue Q({sycl::property::queue::in_order{}});

RunCalculation(Q);

std::cout << "The test passed." << std::endl;
return 0;
}