Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
d7808a8
lib/Support/Pipeline.cpp: Add optional field Fill and Size.
ArberSephirotheca Jun 30, 2025
96f5b4e
add memory convergence tests
ArberSephirotheca Jul 14, 2025
600bb81
update tests
ArberSephirotheca Jul 14, 2025
ef79474
add generated tests
ArberSephirotheca Aug 17, 2025
57ba801
reduce program
ArberSephirotheca Aug 18, 2025
4c64875
update reduced tests
ArberSephirotheca Aug 19, 2025
4a1a4e8
update tests for wave size of 64
ArberSephirotheca Aug 19, 2025
2422167
push more tests with wave size of 64
ArberSephirotheca Aug 19, 2025
f5b21e4
update more wave64 tests
ArberSephirotheca Aug 20, 2025
6db22a6
add more tests
ArberSephirotheca Aug 20, 2025
65a341b
more tests
ArberSephirotheca Aug 20, 2025
59f7ca2
more tests
ArberSephirotheca Aug 20, 2025
23b70fc
tests
ArberSephirotheca Aug 20, 2025
54265ab
more tests
ArberSephirotheca Aug 20, 2025
8959296
more tests
ArberSephirotheca Aug 20, 2025
b79aa01
more tests
ArberSephirotheca Aug 20, 2025
6575531
more tests
ArberSephirotheca Aug 21, 2025
5b1eb7a
more tests
ArberSephirotheca Aug 21, 2025
9bb4ad6
tests
ArberSephirotheca Aug 21, 2025
7d6fe67
wave size 64 tests
ArberSephirotheca Aug 21, 2025
b48ff4a
remove invalid tests
ArberSephirotheca Aug 21, 2025
66bf7d6
more tests
ArberSephirotheca Aug 22, 2025
b7bf815
waveSize 16 tests
ArberSephirotheca Aug 23, 2025
1d73130
wave size 16 tests
ArberSephirotheca Aug 23, 2025
d159b05
test wave size
ArberSephirotheca Aug 23, 2025
d71fb7b
fix typo
ArberSephirotheca Aug 23, 2025
15f7235
add more test
ArberSephirotheca Aug 24, 2025
ad956d6
add cfg file
ArberSephirotheca Aug 24, 2025
2baa055
add more tests for wave size 64
ArberSephirotheca Aug 25, 2025
e883551
add new buffer match rule to work with bittracking
ArberSephirotheca Aug 26, 2025
d1c48bf
update seeds for reproducibility
ArberSephirotheca Aug 26, 2025
3295c7f
update error message for testBufferParticipantPattern
ArberSephirotheca Aug 27, 2025
acab2f0
add more wave size 4 threads 4 tests
ArberSephirotheca Aug 27, 2025
204cf8e
tests wave op under 4 threads
ArberSephirotheca Aug 27, 2025
f8dbefb
update to use the correct binding index
ArberSephirotheca Aug 27, 2025
c1ee262
update to use correct binding
ArberSephirotheca Aug 27, 2025
a177ad9
more tests
ArberSephirotheca Aug 27, 2025
28d920c
more tests without reconvergence bug
ArberSephirotheca Aug 27, 2025
80188e6
fix loop issues
ArberSephirotheca Aug 27, 2025
e89da0e
formatting
ArberSephirotheca Aug 28, 2025
abfaebf
formatting, additional fields to setData
ArberSephirotheca Aug 28, 2025
e82ce96
formatting
ArberSephirotheca Aug 28, 2025
80dd2b3
make clang-tidy happy and remove invalid tests
ArberSephirotheca Aug 28, 2025
aae7c09
add tests for wave size 32
ArberSephirotheca Aug 28, 2025
e196571
upload tests for wave size 16 and 64
ArberSephirotheca Aug 31, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ autoconf/autom4te.cache
.zed
# pythonenv for github Codespaces
pythonenv*
seeds
# clangd index. (".clangd" is a config file now, thus trailing slash)
.clangd/
.cache
46 changes: 46 additions & 0 deletions example_participant_pattern_test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Example test case using BufferParticipantPattern rule for WaveParticipantBitTracking
---
Shaders:
- Stage: Compute
Entry: main
DispatchSize: [8, 1, 1] # 8 threads
Buffers:
# Actual output from the shader (may have patterns in any order)
- Name: ParticipantOutput
Format: UInt32
ZeroInitSize: 96 # Bytes: room for 8 patterns (3 uint32 = 12 bytes per pattern)

# Expected patterns - order doesn't matter, but pattern counts must match
- Name: ExpectedPatterns
Format: UInt32
Data: [
# Pattern 1: Wave op ID 69, loop iteration 0, participants 0,1,2,3
4416, 0x000F, 0x0000, # (69<<6)|0, mask for threads 0-3, high mask
4416, 0x000F, 0x0000, # Duplicate 1
4416, 0x000F, 0x0000, # Duplicate 2
4416, 0x000F, 0x0000, # Duplicate 3 (4 participants = 4 copies)

# Pattern 2: Wave op ID 70, loop iteration 1, participants 4,5,6,7
4496, 0x00F0, 0x0000, # (70<<6)|(1<<4), mask for threads 4-7, high mask
4496, 0x00F0, 0x0000, # Duplicate 1
4496, 0x00F0, 0x0000, # Duplicate 2
4496, 0x00F0, 0x0000, # Duplicate 3 (4 participants = 4 copies)
]

Results:
- Result: ValidateParticipantPatterns
Rule: BufferParticipantPattern
GroupSize: 3 # Each pattern consists of 3 uint32 values
Actual: ParticipantOutput
Expected: ExpectedPatterns

DescriptorSets:
- Resources:
- Name: ParticipantOutput
Kind: RWBuffer
DirectXBinding:
Register: 0
Space: 0
VulkanBinding:
Binding: 0
...
4 changes: 3 additions & 1 deletion include/Support/Pipeline.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ namespace offloadtest {

enum class Stages { Compute };

enum class Rule { BufferExact, BufferFloatULP, BufferFloatEpsilon };
enum class Rule { BufferExact, BufferFloatULP, BufferFloatEpsilon, BufferParticipantPattern };

enum class DenormMode { Any, FTZ, Preserve };

Expand Down Expand Up @@ -131,6 +131,7 @@ struct Result {
DenormMode DM = DenormMode::Any;
unsigned ULPT; // ULP Tolerance
double Epsilon;
unsigned GroupSize = 0; // For BufferParticipantPattern rule
};

struct Resource {
Expand Down Expand Up @@ -357,6 +358,7 @@ template <> struct ScalarEnumerationTraits<offloadtest::Rule> {
ENUM_CASE(BufferExact);
ENUM_CASE(BufferFloatULP);
ENUM_CASE(BufferFloatEpsilon);
ENUM_CASE(BufferParticipantPattern);
#undef ENUM_CASE
}
};
Expand Down
136 changes: 132 additions & 4 deletions lib/Support/Check.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
#include <cmath>
#include <map>
#include <sstream>
#include <tuple>

constexpr uint16_t Float16BitSign = 0x8000;
constexpr uint16_t Float16BitExp = 0x7c00;
Expand Down Expand Up @@ -277,6 +279,127 @@ static bool testBufferFloatULP(offloadtest::Buffer *B1, offloadtest::Buffer *B2,
return false;
}

/// Compare two buffers as unordered multisets of participant patterns.
///
/// Every chunk of each buffer is read as a sequence of 32-bit words that is
/// split into records of \p GroupSize words. A record is the triple
/// (combinedId, maskLow, maskHigh). The buffers match when each distinct
/// record occurs the same number of times in both of them; the position of a
/// record inside its buffer is irrelevant.
///
/// \param B1 Buffer holding the actual records.
/// \param B2 Buffer holding the expected records.
/// \param GroupSize Number of 32-bit words per record. Only 3 is supported.
/// \param ErrorMsg Receives a description of the problem when the function
///        returns false; left untouched on success.
/// \returns true when both buffers contain the same multiset of records.
static bool testBufferParticipantPattern(offloadtest::Buffer *B1,
                                         offloadtest::Buffer *B2,
                                         unsigned GroupSize,
                                         std::string &ErrorMsg) {
  if (GroupSize == 0) {
    ErrorMsg = "Invalid GroupSize (must be > 0)";
    return false;
  }

  // Records are decoded as a fixed (combinedId, maskLow, maskHigh) triple.
  // Any other group size used to fall through without recording a single
  // pattern, which made the comparison succeed vacuously; reject it instead.
  constexpr unsigned SupportedGroupSize = 3;
  if (GroupSize != SupportedGroupSize) {
    std::stringstream Msg;
    Msg << "Unsupported GroupSize " << GroupSize << " (only GroupSize "
        << SupportedGroupSize << " is supported)";
    ErrorMsg = Msg.str();
    return false;
  }

  // Basic structural checks similar to testBufferExact.
  if (B1->ArraySize != B2->ArraySize || B1->size() != B2->size()) {
    ErrorMsg = "Mismatched buffer shape (ArraySize or per-chunk size differs)";
    return false;
  }

  // Records are made of whole 32-bit words.
  if ((B1->size() % sizeof(uint32_t)) != 0) {
    ErrorMsg = "Chunk size is not a multiple of 4 bytes";
    return false;
  }
  if ((B2->size() % sizeof(uint32_t)) != 0) {
    ErrorMsg = "Expected chunk size is not a multiple of 4 bytes";
    return false;
  }

  const uint32_t WordsPerChunk =
      static_cast<uint32_t>(B1->size() / sizeof(uint32_t));
  if (WordsPerChunk % GroupSize != 0) {
    ErrorMsg = "Words per chunk must be a multiple of GroupSize";
    return false;
  }

  using PatternTuple = std::tuple<uint32_t, uint32_t, uint32_t>;
  std::map<PatternTuple, unsigned> ActualPatterns;
  std::map<PatternTuple, unsigned> ExpectedPatterns;

  // memcpy keeps the read well-defined regardless of the chunk's alignment.
  auto ReadU32 = [](const char *Base, uint32_t WordIndex) -> uint32_t {
    uint32_t V;
    std::memcpy(&V, Base + WordIndex * sizeof(uint32_t), sizeof(uint32_t));
    return V;
  };
  auto ReadPattern = [&ReadU32](const char *Base,
                                uint32_t FirstWord) -> PatternTuple {
    return PatternTuple(ReadU32(Base, FirstWord + 0),
                        ReadU32(Base, FirstWord + 1),
                        ReadU32(Base, FirstWord + 2));
  };

  // Count every record of every chunk, walking both buffers in lock step.
  auto *B1It = B1->Data.begin();
  auto *B2It = B2->Data.begin();
  for (; B1It != B1->Data.end() && B2It != B2->Data.end(); ++B1It, ++B2It) {
    const char *ABuf = B1It->get(); // unique_ptr<char[]> -> char*
    const char *EBuf = B2It->get();

    for (uint32_t I = 0; I + GroupSize <= WordsPerChunk; I += GroupSize) {
      ++ActualPatterns[ReadPattern(ABuf, I)];
      ++ExpectedPatterns[ReadPattern(EBuf, I)];
    }
  }

  // Compare the two multisets and collect a readable report.
  std::stringstream Ss;
  bool HasError = false;

  if (ActualPatterns.size() != ExpectedPatterns.size()) {
    Ss << "Pattern kind count mismatch: actual has " << ActualPatterns.size()
       << " unique patterns, expected has " << ExpectedPatterns.size()
       << " unique patterns\n";
    HasError = true;
  }

  // Emits the section header once, unless an earlier message already opened
  // the report, and marks the comparison as failed.
  auto NoteDifference = [&Ss, &HasError]() {
    if (!HasError)
      Ss << "Pattern differences found:\n";
    HasError = true;
  };

  // Writes "(combineId=..., maskLow=0x..., maskHigh=0x...)" and restores
  // decimal formatting for whatever is streamed next.
  auto WritePattern = [&Ss](const PatternTuple &P) {
    Ss << "(combineId=" << std::get<0>(P) << ", maskLow=0x" << std::hex
       << std::get<1>(P) << ", maskHigh=0x" << std::get<2>(P) << std::dec
       << ")";
  };

  // Missing / count-mismatched patterns.
  for (const auto &[Pattern, ExpCount] : ExpectedPatterns) {
    const auto It = ActualPatterns.find(Pattern);
    if (It == ActualPatterns.end()) {
      NoteDifference();
      Ss << " Missing pattern ";
      WritePattern(Pattern);
      Ss << " - expected count: " << ExpCount << ", actual count: 0\n";
    } else if (It->second != ExpCount) {
      NoteDifference();
      Ss << " Pattern ";
      WritePattern(Pattern);
      Ss << " - expected count: " << ExpCount
         << ", actual count: " << It->second << "\n";
    }
  }

  // Patterns that were produced but never expected.
  for (const auto &[Pattern, ActCount] : ActualPatterns) {
    if (ExpectedPatterns.find(Pattern) != ExpectedPatterns.end())
      continue;
    NoteDifference();
    Ss << " Unexpected pattern ";
    WritePattern(Pattern);
    Ss << " - expected count: 0, actual count: " << ActCount << "\n";
  }

  if (HasError) {
    ErrorMsg = Ss.str();
    return false;
  }
  return true;
}

template <typename T>
static std::string bitPatternAsHex64(const T &Val,
offloadtest::Rule ComparisonRule) {
Expand Down Expand Up @@ -391,10 +514,15 @@ llvm::Error verifyResult(offloadtest::Result R) {
case offloadtest::Rule::BufferFloatEpsilon: {
if (testBufferFloatEpsilon(R.ActualPtr, R.ExpectedPtr, R.Epsilon, R.DM))
return llvm::Error::success();

std::ostringstream Oss;
Oss << std::defaultfloat << R.Epsilon;
OS << "Comparison Rule: BufferFloatEpsilon\nEpsilon: " << Oss.str() << "\n";
break;
}
case offloadtest::Rule::BufferParticipantPattern: {
std::string ErrorMsg;
if (testBufferParticipantPattern(R.ActualPtr, R.ExpectedPtr, R.GroupSize,
ErrorMsg))
return llvm::Error::success();
// Return error with detailed message
OS << "Comparison Rule: BufferParticipantPattern\n" << ErrorMsg << "\n";
break;
}
}
Expand Down
67 changes: 52 additions & 15 deletions lib/Support/Pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,28 +108,47 @@ void MappingTraits<offloadtest::DescriptorSet>::mapping(
template <typename T> static void setData(IO &I, offloadtest::Buffer &B) {
if (I.outputting()) {
if (B.ArraySize == 1) {
// single buffer output
llvm::MutableArrayRef<T> Arr(reinterpret_cast<T *>(B.Data.back().get()),
B.Size / sizeof(T));
I.mapRequired("Data", Arr);
} else {
// array of buffers output
llvm::SmallVector<llvm::MutableArrayRef<T>> Arrays;
Arrays.reserve(B.ArraySize);
for (const auto &D : B.Data)
Arrays.emplace_back(reinterpret_cast<T *>(D.get()), B.Size / sizeof(T));
I.mapRequired("Data", Arrays);
}
return;
}

// zero-initialized buffer(s)
int64_t ZeroInitSize;
int64_t ZeroInitSize = 0;
int64_t SizeElems = 0;
std::optional<T> Fill;
I.mapOptional("ZeroInitSize", ZeroInitSize, 0);
I.mapOptional("Fill", Fill);
I.mapOptional("Size", SizeElems, 0);

if (ZeroInitSize > 0) {
B.Size = ZeroInitSize;
for (uint32_t I = 0; I < B.ArraySize; I++) {
B.Data.clear();
for (uint32_t Idx = 0; Idx < B.ArraySize; ++Idx) {
B.Data.push_back(std::make_unique<char[]>(B.Size));
memset(B.Data.back().get(), 0, B.Size);
std::memset(B.Data.back().get(), 0, B.Size);
}
return;
}

if (Fill.has_value()) {
if (SizeElems == 0) {
I.setError("'Size' must be provided when using 'Fill'");
return;
}
B.Size = SizeElems * sizeof(T);
B.Data.clear();
for (uint32_t Idx = 0; Idx < B.ArraySize; ++Idx) {
B.Data.push_back(std::make_unique<char[]>(B.Size));
std::fill_n(reinterpret_cast<T *>(B.Data.back().get()), SizeElems,
Fill.value());
}
return;
}
Expand All @@ -139,29 +158,43 @@ template <typename T> static void setData(IO &I, offloadtest::Buffer &B) {
llvm::SmallVector<T, 64> Arr;
I.mapRequired("Data", Arr);
B.Size = Arr.size() * sizeof(T);
B.Data.clear();
B.Data.push_back(std::make_unique<char[]>(B.Size));
memcpy(B.Data.back().get(), Arr.data(), B.Size);
std::memcpy(B.Data.back().get(), Arr.data(), B.Size);
return;
}

// array of buffers input
llvm::SmallVector<llvm::SmallVector<T>> Arrays;
I.mapRequired("Data", Arrays);
B.Size = Arrays.back().size() * sizeof(T);

uint32_t ActualSize = 0;
for (auto Arr : Arrays) {
if (Arrays.size() != B.ArraySize) {
I.setError(llvm::Twine("Expected ") + std::to_string(B.ArraySize) +
" buffers, found " + std::to_string(Arrays.size()));
return;
}

if (Arrays.empty()) {
B.Size = 0;
B.Data.clear();
for (uint32_t Idx = 0; Idx < B.ArraySize; ++Idx)
B.Data.push_back(std::make_unique<char[]>(0));
return;
}

B.Size = Arrays.front().size() * sizeof(T);
for (const auto &Arr : Arrays) {
if (Arr.size() * sizeof(T) != B.Size) {
I.setError("All buffers must have the same size.");
return;
}
}

B.Data.clear();
for (const auto &Arr : Arrays) {
B.Data.push_back(std::make_unique<char[]>(B.Size));
memcpy(B.Data.back().get(), Arr.data(), B.Size);
ActualSize++;
std::memcpy(B.Data.back().get(), Arr.data(), B.Size);
}
if (ActualSize != B.ArraySize)
I.setError(Twine("Expected ") + std::to_string(B.ArraySize) +
" buffers, found " + std::to_string(ActualSize));
}

// Counter(s) can contain one counter value for a singular resource
Expand Down Expand Up @@ -347,6 +380,10 @@ void MappingTraits<offloadtest::Result>::mapping(IO &I,
I.mapOptional("DenormMode", R.DM);
break;
}
case Rule::BufferParticipantPattern: {
I.mapRequired("GroupSize", R.GroupSize);
break;
}
default:
break;
}
Expand Down
Loading
Loading