Skip to content

Implement lane-wise modulo. #86

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 44 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 33 commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
295d602
new literals
jamierpond Apr 16, 2024
dbf8115
PR feedback
jamierpond Apr 19, 2024
2327807
tidy tests
jamierpond Apr 19, 2024
7c10d29
tidy reused code for booleanswaer
jamierpond Apr 19, 2024
0b8b214
update name
jamierpond Apr 19, 2024
e3a802d
style
jamierpond Apr 19, 2024
7ed6e88
test style
jamierpond Apr 19, 2024
03edd02
fix
jamierpond Apr 19, 2024
0648bb1
undef util
jamierpond Apr 19, 2024
0d0a833
simplify again
jamierpond Apr 19, 2024
33183cd
fmt
jamierpond Apr 19, 2024
a7d744d
Add to Array
jamierpond Apr 19, 2024
06af01c
rename
jamierpond Apr 20, 2024
ffc1120
nailed it
jamierpond Apr 20, 2024
5b837e6
cleanup
jamierpond Apr 20, 2024
c97318f
array tests?
jamierpond Apr 20, 2024
3dd0296
indentaion
jamierpond Apr 20, 2024
da9ccb9
update indent
jamierpond Apr 20, 2024
da64b7d
from array
jamierpond Apr 20, 2024
11ccd9e
more updates
jamierpond May 13, 2024
aab033a
wip
jamierpond May 15, 2024
ba6a5ba
rm unused
jamierpond May 15, 2024
f3d9f42
respect 80 chars
jamierpond May 15, 2024
783f189
undo clang formatting
jamierpond May 15, 2024
f9e28b1
format boolean swar
jamierpond May 15, 2024
d636521
improvementws
jamierpond May 26, 2024
d880691
indentation
jamierpond May 26, 2024
bbad583
make sure we understand equality
jamierpond May 26, 2024
f6a04f1
works
jamierpond May 26, 2024
e39d298
implement modulo
jamierpond May 26, 2024
0ca3600
modulo works!
jamierpond May 26, 2024
f7ffe70
snifae
jamierpond May 26, 2024
53e56f5
better example
jamierpond May 26, 2024
b62355a
Update SWAR.h
thecppzoo May 26, 2024
bf93d56
Update SWAR.h
thecppzoo May 26, 2024
79d3847
Attempt to sidestep MSVC bug
thecppzoo May 26, 2024
a585ad9
camelCawe
jamierpond May 27, 2024
711bb08
Merge branch 'jp/swar-to-array' into jp/ml-basics
jamierpond May 27, 2024
f1452cb
indentatino
jamierpond May 27, 2024
f45632a
auto
jamierpond May 27, 2024
79148d3
add utils
jamierpond May 27, 2024
08a73e1
tidy up
jamierpond May 27, 2024
0228573
weren't using that anyway
jamierpond May 27, 2024
0829a7e
tidy up tests
jamierpond May 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 76 additions & 8 deletions inc/zoo/swar/SWAR.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include "zoo/meta/log.h"

#include <array>
#include <type_traits>

#ifdef _MSC_VER
Expand All @@ -12,6 +13,16 @@

namespace zoo { namespace swar {

// Forward declaration so the literal tag below can name SWAR before the
// primary template is defined.
template <int NBits, typename T>
struct SWAR;

/// Tag type carrying a lane width and a base type.
/// It is taken as the first argument by the "literal" constructors of SWAR
/// and BooleanSWAR so that the accompanying deduction guides can recover
/// NumBits and BaseType from the tag.
/// The only member is a null pointer-to-member-function of the matching
/// SWAR specialization.
template <int NumBits, typename BaseType>
struct Literals_t {
    static constexpr void (SWAR<NumBits, BaseType>::*value)() = nullptr;
};

/// Ready-made tag object: Literals<NumBits, BaseType> is a
/// Literals_t<NumBits, BaseType> value.
template <int NumBits, typename BaseType>
constexpr auto Literals = Literals_t<NumBits, BaseType>{};

using u64 = uint64_t;
using u32 = uint32_t;
using u16 = uint16_t;
Expand Down Expand Up @@ -52,6 +63,7 @@ constexpr std::make_unsigned_t<T> lsbIndex(T v) noexcept {
template<int NBits_, typename T = uint64_t>
struct SWAR {
using type = std::make_unsigned_t<T>;
constexpr static auto Literal = Literals<NBits_, T>;
constexpr static inline type
NBits = NBits_,
BitWidth = sizeof(T) * 8,
Expand All @@ -62,13 +74,53 @@ struct SWAR {
AllOnes = ~std::make_unsigned_t<T>{0} >> PaddingBitsCount, // Also constructed in RobinHood utils: possible bug?
LeastSignificantBit = meta::BitmaskMaker<T, std::make_unsigned_t<T>{1}, NBits>::value,
MostSignificantBit = LeastSignificantBit << (NBits - 1),
LeastSignificantLaneMask =
sizeof(T) * 8 == NBits ? // needed to avoid shifting all bits
~T(0) :
~(~T(0) << NBits),
LeastSignificantLaneMask = []() {
if constexpr (NBits < sizeof(T) * 8) {
return (T(1) << NBits) - 1;
} else {
return ~T(0);
}
}(),
// Use LowerBits in favor of ~MostSignificantBit to not pollute
// "don't care" bits when non-power-of-two bit lane sizes are supported
LowerBits = MostSignificantBit - LeastSignificantBit;
LowerBits = MostSignificantBit - LeastSignificantBit,
MaxUnsignedLaneValue = LeastSignificantLaneMask;

/// Packs the values of [first, last) into a single T, one value per lane.
/// Every step shifts what has been accumulated so far up by NBits and ORs
/// the next value in, so the first element of the range ends up in the most
/// significant occupied lane and the last element in the least significant
/// lane. Values are ORed in as-is: nothing here masks them to NBits.
/// An empty range yields T{0}.
template <typename InputIt>
constexpr static auto from_range(InputIt first, InputIt last) noexcept {
    auto packed = T{0};
    while (first != last) {
        packed = (packed << NBits) | *first;
        ++first;
    }
    return packed;
}

/// Builds a SWAR from a C array holding exactly one value per lane.
/// The elements are packed with from_range, so values[0] lands in the most
/// significant lane and values[Lanes - 1] in the least significant one.
template <typename U>
constexpr static auto from_array(const U (&values)[Lanes]) noexcept {
    const auto packed = from_range(std::begin(values), std::end(values));
    return SWAR{packed};
}

template <typename U>
constexpr static auto from_array(const std::array<T, Lanes> &values) noexcept {
using std::begin; using std::end;
return SWAR{from_range(begin(values), end(values))};
}

constexpr SWAR(const std::array<T, Lanes> &array) : m_v{from_range(array.begin(), array.end())} {}

/// "Literal" constructor: takes a Literals_t tag followed by a C array of
/// lane values. The enable_if removes this overload unless the array has
/// exactly Lanes elements.
// NOTE(review): from_array(values) returns a SWAR, not a T, so initializing
// m_v from it relies on a SWAR-to-T conversion that is not visible in this
// part of the file — confirm it exists (or that from_range was intended).
template <typename Arg, std::size_t N, typename = std::enable_if_t<N == Lanes, int>>
constexpr
SWAR(Literals_t<NBits, T>, const Arg (&values)[N]) : m_v{from_array(values)} {}

/// Unpacks the lanes into a std::array of Lanes elements.
/// The value obtained from at(i) is stored at index Lanes - 1 - i, i.e. the
/// array is filled back to front while i counts up.
// NOTE(review): presumably at(i) reads lane i counting from the least
// significant end, which would make this the inverse of from_range — confirm
// against the definition of at().
constexpr std::array<T, Lanes> to_array() const noexcept {
    std::array<T, Lanes> lanes = {};
    for (int lane = 0; lane < Lanes; ++lane) {
        lanes[Lanes - lane - 1] = at(lane);
    }
    return lanes;
}

SWAR() = default;
constexpr explicit SWAR(T v): m_v(v) {}
Expand Down Expand Up @@ -161,6 +213,12 @@ struct SWAR {
T m_v;
};

// Deduction guide: a Literals_t<NBits, T> tag followed by a C array with
// SWAR<NBits, T>::Lanes elements deduces SWAR<NBits, T>; NBits and T come
// from the tag, Arg from the array element type.
template <int NBits, typename T, typename Arg>
SWAR(Literals_t<NBits, T>, const Arg (&values)[SWAR<NBits, T>::Lanes]) -> SWAR<NBits, T>;

// Deduction guide: same idea for a tag followed by a std::array of T.
// NOTE(review): no constructor taking (Literals_t, std::array) is visible in
// this part of the file — confirm one exists, otherwise this guide selects a
// type that cannot be constructed from these arguments.
template <int NBits, typename T>
SWAR(Literals_t<NBits, T>, const std::array<T, SWAR<NBits, T>::Lanes>&) -> SWAR<NBits, T>;

/// Defining operator== on base SWAR types is entirely too error prone. Force a verbose invocation.
template<int NBits, typename T = uint64_t>
constexpr auto horizontalEquality(SWAR<NBits, T> left, SWAR<NBits, T> right) {
Expand Down Expand Up @@ -231,6 +289,10 @@ template<int NBits, typename T>
struct BooleanSWAR: SWAR<NBits, T> {
using Base = SWAR<NBits, T>;

template <std::size_t N>
constexpr BooleanSWAR(Literals_t<NBits, T>, const bool (&values)[N])
: Base(Literals<NBits, T>, values) { this->m_v <<= (NBits - 1); }

// Booleanness is stored in the MSBs
static constexpr auto MaskMSB =
broadcast<NBits, T>(Base(T(1) << (NBits -1)));
Expand All @@ -240,7 +302,7 @@ struct BooleanSWAR: SWAR<NBits, T> {
static constexpr auto MaskNonLSB = ~MaskLSB;
static constexpr auto MaskNonMSB = ~MaskMSB;
constexpr explicit BooleanSWAR(T v): Base(v) {}

/// Returns a copy in which the boolean (most significant) bit of lane `bit`
/// has been flipped with XOR.
// Because XOR toggles, the bit ends up cleared only when it was set on
// entry; a lane whose bit was already clear comes back set.
// NOTE(review): the return statement yields a T while the T constructor seen
// nearby is explicit — confirm which conversion is used here.
constexpr BooleanSWAR clear(int bit) const noexcept {
constexpr auto Bit = T(1) << (NBits - 1);
return this->m_v ^ (Bit << (NBits * bit)); }
Expand All @@ -256,7 +318,7 @@ struct BooleanSWAR: SWAR<NBits, T> {
/// Lane-wise logical negation: XORs this value with a Base holding
/// Base::MostSignificantBit and wraps the result in a BooleanSWAR.
constexpr auto operator ~() const noexcept {
return BooleanSWAR(Base{Base::MostSignificantBit} ^ *this);
}

/// Lane-wise logical negation spelled with `not`: XORs this value with
/// MaskMSB and wraps the result in a BooleanSWAR.
constexpr auto operator not() const noexcept {
return BooleanSWAR(MaskMSB ^ *this);
}
Expand Down Expand Up @@ -305,6 +367,12 @@ struct BooleanSWAR: SWAR<NBits, T> {
convertToBooleanSWAR(SWAR<NB, TT> arg) noexcept;
};

// Deduction guide: a Literals_t<NBits, T> tag followed by a bool array with
// BooleanSWAR<NBits, T>::Lanes elements deduces BooleanSWAR<NBits, T>.
template <int NBits, typename T>
BooleanSWAR(
Literals_t<NBits, T>,
const bool (&values)[BooleanSWAR<NBits, T>::Lanes])
-> BooleanSWAR<NBits, T>;

template<int NBits, typename T>
constexpr BooleanSWAR<NBits, T>
convertToBooleanSWAR(SWAR<NBits, T> arg) noexcept {
Expand Down Expand Up @@ -381,7 +449,7 @@ greaterEqual(SWAR<NBits, T> left, SWAR<NBits, T> right) noexcept {
using S = swar::SWAR<NBits, T>;
const auto h = S::MostSignificantBit, x = left.value(), y = right.value(); // x=left, y= right is x < y
const auto z = (x|h) - (y&~h);
// bitwise ternary median!
// bitwise ternary median!
const auto t = h & ~median(x, ~y, z);
return ~BooleanSWAR<NBits, T>{static_cast<T>(t)}; // ~(x<y) === x >= y
}
Expand Down
75 changes: 75 additions & 0 deletions inc/zoo/swar/math.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#pragma once
#include "SWAR.h"
#include <cstddef>
#include <cstdint>
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SWAR.h already includes this header (or its equivalent, stdint.h). I see no point in writing things like std::uint64_t instead of just uint64_t.


namespace zoo::math {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You know, should you have used maths, I would have let it slide. But since you didn't, if you try to use the British maths I will reject the renaming on the basis that there is no need to make a change like that
;-)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wouldn't have let 'maths' slide.


template <typename IntegerType = size_t>
constexpr static
std::enable_if_t<std::is_integral_v<IntegerType>, bool>
is_power_of_two(IntegerType x) noexcept {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why snake_case instead of the normal camelCase and DromedaryCamelCase customary in the rest of zoo for these things?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Habit; let's move to camelCase to be consistent. I will start an email thread about naming case to ask whether you have a strong preference, so that this PR is not polluted with unrelated discussion.

return x > 0 && (x & (x - 1)) == 0;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indentation.
Why the X > 0 instead of the customary X or bool(X)?

}

template <typename IntegerType = size_t, IntegerType X>
constexpr static
std::enable_if_t<std::is_integral_v<IntegerType>, bool>
is_power_of_two() noexcept {
return is_power_of_two(X);
}
static_assert(is_power_of_two<int, 4>());


/// Computes x modulo N for a power-of-two N by masking with N - 1.
/// The overload only exists when IntegerType is an integral type and N is a
/// power of two; the result is returned as size_t.
template <size_t N, typename IntegerType = size_t>
constexpr static
std::enable_if_t<
    std::is_integral_v<IntegerType> && is_power_of_two<size_t, N>(),
    size_t>
modulo_power_of_two(IntegerType x) noexcept {
    // For a power of two, N - 1 has exactly the bits below N's single bit set.
    constexpr size_t Mask = N - 1;
    return x & Mask;
}

static_assert(modulo_power_of_two<4>(0) == 0);
static_assert(modulo_power_of_two<8>(9) == 1);
static_assert(modulo_power_of_two<4096>(4097) == 1);

}

// SWAR power of two

namespace zoo::swar {
/// Subtracts one from every lane of x with a single subtraction on the
/// underlying value: S::LeastSignificantBit is subtracted from x.value().
///
/// "Unsafe" because nothing isolates the lanes from each other: a lane that
/// is zero borrows from the lane above it, altering that neighbour.
///
/// The difference is cast back to the type returned by value() before the
/// result is constructed. For lane types narrower than int the subtraction
/// is carried out in int, and brace-initializing S straight from that int
/// would be a narrowing conversion.
template <typename S>
constexpr static auto subtract_one_unsafe(S x) noexcept {
    using Value = std::decay_t<decltype(x.value())>;
    const auto decremented =
        static_cast<Value>(x.value() - S::LeastSignificantBit);
    return S{decremented};
}
// todo subtract K unsafe using BitmaskMaker
// todo subtract K "saturated" using BitmaskMaker

/// Lane-wise power-of-two test.
/// For each lane it combines two conditions with `&`:
///   - greaterEqual_MSB_off(x, S{0}), and
///   - (x - 1) & x == 0, the classic single-bit check, where x - 1 comes
///     from subtract_one_unsafe and the comparison from equals.
/// The combined value is returned as-is.
// NOTE(review): subtract_one_unsafe performs one subtraction over the whole
// underlying value, so a zero lane borrows from its neighbour — confirm that
// callers never pass zero lanes or that the neighbour's result is acceptable.
// NOTE(review): the helper's name suggests a >= comparison against zero,
// which a zero lane would also satisfy — confirm zero lanes are rejected.
template <typename S>
constexpr static auto is_power_of_two(S x) noexcept {
    auto greater_than_0 = greaterEqual_MSB_off(x, S{0});
    auto x_minus_1 = subtract_one_unsafe(x);
    auto zero = equals(S{x_minus_1.value() & x.value()}, S{0});
    return greater_than_0 & zero;
}

/// Lane-wise x modulo N for a power-of-two N.
/// A mask is built with BitmaskMaker from the pattern N - 1 and the lane
/// width S::NBits, and the underlying value of x is ANDed with it.
/// The overload only exists when N is a power of two.
template <size_t N, typename S>
constexpr static
std::enable_if_t<zoo::math::is_power_of_two<size_t, N>(), S>
modulo_power_of_two(const S x) noexcept {
    using Underlying = typename S::type;
    constexpr auto LaneMask =
        zoo::meta::BitmaskMaker<Underlying, N - 1, S::NBits>::value;
    const Underlying remainders = x.value() & LaneMask;
    return S{remainders};
}


} // namespace zoo::swar

2 changes: 1 addition & 1 deletion test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ else()
set(
ZOO_TEST_SOURCES
${CATCH2_MAIN_SOURCE} ${TYPE_ERASURE_SOURCES} ${ALGORITHM_SOURCES}
${SWAR_SOURCES}
${SWAR_SOURCES}
${MISCELLANEA_SOURCES}
)

Expand Down
Loading