Skip to content

Plural right shift #91

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 26 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
[![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
[![C++ CI](https://github.com/thecppzoo/zoo/actions/workflows/master.yaml/badge.svg)](https://github.com/thecppzoo/zoo/actions/workflows/master.yaml)

## Build suggestion
Expand Down
33 changes: 24 additions & 9 deletions inc/zoo/swar/SWAR.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,25 @@ struct SWAR {
AllOnes = ~std::make_unsigned_t<T>{0} >> PaddingBitsCount, // Also constructed in RobinHood utils: possible bug?
LeastSignificantBit = meta::BitmaskMaker<T, std::make_unsigned_t<T>{1}, NBits>::value,
MostSignificantBit = LeastSignificantBit << (NBits - 1),
LeastSignificantLaneMask =
sizeof(T) * 8 == NBits ? // needed to avoid shifting all bits
~T(0) :
~(~T(0) << NBits),
LeastSignificantLaneMask = []() {
if constexpr (NBits < sizeof(T) * 8) {
return (T(1) << NBits) - 1;
} else {
return ~T(0);
}
}(),
// Use LowerBits in favor of ~MostSignificantBit to not pollute
// "don't care" bits when non-power-of-two bit lane sizes are supported
LowerBits = MostSignificantBit - LeastSignificantBit;

// Compile-time guard: refuses any instantiation whose base type T is not an
// unsigned integer type. The diagnostic tells the user which types to use
// instead and why signed bases are rejected.
static_assert(std::is_unsigned_v<T>,
    "You should not use a signed type as the base for a SWAR type. "
    "If you have used `int` or `long`, please use `uint32_t` or `uint64_t` instead. "
    "This type parameter is only used to determine the total width of the SWAR register. "
    "The signed-ness of the type has no *intentional* semantic meaning to what you're defining and "
    "furthermore, some bitwise operations are different for signed and unsigned types."
);

SWAR() = default;
constexpr explicit SWAR(T v): m_v(v) {}
constexpr explicit operator T() const noexcept { return m_v; }
Expand All @@ -104,9 +115,13 @@ struct SWAR {
SWAR_BINARY_OPERATORS_X_LIST
#undef X

// Mask covering the lane at `laneIndex`: LeastSignificantLaneMask moved left
// by NBits * laneIndex bit positions. laneIndex is not range-checked here.
constexpr static T laneMask(int laneIndex) noexcept {
    const auto bitOffset = NBits * laneIndex;
    return LeastSignificantLaneMask << bitOffset;
}

// Returns lane at position with other lanes cleared.
constexpr T isolateLane(int position) const noexcept {
return m_v & (LeastSignificantLaneMask << (NBits * position));
constexpr T isolateLane(int laneIndex) const noexcept {
return m_v & laneMask(laneIndex);
}

// Returns lane value at position, in lane 0, rest of SWAR cleared.
Expand Down Expand Up @@ -254,7 +269,7 @@ struct BooleanSWAR: SWAR<NBits, T> {
static constexpr auto MaskNonLSB = ~MaskLSB;
static constexpr auto MaskNonMSB = ~MaskMSB;
constexpr explicit BooleanSWAR(T v): Base(v) {}

// XORs the stored value with the most significant bit of the lane numbered
// `bit` and returns the outcome as a BooleanSWAR.
// NOTE(review): XOR flips that bit, so this only "clears" when the bit is
// currently set -- confirm callers guarantee that precondition.
constexpr BooleanSWAR clear(int bit) const noexcept {
    constexpr auto LaneTopBit = T(1) << (NBits - 1);
    const auto selected = LaneTopBit << (NBits * bit);
    return this->m_v ^ selected;
}
Expand All @@ -270,7 +285,7 @@ struct BooleanSWAR: SWAR<NBits, T> {
// Builds a BooleanSWAR from the XOR of this value with a Base holding
// Base::MostSignificantBit, i.e. every bit present in that mask is flipped
// and all other bits are left as they are.
constexpr auto operator ~() const noexcept {
return BooleanSWAR(Base{Base::MostSignificantBit} ^ *this);
}

// Builds a BooleanSWAR from the XOR of MaskMSB with this value.
// NOTE(review): MaskMSB is declared outside this excerpt; presumably it is
// the per-lane most-significant-bit mask, making this equivalent to
// operator ~ -- confirm against its definition.
constexpr auto operator not() const noexcept {
return BooleanSWAR(MaskMSB ^ *this);
}
Expand Down Expand Up @@ -395,7 +410,7 @@ greaterEqual(SWAR<NBits, T> left, SWAR<NBits, T> right) noexcept {
using S = swar::SWAR<NBits, T>;
const auto h = S::MostSignificantBit, x = left.value(), y = right.value(); // x=left, y= right is x < y
const auto z = (x|h) - (y&~h);
// bitwise ternary median!
// bitwise ternary median!
const auto t = h & ~median(x, ~y, z);
return ~BooleanSWAR<NBits, T>{static_cast<T>(t)}; // ~(x<y) === x >= y
}
Expand Down
48 changes: 43 additions & 5 deletions inc/zoo/swar/associative_iteration.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
#ifndef ZOO_SWAR_ASSOCIATIVE_ITERATION_H
#define ZOO_SWAR_ASSOCIATIVE_ITERATION_H

#include "zoo/meta/BitmaskMaker.h"
#include "zoo/swar/SWAR.h"
#include <cstdint>
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

?


//#define ZOO_DEVELOPMENT_DEBUGGING
#ifdef ZOO_DEVELOPMENT_DEBUGGING
Expand Down Expand Up @@ -260,7 +262,8 @@ template<int NB, typename B>
constexpr auto makeLaneMaskFromMSB(SWAR<NB, B> input) {
using S = SWAR<NB, B>;
auto msb = input & S{S::MostSignificantBit};
auto msbCopiedToLSB = S{msb.value() >> (NB - 1)};
B val = msb.value() >> (NB - 1);
auto msbCopiedToLSB = S{val};
Comment on lines +265 to +266
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this change?

return impl::makeLaneMaskFromMSB_and_LSB(msb, msbCopiedToLSB);
}

Expand Down Expand Up @@ -392,8 +395,13 @@ template<
typename CountHalver
>
constexpr auto associativeOperatorIterated_regressive(
Base base, Base neutral, IterationCount count, IterationCount forSquaring,
Operator op, unsigned log2Count, CountHalver ch
Base base,
Base neutral,
IterationCount count,
IterationCount forSquaring,
Operator op,
unsigned log2Count,
CountHalver ch
) {
auto result = neutral;
if(!log2Count) { return result; }
Expand All @@ -419,10 +427,12 @@ constexpr auto multiplication_OverflowUnsafe_SpecificBitCount(

auto halver = [](auto counts) {
auto msbCleared = counts & ~S{S::MostSignificantBit};
return S{msbCleared.value() << 1};
T res = msbCleared.value() << 1;
return S{res};
Comment on lines +430 to +431
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why so many unnecessary changes?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

was getting seemingly random type errors.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will try and resolve

};

multiplier = S{multiplier.value() << (NB - ActualBits)};
T val = multiplier.value() << (NB - ActualBits);
multiplier = S{val};
return associativeOperatorIterated_regressive(
multiplicand, S{0}, multiplier, S{S::MostSignificantBit}, operation,
ActualBits, halver
Expand Down Expand Up @@ -483,6 +493,34 @@ constexpr auto exponentiation_OverflowUnsafe_SpecificBitCount(
);
}

/** Transforms a binary number into its unary representation (in binary),
 * independently in every lane.
 * E.g. 0b0011 (3) -> 0b0111
 *
 * Implemented as "two raised to the lane value, minus one in every lane".
 * The subtraction is a plain integer subtraction on the whole underlying
 * value, not a per-lane one: a lane whose power came out as zero borrows
 * from the lanes above it.
 * NOTE(review): it seems that a count equal to the exact lane width is
 * overflowy in this way -- confirm before relying on full-width counts
 * anywhere but the most significant lane.
 *
 * \tparam S SWAR type; must expose `type`, `NBits` and `LeastSignificantBit`.
 * \param input per-lane counts.
 * \return an S whose lanes hold the unary encoding of the matching input lane.
 */
template <typename S>
constexpr auto binaryToUnary_Plural(S input) {
    using Bits = typename S::type;
    constexpr auto two = S{meta::BitmaskMaker<Bits, 2, S::NBits>::value};
    constexpr auto one = S::LeastSignificantBit;
    const Bits unary =
        exponentiation_OverflowUnsafe_SpecificBitCount<S::NBits>(two, input).value() - one;
    return S{unary};
}

/** Shifts every lane of `input` to the right by the amount stored in the
 * matching lane of `shifts`.
 *
 * First the bits selected by binaryToUnary_Plural(shifts) are cleared from
 * the input (presumably so that bits leaving a lane cannot land in the lane
 * below it), then each lane is isolated with S::laneMask, shifted by its own
 * count and OR-ed into the result.
 *
 * \tparam S SWAR type; must expose `type`, `Lanes`, `laneMask` and `at`.
 * \param input lanes to be shifted.
 * \param shifts per-lane shift counts.
 * \return an S holding the shifted lanes.
 */
template <typename S>
constexpr auto rightShift_Plural(S input, S shifts) {
    using Bits = typename S::type;
    const auto keepMask = ~binaryToUnary_Plural(shifts);
    const auto survivors = input.value() & keepMask.value();

    Bits accumulated = 0;
    for (int lane = 0; lane < S::Lanes; lane++) {
        accumulated |= (survivors & S::laneMask(lane)) >> shifts.at(lane);
    }
    return S{accumulated};
}

template<int NB, typename T>
constexpr auto multiplication_OverflowUnsafe(
SWAR<NB, T> multiplicand,
Expand Down
99 changes: 93 additions & 6 deletions test/swar/BasicOperations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ constexpr auto PrecisionFixtureTest = 0x89ABCDEF;
constexpr auto Doubled =
doublePrecision(SWAR<4, uint32_t>{PrecisionFixtureTest});

// makeLaneMaskFromMSB on 4-bit lanes: in this example the two lanes whose top
// bit is set come back as all ones, and the two other lanes come back as zero.
static_assert(makeLaneMaskFromMSB(SWAR<4, uint16_t>{
0b1000'0000'1000'0000}).value() ==
0b1111'0000'1111'0000
);

static_assert(0x090B0D0F == Doubled.even.value());
static_assert(0x080A0C0E == Doubled.odd.value());
static_assert(PrecisionFixtureTest == halvePrecision(Doubled.even, Doubled.odd).value());
Expand Down Expand Up @@ -357,23 +362,23 @@ TEST_CASE(
const auto left = S2_16{0}.blitElement(1, i);
const auto right = S2_16{S2_16::AllOnes}.blitElement(1, i-1);
const auto test = S2_16{0}.blitElement(1, 2);
CHECK(test.value() == greaterEqual<2, u16>(left, right).value());
CHECK(test.value() == greaterEqual<2, u16>(left, right).value());
}
}
SECTION("single") {
for (uint32_t i = 1; i < 15; i++) {
const auto large = S4_32{0}.blitElement(1, i+1);
const auto small = S4_32{S4_32::AllOnes}.blitElement(1, i-1);
const auto test = S4_32{0}.blitElement(1, 8);
CHECK(test.value() == greaterEqual<4, u32>(large, small).value());
CHECK(test.value() == greaterEqual<4, u32>(large, small).value());
}
}
SECTION("allLanes") {
for (uint32_t i = 1; i < 15; i++) {
const auto small = S4_32(S4_32::LeastSignificantBit * (i-1));
const auto large = S4_32(S4_32::LeastSignificantBit * (i+1));
const auto test = S4_32(S4_32::LeastSignificantBit * 8);
CHECK(test.value() == greaterEqual<4, u32>(large, small).value());
CHECK(test.value() == greaterEqual<4, u32>(large, small).value());
}
}
}
Expand Down Expand Up @@ -425,7 +430,7 @@ TEST_CASE(
"BooleanSWAR MSBtoLaneMask",
"[swar]"
) {
// BooleanSWAR as a mask:
// BooleanSWAR as a mask:
auto bswar =BooleanSWAR<4, u32>(0x0808'0000);
auto mask = S4_32(0x0F0F'0000);
CHECK(bswar.MSBtoLaneMask().value() == mask.value());
Expand All @@ -452,6 +457,88 @@ TEST_CASE(
CHECK(SWAR<4, u16>(0x0400).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0100), SWAR<4, u16>(0x0300)).value());
CHECK(SWAR<4, u16>(0x0B00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0300)).value());
CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0700)).value());
CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)).value());
CHECK(S4_32(0x0F0C'F000).value() == saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)).value());
CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)).value());
CHECK(S4_32(0x0F0C'F000).value() == saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)).value());
}

template <int NB, typename IntType, IntType Input, IntType Expected>
constexpr static bool binaryToUnary_Plural_Test() {
return binaryToUnary_Plural(SWAR<NB, IntType>{Input}).value() == Expected;
};

// 4-bit lanes holding 1, 2, 3, 3 -> one, two, three, three low bits set.
static_assert(binaryToUnary_Plural_Test<4, uint16_t,
0b0001'0010'0011'0011,
0b0001'0011'0111'0111
>());

// 4-bit lanes holding 0, 1, 2, 3 -> a zero count leaves its lane empty.
static_assert(binaryToUnary_Plural_Test<4, uint16_t,
0b0000'0001'0010'0011,
0b0000'0001'0011'0111
>());

// Count equal to the lane width (4), placed in the most significant lane:
// that lane is expected to come out completely filled.
static_assert(binaryToUnary_Plural_Test<4, uint16_t,
0b0100'0001'0010'0011,
0b1111'0001'0011'0111
>());

// Only the least significant lane holds a count (1).
static_assert(binaryToUnary_Plural_Test<4, uint16_t,
0b0000'0000'0000'0001,
0b0000'0000'0000'0001
>());

// Two 8-bit lanes holding 7 and 5. The literals are grouped as 8 + 8 digits
// so that each digit group lines up with one lane of the uint16_t.
static_assert(binaryToUnary_Plural_Test<8, uint16_t,
    0b00000111'00000101, // 7 ' 5
    0b01111111'00011111  // seven ones, five ones!
>());

template <int NB, typename IntType, IntType Input, IntType Count, IntType Expected>
constexpr static bool rightShift_Plural_Test() {
using S = SWAR<NB, IntType>;
return rightShift_Plural(S{Input}, S{Count}).value() == Expected;
};

// Same shift (2) in every lane.
static_assert(rightShift_Plural_Test<4, uint16_t,
0b0111'0111'0111'0111, // input
0b0010'0010'0010'0010, // 2 ' 2 ' 2 ' 2
0b0001'0001'0001'0001 // notice, input, shifted over two to right!
>());

// Only lane 0 is shifted (by 1); lane 1 has a zero count and stays intact.
static_assert(rightShift_Plural_Test<4, uint16_t,
0b0000'0000'1111'0001,
0b0000'0000'0000'0001,
0b0000'0000'1111'0000
>());

// A different count per lane: 4, 3, 2, 1 from the most to the least
// significant lane.
static_assert(rightShift_Plural_Test<4, uint16_t,
0b0000'1000'1000'1000,
0b0100'0011'0010'0001,
0b0000'0001'0010'0100
>());

// All-ones input shifted by 1 in every lane: nothing leaks between lanes.
static_assert(rightShift_Plural_Test<4, uint16_t,
0b1111'1111'1111'1111,
0b0001'0001'0001'0001,
0b0111'0111'0111'0111
>());

// Zero counts everywhere: the input comes back unchanged.
static_assert(rightShift_Plural_Test<4, uint16_t,
0b0000'0000'1111'0001,
0b0000'0000'0000'0000,
0b0000'0000'1111'0001
>());

// Lanes 0 and 1 shifted by 1: the bit leaving lane 1 does not reach lane 0.
static_assert(rightShift_Plural_Test<4, uint16_t,
0b0000'0000'1111'0001,
0b0000'0000'0001'0001,
0b0000'0000'0111'0000
>());

// Checks of the lane masks for four 4-bit lanes in a uint16_t: laneMask(i) is
// expected to be LeastSignificantLaneMask moved up by i lanes, and at(3) on
// the top-lane mask is expected to give that lane's value back in lane 0.
using S = SWAR<4, uint16_t>;
static_assert(S::LeastSignificantLaneMask == 0b0000'0000'0000'1111);
static_assert(S::laneMask(0) == 0b0000'0000'0000'1111);
static_assert(S::laneMask(1) == 0b0000'0000'1111'0000);
static_assert(S::laneMask(2) == 0b0000'1111'0000'0000);
static_assert(S::laneMask(3) == 0b1111'0000'0000'0000);
static_assert(S{S::laneMask(3)}.at(3) == 0b0000'0000'0000'1111);