FEniCS · jorgensd · May 20, 2025 · May 7, 2025 · May 7, 2025 · May 7, 2025
diff --git a/cpp/dolfinx/common/sort.h b/cpp/dolfinx/common/sort.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2021 Igor Baratta
+// Copyright (C) 2021-2025 Igor Baratta and Paul T. Kühner
 //
 // This file is part of DOLFINx (https://www.fenicsproject.org)
 //
@@ -7,11 +7,13 @@
 #pragma once
 
 #include <algorithm>
+#include <bit>
 #include <cassert>
 #include <concepts>
 #include <cstdint>
 #include <functional>
 #include <iterator>
+#include <limits>
 #include <numeric>
 #include <span>
 #include <type_traits>
@@ -20,6 +22,34 @@
 
 namespace dolfinx
 {
+
+struct __unsigned_projection
+{
+  // Transforms the projected value to an unsigned int (if signed), while
+  // maintaining relative order by
+  //    x ↦ x + |std::numeric_limits<I>::min()|
+  template <std::signed_integral T>
+  constexpr std::make_unsigned_t<T> operator()(T e) const noexcept
+  {
+    using uT = std::make_unsigned_t<T>;
+
+    // Assert binary structure for bit shift
+    static_assert(static_cast<uT>(std::numeric_limits<T>::min())
+                      + static_cast<uT>(std::numeric_limits<T>::max())
+                  == static_cast<uT>(T(-1)));
+    static_assert(std::numeric_limits<uT>::digits
+                  == std::numeric_limits<T>::digits + 1);
+    static_assert(std::bit_cast<uT>(std::numeric_limits<T>::min())
+                  == (uT(1) << (sizeof(T) * 8 - 1)));
+
+    return std::bit_cast<uT>(std::forward<T>(e))
+           ^ (uT(1) << (sizeof(T) * 8 - 1));
+  }
+};
+
+/// Projection from signed to signed int
+inline constexpr __unsigned_projection unsigned_projection{};
+
 struct __radix_sort
 {
   /// @brief Sort a range with radix sorting algorithm. The bucket size
@@ -46,10 +76,11 @@ struct __radix_sort
   /// @tparam BITS The number of bits to sort at a time.
   /// @param[in, out] range The range to sort.
   /// @param[in] P Element projection.
-  template <
-      std::ranges::random_access_range R, typename P = std::identity,
-      std::remove_cvref_t<std::invoke_result_t<P, std::iter_value_t<R>>> BITS
-      = 8>
+  template <std::ranges::random_access_range R, typename P = std::identity,
+            std::make_unsigned_t<std::remove_cvref_t<
+                std::invoke_result_t<P, std::iter_value_t<R>>>>
+                BITS
+            = 8>
     requires std::integral<decltype(BITS)>
   constexpr void operator()(R&& range, P proj = {}) const
   {
@@ -58,19 +89,36 @@ struct __radix_sort
 
     // index type (if no projection is provided it holds I == T)
     using I = std::remove_cvref_t<std::invoke_result_t<P, T>>;
+    using uI = std::make_unsigned_t<I>;
+
+    if constexpr (!std::is_same_v<uI, I>)
+    {
+      __radix_sort()(std::forward<R>(range), [&](const T& e) -> uI
+                     { return unsigned_projection(proj(e)); });
+      return;
+    }
 
     if (range.size() <= 1)
       return;
 
-    T max_value = proj(*std::ranges::max_element(range, std::less{}, proj));
+    uI max_value = proj(*std::ranges::max_element(range, std::less{}, proj));
 
     // Sort N bits at a time
-    constexpr I bucket_size = 1 << BITS;
-    T mask = (T(1) << BITS) - 1;
+    constexpr uI bucket_size = 1 << BITS;
+    uI mask = (uI(1) << BITS) - 1;
 
     // Compute number of iterations, most significant digit (N bits) of
     // maxvalue
     I its = 0;
+
+    // optimize for case where all first bits are set - then order will not
+    // depend on it
+    bool all_first_bit = std::ranges::all_of(
+        range, [&](const auto& e)
+        { return proj(e) & (uI(1) << (sizeof(uI) * 8 - 1)); });
+    if (all_first_bit)
+      max_value = max_value & ~(uI(1) << (sizeof(uI) * 8 - 1));
+
     while (max_value)
     {
       max_value >>= BITS;
@@ -81,7 +129,7 @@ struct __radix_sort
     std::array<I, bucket_size> counter;
     std::array<I, bucket_size + 1> offset;
 
-    I mask_offset = 0;
+    uI mask_offset = 0;
     std::vector<T> buffer(range.size());
     std::span<T> current_perm = range;
     std::span<T> next_perm = buffer;
@@ -100,8 +148,8 @@ struct __radix_sort
                        std::next(offset.begin()));
       for (const auto& c : current_perm)
       {
-        I bucket = (proj(c) & mask) >> mask_offset;
-        I new_pos = offset[bucket + 1] - counter[bucket];
+        uI bucket = (proj(c) & mask) >> mask_offset;
+        uI new_pos = offset[bucket + 1] - counter[bucket];
         next_perm[new_pos] = c;
         counter[bucket]--;
       }

diff --git a/cpp/test/common/sort.cpp b/cpp/test/common/sort.cpp
@@ -1,29 +1,35 @@
-// Copyright (C) 2021 Igor A. Baratta
+// Copyright (C) 2021-2025 Igor A. Baratta and Paul T. Kühner
 //
 // This file is part of DOLFINx (https://www.fenicsproject.org)
 //
 // SPDX-License-Identifier:    LGPL-3.0-or-later
 
 #include <algorithm>
 #include <array>
+#include <bitset>
 #include <catch2/catch_template_test_macros.hpp>
 #include <catch2/catch_test_macros.hpp>
 #include <catch2/generators/catch_generators.hpp>
+#include <cstdint>
 #include <dolfinx/common/sort.h>
 #include <functional>
+#include <iostream>
+#include <limits>
 #include <numeric>
 #include <random>
+#include <type_traits>
 #include <vector>
 
-TEMPLATE_TEST_CASE("Test radix sort", "[vector][template]", std::int32_t,
-                   std::int64_t)
+TEMPLATE_TEST_CASE("Test radix sort", "[vector][template]", std::int16_t,
+                   std::int32_t, std::int64_t, std::uint16_t, std::uint32_t,
+                   std::uint64_t)
 {
   auto vec_size = GENERATE(100, 1000, 10000);
   std::vector<TestType> vec;
   vec.reserve(vec_size);
 
   // Generate a vector of ints with a Uniform Int distribution
-  std::uniform_int_distribution<TestType> distribution(0, 10000);
+  std::uniform_int_distribution<TestType> distribution(-10000, 10000);
   std::mt19937 engine;
   auto generator = std::bind(distribution, engine);
   std::generate_n(std::back_inserter(vec), vec_size, generator);
@@ -35,12 +41,31 @@ TEMPLATE_TEST_CASE("Test radix sort", "[vector][template]", std::int32_t,
   REQUIRE(std::ranges::is_sorted(vec));
 }
 
+TEMPLATE_TEST_CASE("Test radix sort (limits)", "[vector][template]",
+                   std::int16_t, std::int32_t, std::int64_t, std::uint16_t,
+                   std::uint32_t, std::uint64_t)
+{
+  std::vector<TestType> vec{0, std::numeric_limits<TestType>::max(),
+                            std::numeric_limits<TestType>::min()};
+  dolfinx::radix_sort(vec);
+  REQUIRE(std::ranges::is_sorted(vec));
+  REQUIRE(std::ranges::equal(
+      vec, std::vector<TestType>{std::numeric_limits<TestType>::min(), 0,
+                                 std::numeric_limits<TestType>::max()}));
+}
+
 TEMPLATE_TEST_CASE("Test radix sort (projection)", "[radix]", std::int16_t,
-                   std::int32_t, std::int64_t)
+                   std::int32_t, std::int64_t, std::uint16_t, std::uint32_t,
+                   std::uint64_t)
 {
   // Check projection into same type array
   {
     std::vector<TestType> vec = {3, 6, 2, 1, 5, 4, 0};
+    if constexpr (std::is_signed_v<TestType>)
+    {
+      vec[1] *= -1;
+      vec[4] *= -1;
+    }
     std::vector<TestType> indices(vec.size());
     std::iota(indices.begin(), indices.end(), 0);
 
@@ -53,6 +78,11 @@ TEMPLATE_TEST_CASE("Test radix sort (projection)", "[radix]", std::int16_t,
   {
     std::vector<std::array<TestType, 1>> vec_array{{3}, {6}, {2}, {1},
                                                    {5}, {4}, {0}};
+    if constexpr (std::is_signed_v<TestType>)
+    {
+      vec_array[1][0] *= -1;
+      vec_array[4][0] *= -1;
+    }
     std::vector<TestType> indices(vec_array.size());
     std::iota(indices.begin(), indices.end(), 0);