Skip to content

Commit 34e2045

Browse files
authored
Mark more constexpr functions as device-available (#17545)
Contributes to #7795. Also contributes to rapidsai/build-planning#76.

Authors:
- Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
- Nghia Truong (https://github.com/ttnghia)
- Yunsong Wang (https://github.com/PointKernel)
- Bradley Dice (https://github.com/bdice)
- David Wendt (https://github.com/davidwendt)

URL: #17545
1 parent 1a67646 commit 34e2045

32 files changed

+302
-199
lines changed

ci/build_docs.sh

+6
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@ rapids-mamba-retry install \
3535

3636
export RAPIDS_DOCS_DIR="$(mktemp -d)"
3737

38+
EXITCODE=0
39+
trap "EXITCODE=1" ERR
40+
set +e
41+
3842
rapids-logger "Build CPP docs"
3943
pushd cpp/doxygen
4044
aws s3 cp s3://rapidsai-docs/librmm/html/${RAPIDS_VERSION_MAJOR_MINOR}/rmm.tag . || echo "Failed to download rmm Doxygen tag"
@@ -58,3 +62,5 @@ mv build/dirhtml/* "${RAPIDS_DOCS_DIR}/dask-cudf/html"
5862
popd
5963

6064
RAPIDS_VERSION_NUMBER="${RAPIDS_VERSION_MAJOR_MINOR}" rapids-upload-docs
65+
66+
exit ${EXITCODE}

cpp/include/cudf/column/column_device_view.cuh

+10-8
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,13 @@
3333
#include <rmm/cuda_stream_view.hpp>
3434

3535
#include <cuda/std/optional>
36+
#include <cuda/std/type_traits>
3637
#include <thrust/iterator/counting_iterator.h>
3738
#include <thrust/iterator/transform_iterator.h>
3839
#include <thrust/pair.h>
3940

4041
#include <algorithm>
42+
#include <type_traits>
4143

4244
/**
4345
* @file column_device_view.cuh
@@ -56,8 +58,8 @@ namespace CUDF_EXPORT cudf {
5658
*
5759
*/
5860
struct nullate {
59-
struct YES : std::bool_constant<true> {};
60-
struct NO : std::bool_constant<false> {};
61+
struct YES : cuda::std::bool_constant<true> {};
62+
struct NO : cuda::std::bool_constant<false> {};
6163
/**
6264
* @brief `nullate::DYNAMIC` defers the determination of nullability to run time rather than
6365
* compile time. The calling code is responsible for specifying whether or not nulls are
@@ -80,7 +82,7 @@ struct nullate {
8082
* @return `true` if nulls are expected in the operation in which this object is applied,
8183
* otherwise false
8284
*/
83-
constexpr operator bool() const noexcept { return value; }
85+
CUDF_HOST_DEVICE constexpr operator bool() const noexcept { return value; }
8486
bool value; ///< True if nulls are expected
8587
};
8688
};
@@ -319,14 +321,14 @@ class alignas(16) column_device_view_base {
319321
}
320322

321323
template <typename C, typename T, typename = void>
322-
struct has_element_accessor_impl : std::false_type {};
324+
struct has_element_accessor_impl : cuda::std::false_type {};
323325

324326
template <typename C, typename T>
325327
struct has_element_accessor_impl<
326328
C,
327329
T,
328-
void_t<decltype(std::declval<C>().template element<T>(std::declval<size_type>()))>>
329-
: std::true_type {};
330+
void_t<decltype(cuda::std::declval<C>().template element<T>(cuda::std::declval<size_type>()))>>
331+
: cuda::std::true_type {};
330332
};
331333
// @cond
332334
// Forward declaration
@@ -534,7 +536,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base {
534536
* @return `true` if `column_device_view::element<T>()` has a valid overload, `false` otherwise
535537
*/
536538
template <typename T>
537-
static constexpr bool has_element_accessor()
539+
CUDF_HOST_DEVICE static constexpr bool has_element_accessor()
538540
{
539541
return has_element_accessor_impl<column_device_view, T>::value;
540542
}
@@ -1044,7 +1046,7 @@ class alignas(16) mutable_column_device_view : public detail::column_device_view
10441046
* @return `true` if `mutable_column_device_view::element<T>()` has a valid overload, `false`
10451047
*/
10461048
template <typename T>
1047-
static constexpr bool has_element_accessor()
1049+
CUDF_HOST_DEVICE static constexpr bool has_element_accessor()
10481050
{
10491051
return has_element_accessor_impl<mutable_column_device_view, T>::value;
10501052
}

cpp/include/cudf/detail/aggregation/aggregation.cuh

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
namespace cudf {
3737
namespace detail {
3838
template <typename T>
39-
constexpr bool is_product_supported()
39+
CUDF_HOST_DEVICE constexpr bool is_product_supported()
4040
{
4141
return is_numeric<T>();
4242
}

cpp/include/cudf/detail/utilities/cuda.cuh

+6-5
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,10 @@ class grid_1d {
7474
* @param num_threads_per_block The number of threads per block
7575
* @return thread_index_type The global thread index
7676
*/
77-
static constexpr thread_index_type global_thread_id(thread_index_type thread_id,
78-
thread_index_type block_id,
79-
thread_index_type num_threads_per_block)
77+
__device__ static constexpr thread_index_type global_thread_id(
78+
thread_index_type thread_id,
79+
thread_index_type block_id,
80+
thread_index_type num_threads_per_block)
8081
{
8182
return thread_id + block_id * num_threads_per_block;
8283
}
@@ -114,8 +115,8 @@ class grid_1d {
114115
* @param num_threads_per_block The number of threads per block
115116
* @return thread_index_type The global thread index
116117
*/
117-
static constexpr thread_index_type grid_stride(thread_index_type num_threads_per_block,
118-
thread_index_type num_blocks_per_grid)
118+
__device__ static constexpr thread_index_type grid_stride(thread_index_type num_threads_per_block,
119+
thread_index_type num_blocks_per_grid)
119120
{
120121
return num_threads_per_block * num_blocks_per_grid;
121122
}

cpp/include/cudf/detail/utilities/device_operators.cuh

+16-14
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
#include <cudf/utilities/error.hpp>
3030
#include <cudf/utilities/traits.hpp>
3131

32+
#include <cuda/std/functional>
33+
3234
#include <type_traits>
3335

3436
namespace cudf {
@@ -42,7 +44,7 @@ template <typename LHS,
4244
std::enable_if_t<cudf::is_relationally_comparable<LHS, RHS>()>* = nullptr>
4345
CUDF_HOST_DEVICE inline auto min(LHS const& lhs, RHS const& rhs)
4446
{
45-
return std::min(lhs, rhs);
47+
return cuda::std::min(lhs, rhs);
4648
}
4749

4850
/**
@@ -53,7 +55,7 @@ template <typename LHS,
5355
std::enable_if_t<cudf::is_relationally_comparable<LHS, RHS>()>* = nullptr>
5456
CUDF_HOST_DEVICE inline auto max(LHS const& lhs, RHS const& rhs)
5557
{
56-
return std::max(lhs, rhs);
58+
return cuda::std::max(lhs, rhs);
5759
}
5860
} // namespace detail
5961

@@ -68,20 +70,20 @@ struct DeviceSum {
6870
}
6971

7072
template <typename T, std::enable_if_t<cudf::is_timestamp<T>()>* = nullptr>
71-
static constexpr T identity()
73+
CUDF_HOST_DEVICE static constexpr T identity()
7274
{
7375
return T{typename T::duration{0}};
7476
}
7577

7678
template <typename T,
7779
std::enable_if_t<!cudf::is_timestamp<T>() && !cudf::is_fixed_point<T>()>* = nullptr>
78-
static constexpr T identity()
80+
CUDF_HOST_DEVICE static constexpr T identity()
7981
{
8082
return T{0};
8183
}
8284

8385
template <typename T, std::enable_if_t<cudf::is_fixed_point<T>()>* = nullptr>
84-
static constexpr T identity()
86+
CUDF_HOST_DEVICE static constexpr T identity()
8587
{
8688
#ifndef __CUDA_ARCH__
8789
CUDF_FAIL("fixed_point does not yet support device operator identity");
@@ -109,7 +111,7 @@ struct DeviceCount {
109111
}
110112

111113
template <typename T>
112-
static constexpr T identity()
114+
CUDF_HOST_DEVICE static constexpr T identity()
113115
{
114116
return T{};
115117
}
@@ -129,7 +131,7 @@ struct DeviceMin {
129131
template <typename T,
130132
std::enable_if_t<!std::is_same_v<T, cudf::string_view> && !cudf::is_dictionary<T>() &&
131133
!cudf::is_fixed_point<T>()>* = nullptr>
132-
static constexpr T identity()
134+
CUDF_HOST_DEVICE static constexpr T identity()
133135
{
134136
// chrono types do not have std::numeric_limits specializations and should use T::max()
135137
// https://eel.is/c++draft/numeric.limits.general#6
@@ -143,7 +145,7 @@ struct DeviceMin {
143145
}
144146

145147
template <typename T, std::enable_if_t<cudf::is_fixed_point<T>()>* = nullptr>
146-
static constexpr T identity()
148+
CUDF_HOST_DEVICE static constexpr T identity()
147149
{
148150
#ifndef __CUDA_ARCH__
149151
CUDF_FAIL("fixed_point does not yet support DeviceMin identity");
@@ -161,7 +163,7 @@ struct DeviceMin {
161163
}
162164

163165
template <typename T, std::enable_if_t<cudf::is_dictionary<T>()>* = nullptr>
164-
static constexpr T identity()
166+
CUDF_HOST_DEVICE static constexpr T identity()
165167
{
166168
return static_cast<T>(T::max_value());
167169
}
@@ -181,7 +183,7 @@ struct DeviceMax {
181183
template <typename T,
182184
std::enable_if_t<!std::is_same_v<T, cudf::string_view> && !cudf::is_dictionary<T>() &&
183185
!cudf::is_fixed_point<T>()>* = nullptr>
184-
static constexpr T identity()
186+
CUDF_HOST_DEVICE static constexpr T identity()
185187
{
186188
// chrono types do not have std::numeric_limits specializations and should use T::min()
187189
// https://eel.is/c++draft/numeric.limits.general#6
@@ -195,7 +197,7 @@ struct DeviceMax {
195197
}
196198

197199
template <typename T, std::enable_if_t<cudf::is_fixed_point<T>()>* = nullptr>
198-
static constexpr T identity()
200+
CUDF_HOST_DEVICE static constexpr T identity()
199201
{
200202
#ifndef __CUDA_ARCH__
201203
CUDF_FAIL("fixed_point does not yet support DeviceMax identity");
@@ -212,7 +214,7 @@ struct DeviceMax {
212214
}
213215

214216
template <typename T, std::enable_if_t<cudf::is_dictionary<T>()>* = nullptr>
215-
static constexpr T identity()
217+
CUDF_HOST_DEVICE static constexpr T identity()
216218
{
217219
return static_cast<T>(T::lowest_value());
218220
}
@@ -229,13 +231,13 @@ struct DeviceProduct {
229231
}
230232

231233
template <typename T, std::enable_if_t<!cudf::is_fixed_point<T>()>* = nullptr>
232-
static constexpr T identity()
234+
CUDF_HOST_DEVICE static constexpr T identity()
233235
{
234236
return T{1};
235237
}
236238

237239
template <typename T, std::enable_if_t<cudf::is_fixed_point<T>()>* = nullptr>
238-
static constexpr T identity()
240+
CUDF_HOST_DEVICE static constexpr T identity()
239241
{
240242
#ifndef __CUDA_ARCH__
241243
CUDF_FAIL("fixed_point does not yet support DeviceProduct identity");

cpp/include/cudf/detail/utilities/integer_utils.hpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ constexpr S round_down_safe(S number_to_round, S modulus) noexcept
8686
* `modulus` is positive and does not check for overflow.
8787
*/
8888
template <typename S>
89-
constexpr S round_up_unsafe(S number_to_round, S modulus) noexcept
89+
CUDF_HOST_DEVICE constexpr S round_up_unsafe(S number_to_round, S modulus) noexcept
9090
{
9191
auto remainder = number_to_round % modulus;
9292
if (remainder == 0) { return number_to_round; }
@@ -187,7 +187,7 @@ constexpr bool is_a_power_of_two(I val) noexcept
187187
* @return Absolute value if value type is signed.
188188
*/
189189
template <typename T>
190-
constexpr auto absolute_value(T value) -> T
190+
CUDF_HOST_DEVICE constexpr auto absolute_value(T value) -> T
191191
{
192192
if constexpr (cuda::std::is_signed<T>()) return numeric::detail::abs(value);
193193
return value;

cpp/include/cudf/fixed_point/detail/floating_conversion.hpp

+4-3
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include <cuda/std/cmath>
2323
#include <cuda/std/limits>
2424
#include <cuda/std/type_traits>
25+
#include <cuda/std/utility>
2526

2627
#include <cstring>
2728

@@ -183,7 +184,7 @@ struct floating_converter {
183184
* @param integer_rep The bit-casted floating value to extract the exponent from
184185
* @return The stored base-2 exponent and significand, shifted for denormals
185186
*/
186-
CUDF_HOST_DEVICE inline static std::pair<IntegralType, int> get_significand_and_pow2(
187+
CUDF_HOST_DEVICE inline static cuda::std::pair<IntegralType, int> get_significand_and_pow2(
187188
IntegralType integer_rep)
188189
{
189190
// Extract the significand
@@ -1008,7 +1009,7 @@ CUDF_HOST_DEVICE inline auto shift_to_binary_pospow(DecimalRep decimal_rep, int
10081009
}
10091010

10101011
// Our shifting_rep is now the integer mantissa, return it and the powers of 2
1011-
return std::pair{shifting_rep, pow2};
1012+
return cuda::std::pair{shifting_rep, pow2};
10121013
}
10131014

10141015
/**
@@ -1075,7 +1076,7 @@ CUDF_HOST_DEVICE inline auto shift_to_binary_negpow(DecimalRep decimal_rep, int
10751076
}
10761077

10771078
// Our shifting_rep is now the integer mantissa, return it and the powers of 2
1078-
return std::pair{shifting_rep, pow2};
1079+
return cuda::std::pair{shifting_rep, pow2};
10791080
}
10801081

10811082
/**

cpp/include/cudf/hashing/detail/hash_functions.cuh

+3-2
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818

1919
#include <cudf/utilities/traits.hpp>
2020

21-
#include <limits>
21+
#include <cuda/std/cmath>
22+
#include <cuda/std/limits>
2223

2324
namespace cudf::hashing::detail {
2425

@@ -29,7 +30,7 @@ template <typename T>
2930
T __device__ inline normalize_nans(T const& key)
3031
{
3132
if constexpr (cudf::is_floating_point<T>()) {
32-
if (std::isnan(key)) { return std::numeric_limits<T>::quiet_NaN(); }
33+
if (cuda::std::isnan(key)) { return cuda::std::numeric_limits<T>::quiet_NaN(); }
3334
}
3435
return key;
3536
}

cpp/include/cudf/hashing/detail/hashing.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ std::unique_ptr<column> xxhash_64(table_view const& input,
8282
* @param rhs The second hash value
8383
* @return Combined hash value
8484
*/
85-
constexpr uint32_t hash_combine(uint32_t lhs, uint32_t rhs)
85+
CUDF_HOST_DEVICE constexpr uint32_t hash_combine(uint32_t lhs, uint32_t rhs)
8686
{
8787
return lhs ^ (rhs + 0x9e37'79b9 + (lhs << 6) + (lhs >> 2));
8888
}

0 commit comments

Comments (0)