Skip to content

Commit

Permalink
assorted fixes (#1473)
Browse files Browse the repository at this point in the history
* remove constraints on `stdexec::connect` per C++ 26 std::execution.

* remove uses of deprecated CUB interfaces in `nvexec/`.

* improve type error reporting of `stopped_as_optional`

* replace erroneous uses of `tag_invocable<Tag...>` with `__callable<Tag...>`

* clean up the definitions of `set_value`, `set_error`, and `set_stopped`.

* squash new warnings from the nvhpc compiler
  • Loading branch information
ericniebler authored Feb 7, 2025
1 parent 9514e7b commit 3302dda
Show file tree
Hide file tree
Showing 29 changed files with 260 additions and 136 deletions.
22 changes: 14 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -271,22 +271,28 @@ target_compile_definitions(
stdexec_executable_flags INTERFACE
$<$<NOT:$<AND:$<CXX_COMPILER_ID:NVHPC>,$<COMPILE_LANGUAGE:CXX>>>:STDEXEC_ENABLE_EXTRA_TYPE_CHECKING>)

# Support target for examples and tests
add_library(nvexec_executable_flags INTERFACE)

target_compile_options(nvexec_executable_flags INTERFACE
$<$<AND:$<CXX_COMPILER_ID:NVHPC>,$<COMPILE_LANGUAGE:CXX>>:-gpu=nomanaged>)
target_link_options(nvexec_executable_flags INTERFACE
$<$<AND:$<CXX_COMPILER_ID:NVHPC>,$<COMPILE_LANGUAGE:CXX>>:-gpu=nomanaged>)

# Set up nvexec library
option(STDEXEC_ENABLE_CUDA "Enable CUDA targets for non-nvc++ compilers" OFF)

if(CMAKE_CXX_COMPILER_ID STREQUAL "NVHPC")
set(STDEXEC_ENABLE_CUDA ON)
# Unset these if using nvc++
disable_compiler(LANG CUDA)

set(_nvhpc_seperate_memory_flags "-gpu=nomanaged")
# if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 24.5.0)
# set(_nvhpc_seperate_memory_flags "-gpu=mem:separate")
# endif()
endif()

# Support target for examples and tests
add_library(nvexec_executable_flags INTERFACE)

target_compile_options(nvexec_executable_flags INTERFACE
$<$<AND:$<CXX_COMPILER_ID:NVHPC>,$<COMPILE_LANGUAGE:CXX>>:${_nvhpc_seperate_memory_flags}>)
target_link_options(nvexec_executable_flags INTERFACE
$<$<AND:$<CXX_COMPILER_ID:NVHPC>,$<COMPILE_LANGUAGE:CXX>>:${_nvhpc_seperate_memory_flags}>)

if(STDEXEC_ENABLE_CUDA)
file(GLOB_RECURSE nvexec_headers CONFIGURE_DEPENDS include/nvexec/*.cuh)
add_library(nvexec INTERFACE)
Expand Down
5 changes: 5 additions & 0 deletions examples/nvexec/maxwell/cuda.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
#include "common.cuh"
#include "nvexec/detail/throw_on_cuda_error.cuh"

STDEXEC_PRAGMA_PUSH()
STDEXEC_PRAGMA_IGNORE_EDG(cuda_compile)

template <int BlockThreads, class Action>
__launch_bounds__(BlockThreads) __global__ void kernel(std::size_t cells, Action action) {
std::size_t cell_id = threadIdx.x + blockIdx.x * BlockThreads;
Expand Down Expand Up @@ -62,3 +65,5 @@ void run_cuda(

cudaStreamDestroy(stream);
}

STDEXEC_PRAGMA_POP()
18 changes: 6 additions & 12 deletions include/exec/any_sender_of.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1150,27 +1150,21 @@ namespace exec {
}

template <class... _As>
requires stdexec::tag_invocable<stdexec::set_value_t, __receiver_base, _As...>
requires stdexec::__callable<stdexec::set_value_t, __receiver_base, _As...>
void set_value(_As&&... __as) noexcept {
stdexec::tag_invoke(
stdexec::set_value,
static_cast<__receiver_base&&>(__receiver_),
static_cast<_As&&>(__as)...);
stdexec::set_value(static_cast<__receiver_base&&>(__receiver_), static_cast<_As&&>(__as)...);
}

template <class _Error>
requires stdexec::tag_invocable<stdexec::set_error_t, __receiver_base, _Error>
requires stdexec::__callable<stdexec::set_error_t, __receiver_base, _Error>
void set_error(_Error&& __err) noexcept {
stdexec::tag_invoke(
stdexec::set_error,
static_cast<__receiver_base&&>(__receiver_),
static_cast<_Error&&>(__err));
stdexec::set_error(static_cast<__receiver_base&&>(__receiver_), static_cast<_Error&&>(__err));
}

void set_stopped() noexcept
requires stdexec::tag_invocable<stdexec::set_stopped_t, __receiver_base>
requires stdexec::__callable<stdexec::set_stopped_t, __receiver_base>
{
stdexec::tag_invoke(stdexec::set_stopped, static_cast<__receiver_base&&>(__receiver_));
stdexec::set_stopped(static_cast<__receiver_base&&>(__receiver_));
}

auto get_env() const noexcept -> stdexec::env_of_t<__receiver_base> {
Expand Down
33 changes: 15 additions & 18 deletions include/exec/materialize.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ namespace exec {

template <__decays_to<_Sender> _Sndr>
__t(_Sndr&& __sender)
: __sender_{static_cast<_Sndr&&>(__sender)} {
: __sndr_{static_cast<_Sndr&&>(__sender)} {
}

template <__decays_to<__t> _Self, class _Receiver>
Expand All @@ -83,7 +83,7 @@ namespace exec {
noexcept(__nothrow_connectable<__copy_cvref_t<_Self, _Sender>, __receiver_t<_Receiver>>)
-> connect_result_t<__copy_cvref_t<_Self, _Sender>, __receiver_t<_Receiver>> {
return stdexec::connect(
static_cast<_Self&&>(__self).__sender_,
static_cast<_Self&&>(__self).__sndr_,
__receiver_t<_Receiver>{static_cast<_Receiver&&>(__receiver)});
}

Expand All @@ -93,23 +93,23 @@ namespace exec {
template <class _Err>
using __materialize_error = completion_signatures<set_value_t(set_error_t, _Err)>;

template <class... _Env>
using __completion_signatures_for_t = //
template <class _Self, class... _Env>
using __completions_t = //
__transform_completion_signatures<
__completion_signatures_of_t<_Sender, _Env...>,
__completion_signatures_of_t<__copy_cvref_t<_Self, _Sender>, _Env...>,
__materialize_value,
__materialize_error,
completion_signatures<set_value_t(set_stopped_t)>,
__mconcat<__qq<completion_signatures>>::__f>;

template <__decays_to<__t> _Self, class... _Env>
static auto get_completion_signatures(_Self&&, _Env&&...) //
-> __completion_signatures_for_t<_Env...> {
-> __completions_t<_Self, _Env...> {
return {};
}

private:
_Sender __sender_;
_Sender __sndr_;
};
};

Expand Down Expand Up @@ -145,7 +145,7 @@ namespace exec {
}

template <__completion_tag _Tag, class... _Args>
requires tag_invocable<_Tag, _Receiver&&, _Args...>
requires __callable<_Tag, _Receiver, _Args...>
void set_value(_Tag, _Args&&... __args) noexcept {
_Tag()(static_cast<_Receiver&&>(__upstream_), static_cast<_Args&&>(__args)...);
}
Expand Down Expand Up @@ -182,7 +182,7 @@ namespace exec {

template <__decays_to<_Sender> _Sndr>
__t(_Sndr&& __sndr) noexcept(__nothrow_decay_copyable<_Sndr>)
: __sender_{static_cast<_Sndr&&>(__sndr)} {
: __sndr_{static_cast<_Sndr&&>(__sndr)} {
}

template <__decays_to<__t> _Self, class _Receiver>
Expand All @@ -191,32 +191,29 @@ namespace exec {
noexcept(__nothrow_connectable<__copy_cvref_t<_Self, _Sender>, __receiver_t<_Receiver>>)
-> connect_result_t<__copy_cvref_t<_Self, _Sender>, __receiver_t<_Receiver>> {
return stdexec::connect(
static_cast<_Self&&>(__self).__sender_,
static_cast<_Self&&>(__self).__sndr_,
__receiver_t<_Receiver>{static_cast<_Receiver&&>(__receiver)});
}

template <class _Tag, class... _Args>
requires __completion_tag<__decay_t<_Tag>>
using __dematerialize_value = completion_signatures<__decay_t<_Tag>(_Args...)>;

template <class... Ts>
using __foo = __meval<__dematerialize_value, Ts...>;

template <class... _Env>
using __completion_signatures_for_t = //
template <class _Self, class... _Env>
using __completions_t = //
transform_completion_signatures<
__completion_signatures_of_t<_Sender, _Env...>,
__completion_signatures_of_t<__copy_cvref_t<_Self, _Sender>, _Env...>,
completion_signatures<>,
__mtry_q<__dematerialize_value>::template __f>;

template <__decays_to<__t> _Self, class... _Env>
static auto get_completion_signatures(_Self&&, _Env&&...) //
-> __completion_signatures_for_t<_Env...> {
-> __completions_t<_Self, _Env...> {
return {};
}

private:
_Sender __sender_;
_Sender __sndr_;
};
};

Expand Down
5 changes: 5 additions & 0 deletions include/nvexec/multi_gpu_context.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@

#include "stream_context.cuh"

STDEXEC_PRAGMA_PUSH()
STDEXEC_PRAGMA_IGNORE_EDG(cuda_compile)

namespace nvexec {
namespace STDEXEC_STREAM_DETAIL_NS {
template <sender Sender, std::integral Shape, class Fun>
Expand Down Expand Up @@ -275,3 +278,5 @@ namespace nvexec {
}
};
} // namespace nvexec

STDEXEC_PRAGMA_POP()
5 changes: 5 additions & 0 deletions include/nvexec/stream/bulk.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@

#include "common.cuh"

STDEXEC_PRAGMA_PUSH()
STDEXEC_PRAGMA_IGNORE_EDG(cuda_compile)

namespace nvexec::STDEXEC_STREAM_DETAIL_NS {

namespace _bulk {
Expand Down Expand Up @@ -384,3 +387,5 @@ namespace stdexec::__detail {
nvexec::STDEXEC_STREAM_DETAIL_NS::multi_gpu_bulk_sender_t<__name_of<__t<SenderId>>, Shape, Fun>>
__name_of_v<nvexec::STDEXEC_STREAM_DETAIL_NS::multi_gpu_bulk_sender_t<SenderId, Shape, Fun>>{};
} // namespace stdexec::__detail

STDEXEC_PRAGMA_POP()
6 changes: 6 additions & 0 deletions include/nvexec/stream/common.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@
#include "../detail/throw_on_cuda_error.cuh"
#include "../detail/queue.cuh"
#include "../detail/variant.cuh"
#include "stdexec/__detail/__config.hpp"

STDEXEC_PRAGMA_PUSH()
STDEXEC_PRAGMA_IGNORE_EDG(cuda_compile)

namespace nvexec {
using stdexec::operator""_mstr;
Expand Down Expand Up @@ -813,3 +817,5 @@ namespace nvexec {

inline constexpr STDEXEC_STREAM_DETAIL_NS::get_stream_t get_stream{};
} // namespace nvexec

STDEXEC_PRAGMA_POP()
5 changes: 5 additions & 0 deletions include/nvexec/stream/ensure_started.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
#include "../detail/throw_on_cuda_error.cuh"
#include "common.cuh"

STDEXEC_PRAGMA_PUSH()
STDEXEC_PRAGMA_IGNORE_EDG(cuda_compile)

namespace nvexec::STDEXEC_STREAM_DETAIL_NS {
namespace _ensure_started {
template <class Tag, class... As, class Variant>
Expand Down Expand Up @@ -386,3 +389,5 @@ namespace stdexec::__detail {
nvexec::STDEXEC_STREAM_DETAIL_NS::ensure_started_sender_t<__name_of<__t<SenderId>>>>
__name_of_v<nvexec::STDEXEC_STREAM_DETAIL_NS::ensure_started_sender_t<SenderId>>{};
} // namespace stdexec::__detail

STDEXEC_PRAGMA_POP()
1 change: 1 addition & 0 deletions include/nvexec/stream/launch.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

STDEXEC_PRAGMA_PUSH()
STDEXEC_PRAGMA_IGNORE_GNU("-Wmissing-braces")
STDEXEC_PRAGMA_IGNORE_EDG(cuda_compile)

namespace nvexec {
namespace STDEXEC_STREAM_DETAIL_NS {
Expand Down
5 changes: 5 additions & 0 deletions include/nvexec/stream/let_xxx.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@

#include "common.cuh"

STDEXEC_PRAGMA_PUSH()
STDEXEC_PRAGMA_IGNORE_EDG(cuda_compile)

namespace nvexec::STDEXEC_STREAM_DETAIL_NS {
namespace let_xxx {
template <class... As, class Fun, class ResultSenderT>
Expand Down Expand Up @@ -270,3 +273,5 @@ namespace stdexec::__detail {
nvexec::STDEXEC_STREAM_DETAIL_NS::let_sender_t<__name_of<__t<SenderId>>, Fun, Set>>
__name_of_v<nvexec::STDEXEC_STREAM_DETAIL_NS::let_sender_t<SenderId, Fun, Set>>{};
} // namespace stdexec::__detail

STDEXEC_PRAGMA_POP()
4 changes: 2 additions & 2 deletions include/nvexec/stream/reduce.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -133,13 +133,13 @@ namespace nvexec {
using __sender =
stdexec::__t<reduce_::sender_t<stdexec::__id<__decay_t<Sender>>, InitT, Fun>>;

template <sender Sender, __movable_value InitT, __movable_value Fun = cub::Sum>
template <sender Sender, __movable_value InitT, __movable_value Fun = cuda::std::plus<>>
__sender<Sender, InitT, Fun> operator()(Sender&& sndr, InitT init, Fun fun) const {
return __sender<Sender, InitT, Fun>{
{}, static_cast<Sender&&>(sndr), static_cast<InitT&&>(init), static_cast<Fun&&>(fun)};
}

template <class InitT, class Fun = cub::Sum>
template <class InitT, class Fun = cuda::std::plus<>>
STDEXEC_ATTRIBUTE((always_inline)) auto operator()(InitT init, Fun fun = {}) const -> __binder_back<reduce_t, InitT, Fun> {
return {
{static_cast<InitT&&>(init), static_cast<Fun&&>(fun)},
Expand Down
5 changes: 5 additions & 0 deletions include/nvexec/stream/schedule_from.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
#include "common.cuh"
#include "../detail/variant.cuh"

STDEXEC_PRAGMA_PUSH()
STDEXEC_PRAGMA_IGNORE_EDG(cuda_compile)

namespace nvexec::STDEXEC_STREAM_DETAIL_NS {

namespace _sched_from {
Expand Down Expand Up @@ -239,3 +242,5 @@ namespace stdexec::__detail {
nvexec::STDEXEC_STREAM_DETAIL_NS::schedule_from_sender_t<_Scheduler, __name_of<__t<_SenderId>>>>
__name_of_v<nvexec::STDEXEC_STREAM_DETAIL_NS::schedule_from_sender_t<_Scheduler, _SenderId>>;
} // namespace stdexec::__detail

STDEXEC_PRAGMA_POP()
5 changes: 5 additions & 0 deletions include/nvexec/stream/split.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
#include "common.cuh"
#include "../detail/throw_on_cuda_error.cuh"

STDEXEC_PRAGMA_PUSH()
STDEXEC_PRAGMA_IGNORE_EDG(cuda_compile)

namespace nvexec::STDEXEC_STREAM_DETAIL_NS {
namespace _split {
inline auto __make_env(
Expand Down Expand Up @@ -362,3 +365,5 @@ namespace stdexec::__detail {
extern __mconst<nvexec::STDEXEC_STREAM_DETAIL_NS::split_sender_t<__name_of<__t<SenderId>>>>
__name_of_v<nvexec::STDEXEC_STREAM_DETAIL_NS::split_sender_t<SenderId>>;
} // namespace stdexec::__detail

STDEXEC_PRAGMA_POP()
5 changes: 5 additions & 0 deletions include/nvexec/stream/then.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@

#include "common.cuh"

STDEXEC_PRAGMA_PUSH()
STDEXEC_PRAGMA_IGNORE_EDG(cuda_compile)

namespace nvexec::STDEXEC_STREAM_DETAIL_NS {

namespace _then {
Expand Down Expand Up @@ -207,3 +210,5 @@ namespace stdexec::__detail {
nvexec::STDEXEC_STREAM_DETAIL_NS::then_sender_t<__name_of<__t<SenderId>>, Fun>>
__name_of_v<nvexec::STDEXEC_STREAM_DETAIL_NS::then_sender_t<SenderId, Fun>>{};
} // namespace stdexec::__detail

STDEXEC_PRAGMA_POP()
5 changes: 5 additions & 0 deletions include/nvexec/stream/upon_error.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@

#include "common.cuh"

STDEXEC_PRAGMA_PUSH()
STDEXEC_PRAGMA_IGNORE_EDG(cuda_compile)

namespace nvexec::STDEXEC_STREAM_DETAIL_NS {

namespace _upon_error {
Expand Down Expand Up @@ -189,3 +192,5 @@ namespace stdexec::__detail {
nvexec::STDEXEC_STREAM_DETAIL_NS::upon_error_sender_t<__name_of<__t<SenderId>>, Fun>>
__name_of_v<nvexec::STDEXEC_STREAM_DETAIL_NS::upon_error_sender_t<SenderId, Fun>>{};
} // namespace stdexec::__detail

STDEXEC_PRAGMA_POP()
5 changes: 5 additions & 0 deletions include/nvexec/stream/upon_stopped.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@

#include "common.cuh"

STDEXEC_PRAGMA_PUSH()
STDEXEC_PRAGMA_IGNORE_EDG(cuda_compile)

namespace nvexec::STDEXEC_STREAM_DETAIL_NS {

namespace _upon_stopped {
Expand Down Expand Up @@ -161,3 +164,5 @@ namespace stdexec::__detail {
nvexec::STDEXEC_STREAM_DETAIL_NS::upon_stopped_sender_t<__name_of<__t<SenderId>>, Fun>>
__name_of_v<nvexec::STDEXEC_STREAM_DETAIL_NS::upon_stopped_sender_t<SenderId, Fun>>{};
} // namespace stdexec::__detail

STDEXEC_PRAGMA_POP()
5 changes: 5 additions & 0 deletions include/nvexec/stream/when_all.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
#include "../detail/queue.cuh"
#include "../detail/throw_on_cuda_error.cuh"

STDEXEC_PRAGMA_PUSH()
STDEXEC_PRAGMA_IGNORE_EDG(cuda_compile)

namespace nvexec::STDEXEC_STREAM_DETAIL_NS {

namespace _when_all {
Expand Down Expand Up @@ -447,3 +450,5 @@ namespace stdexec::__detail {
__name_of_v<nvexec::STDEXEC_STREAM_DETAIL_NS::
when_all_sender_t<WithCompletionScheduler, Scheduler, SenderIds...>>{};
} // namespace stdexec::__detail

STDEXEC_PRAGMA_POP()
Loading

0 comments on commit 3302dda

Please sign in to comment.