From 7151c5f0968629a3d6d341b62439255eaba122a6 Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty Date: Wed, 12 Apr 2023 09:18:14 +0000 Subject: [PATCH 1/2] initial updates to sycl backend --- CMakeLists.txt | 11 +++- include/camp/defines.hpp | 4 ++ include/camp/resource/sycl.hpp | 110 +++++++++++++++++---------------- test/CMakeLists.txt | 4 ++ test/resource.cpp | 104 ++++++++++++++++++++++++++++--- 5 files changed, 171 insertions(+), 62 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 452ab3a7..a1c0eafd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,6 +9,8 @@ set(camp_VERSION_MINOR ${PROJECT_VERSION_MINOR}) set(camp_VERSION_PATCH ${PROJECT_VERSION_PATCH}) include(CheckCXXCompilerFlag) +include(FindPackageHandleStandardArgs) +include(AddCXXCompilerFlag) if(NOT DEFINED BLT_CXX_STD) set(CXX_VERSIONS 17 14) foreach(cxxver ${CXX_VERSIONS}) @@ -113,6 +115,14 @@ if (ENABLE_HIP) endif() endif () +if (ENABLE_SYCL) + check_cxx_compiler_flag("-fsycl" CXX_HAS_FSYCL) + find_package_handle_standard_args( SYCL + REQUIRED_VARS CXX_HAS_FSYCL + ) + add_compile_options("-fsycl") +endif () + # end backends # Configure the config header file to allow config time options @@ -189,4 +199,3 @@ if(CAMP_ENABLE_TESTS) enable_testing() add_subdirectory(test) endif() - diff --git a/include/camp/defines.hpp b/include/camp/defines.hpp index 1d5e1e11..2fea7765 100644 --- a/include/camp/defines.hpp +++ b/include/camp/defines.hpp @@ -25,6 +25,10 @@ For details about use and distribution, please read LICENSE and NOTICE from #include #endif +#ifdef CAMP_ENABLE_SYCL +#include +#endif + namespace camp { diff --git a/include/camp/resource/sycl.hpp b/include/camp/resource/sycl.hpp index 6823d49f..2d4916b8 100644 --- a/include/camp/resource/sycl.hpp +++ b/include/camp/resource/sycl.hpp @@ -16,10 +16,9 @@ For details about use and distribution, please read LICENSE and NOTICE from #include "camp/resource/platform.hpp" #ifdef CAMP_ENABLE_SYCL -#include +#include #include #include -using namespace cl; namespace camp { @@ -32,7 +31,9 @@ namespace resources { public: SyclEvent(sycl::queue *qu) { m_event = sycl::event(); } - bool check() const { return true; } + bool check() const { + return (m_event.get_info() == sycl::info::event_command_status::complete); + } void wait() const { getSyclEvent_t().wait(); } sycl::event getSyclEvent_t() const { return m_event; } @@ -42,15 +43,13 @@ namespace resources class Sycl { - static sycl::queue *get_a_queue(sycl::context &syclContext, + static sycl::queue *get_a_queue(sycl::context *syclContext, int num, bool useContext) { - static sycl::gpu_selector gpuSelector; + static sycl::device gpuSelector { sycl::gpu_selector_v }; static sycl::property_list propertyList = sycl::property_list(sycl::property::queue::in_order()); - static sycl::context privateContext; - static sycl::context *contextInUse = NULL; static std::map> queueMap; @@ -59,47 +58,45 @@ namespace resources // User passed a context, use it if (useContext) { - contextInUse = &syclContext; - if (queueMap.find(contextInUse) == queueMap.end()) { - queueMap[contextInUse] = { - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList)}; + queueMap[syclContext] = { + sycl::queue(*syclContext, gpuSelector, propertyList), + sycl::queue(*syclContext, gpuSelector, propertyList), + sycl::queue(*syclContext, gpuSelector, propertyList), + sycl::queue(*syclContext, gpuSelector, propertyList), + sycl::queue(*syclContext, gpuSelector, propertyList), + sycl::queue(*syclContext, gpuSelector, propertyList), + sycl::queue(*syclContext, gpuSelector, propertyList), + sycl::queue(*syclContext, gpuSelector, propertyList), + sycl::queue(*syclContext, gpuSelector, propertyList), + sycl::queue(*syclContext, gpuSelector, propertyList), + sycl::queue(*syclContext, gpuSelector, propertyList), + sycl::queue(*syclContext, gpuSelector, propertyList), + sycl::queue(*syclContext, gpuSelector, propertyList), + sycl::queue(*syclContext, gpuSelector, propertyList), + sycl::queue(*syclContext, gpuSelector, propertyList), + sycl::queue(*syclContext, gpuSelector, propertyList)}; } } else { // User did not pass context, use last used or private one - if (contextInUse == NULL) { - contextInUse = &privateContext; - queueMap[contextInUse] = { - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList), - sycl::queue(*contextInUse, gpuSelector, propertyList)}; + if (syclContext == nullptr) { + sycl::context* privateContext = new sycl::context(gpuSelector); + queueMap[privateContext] = { + sycl::queue(*privateContext, gpuSelector, propertyList), + sycl::queue(*privateContext, gpuSelector, propertyList), + sycl::queue(*privateContext, gpuSelector, propertyList), + sycl::queue(*privateContext, gpuSelector, propertyList), + sycl::queue(*privateContext, gpuSelector, propertyList), + sycl::queue(*privateContext, gpuSelector, propertyList), + sycl::queue(*privateContext, gpuSelector, propertyList), + sycl::queue(*privateContext, gpuSelector, propertyList), + sycl::queue(*privateContext, gpuSelector, propertyList), + sycl::queue(*privateContext, gpuSelector, propertyList), + sycl::queue(*privateContext, gpuSelector, propertyList), + sycl::queue(*privateContext, gpuSelector, propertyList), + sycl::queue(*privateContext, gpuSelector, propertyList), + sycl::queue(*privateContext, gpuSelector, propertyList), + sycl::queue(*privateContext, gpuSelector, propertyList), + sycl::queue(*privateContext, gpuSelector, propertyList)}; } } m_mtx.unlock(); @@ -116,16 +113,14 @@ namespace resources return &queueMap[contextInUse][num % 16]; } - public: Sycl(int group = -1) { - sycl::context temp; - qu = get_a_queue(temp, group, false); + qu = get_a_queue(nullptr, group, false); } Sycl(sycl::context &syclContext, int group = -1) - : qu(get_a_queue(syclContext, group, true)) + : qu(get_a_queue(&syclContext, group, true)) { } @@ -138,12 +133,22 @@ namespace resources } SyclEvent get_event() { return SyclEvent(get_queue()); } Event get_event_erased() { return Event{SyclEvent(get_queue())}; } - void wait() { qu->wait(); } + void wait() { + #if defined(SYCL_EXT_ONEAPI_ENQUEUE_BARRIER) + qu->ext_oneapi_submit_barrier(); + #else + qu->wait(); + #endif + } void wait_for(Event *e) { auto *sycl_event = e->try_get(); if (sycl_event) { + #if defined(SYCL_EXT_ONEAPI_ENQUEUE_BARRIER) + qu->ext_oneapi_submit_barrier( {sycl_event->getSyclEvent_t()} ); + #else (sycl_event->getSyclEvent_t()).wait(); + #endif } else { e->wait(); } @@ -155,7 +160,6 @@ namespace resources { T *ret = nullptr; if (size > 0) { - ret = sycl::malloc_shared(size, *qu); switch (ma) { case MemoryAccess::Unknown: case MemoryAccess::Device: @@ -181,13 +185,13 @@ namespace resources void memcpy(void *dst, const void *src, size_t size) { if (size > 0) { - qu->memcpy(dst, src, size).wait(); + qu->memcpy(dst, src, size); } } void memset(void *p, int val, size_t size) { if (size > 0) { - qu->memset(p, val, size).wait(); + qu->memset(p, val, size); } } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 1f4be5a3..5895cc3e 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -24,6 +24,10 @@ function(camp_add_test TESTNAME) list(APPEND _depends blt::hip) endif() + if(ENABLE_SYCL) + list(APPEND _depends sycl) + endif() + if(ABT_DEPENDS_ON) list(APPEND _depends ${ABT_DEPENDS_ON}) endif() diff --git a/test/resource.cpp b/test/resource.cpp index f32824a0..5f4df5ac 100644 --- a/test/resource.cpp +++ b/test/resource.cpp @@ -47,6 +47,9 @@ TEST(CampResource, GetPlatform) #ifdef CAMP_HAVE_HIP ASSERT_EQ(Resource(Hip()).get_platform(), Platform::hip); #endif +#ifdef CAMP_HAVE_SYCL + ASSERT_EQ(Resource(Sycl()).get_platform(), Platform::sycl); +#endif #ifdef CAMP_HAVE_OMP_OFFLOAD ASSERT_EQ(Resource(Omp()).get_platform(), Platform::omp_target); #endif @@ -121,6 +124,7 @@ TEST(CampResource, GetEvent) cudaStreamCreate(&s); Event evc{CudaEvent(s)}; ASSERT_EQ(typeid(evc), typeid(ev2)); + cudaStreamDestroy(s); } TEST(CampEvent, Get) @@ -141,6 +145,7 @@ TEST(CampEvent, Get) ASSERT_EQ(typeid(host_event), typeid(pure_host_event)); ASSERT_EQ(typeid(cuda_event), typeid(pure_cuda_event)); + cudaStreamDestroy(s); } #endif #if defined(CAMP_HAVE_HIP) @@ -181,15 +186,15 @@ TEST(CampResource, StreamSelect) TEST(CampResource, Get) { Resource dev_host{Host()}; - Resource dev_cuda{Hip()}; + Resource dev_hip{Hip()}; auto erased_host = dev_host.get(); Host pure_host; ASSERT_EQ(typeid(erased_host), typeid(pure_host)); - auto erased_cuda = dev_cuda.get(); - Hip pure_cuda; - ASSERT_EQ(typeid(erased_cuda), typeid(pure_cuda)); + auto erased_hip = dev_hip.get(); + Hip pure_hip; + ASSERT_EQ(typeid(erased_hip), typeid(pure_hip)); } TEST(CampResource, GetEvent) @@ -206,6 +211,7 @@ TEST(CampResource, GetEvent) hipStreamCreate(&s); Event evc{HipEvent(s)}; ASSERT_EQ(typeid(evc), typeid(ev2)); + hipStreamDestroy(s); } TEST(CampEvent, Get) @@ -214,20 +220,102 @@ TEST(CampEvent, Get) Resource c1{Hip()}; Event erased_host_event = h1.get_event(); - Event erased_cuda_event = c1.get_event(); + Event erased_hip_event = c1.get_event(); auto pure_host_event = erased_host_event.get(); - auto pure_cuda_event = erased_cuda_event.get(); + auto pure_hip_event = erased_hip_event.get(); HostEvent host_event; hipStream_t s; hipStreamCreate(&s); - HipEvent cuda_event(s); + HipEvent hip_event(s); ASSERT_EQ(typeid(host_event), typeid(pure_host_event)); - ASSERT_EQ(typeid(cuda_event), typeid(pure_cuda_event)); + ASSERT_EQ(typeid(hip_event), typeid(pure_hip_event)); + hipStreamDestroy(s); } #endif +#if defined(CAMP_HAVE_SYCL) +TEST(CampResource, Reassignment) +{ + Resource h1{Host()}; + Resource c1{Sycl()}; + h1 = Sycl(); + ASSERT_EQ(typeid(c1), typeid(h1)); + + Resource h2{Host()}; + Resource c2{Sycl()}; + c2 = Host(); + ASSERT_EQ(typeid(c2), typeid(h2)); +} + +TEST(CampResource, StreamSelect) +{ + sycl::device dev(sycl::gpu_selector_v); + sycl::context ctxt(dev); + sycl::property_list propertyList = + sycl::property_list(sycl::property::queue::in_order()); + sycl::queue stream1(ctxt, dev, propertyList), stream2(ctxt, dev, propertyList); + + Resource c1{Sycl(stream1.get_context())}; + Resource c2{Sycl(stream2.get_context())}; + + const int N = 5; + int* d_array1 = c1.allocate(N); + int* d_array2 = c2.allocate(N); + + c1.deallocate(d_array1); + c2.deallocate(d_array2); +} + +TEST(CampResource, Get) +{ + Resource dev_host{Host()}; + Resource dev_sycl{Sycl()}; + + auto erased_host = dev_host.get(); + Host pure_host; + ASSERT_EQ(typeid(erased_host), typeid(pure_host)); + + auto erased_sycl = dev_sycl.get(); + Sycl pure_sycl; + ASSERT_EQ(typeid(erased_sycl), typeid(pure_sycl)); +} + +TEST(CampResource, GetEvent) +{ + Resource h1{Host()}; + Resource c1{Sycl()}; + + auto ev1 = h1.get_event(); + Event evh{HostEvent()}; + ASSERT_EQ(typeid(evh), typeid(ev1)); + + auto ev2 = c1.get_event(); + sycl::queue s(sycl::gpu_selector_v) + Event evc{SyclEvent(&s)}; + ASSERT_EQ(typeid(evc), typeid(ev2)); +} + +TEST(CampEvent, Get) +{ + Resource h1{Host()}; + Resource c1{Sycl()}; + + Event erased_host_event = h1.get_event(); + Event erased_sycl_event = c1.get_event(); + + auto pure_host_event = erased_host_event.get(); + auto pure_sycl_event = erased_sycl_event.get(); + + HostEvent host_event; + sycl::queue s(sycl::gpu_selector_v) + SyclEvent sycl_event(&s); + + ASSERT_EQ(typeid(host_event), typeid(pure_host_event)); + ASSERT_EQ(typeid(sycl_event), typeid(pure_sycl_event)); +} +#endif // CAMP_HAVE_SYCL template static EventProxy do_stuff(Res r) From c58ee2fc61390e625851c2b710cd332ff419b271 Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty Date: Wed, 12 Apr 2023 09:48:17 +0000 Subject: [PATCH 2/2] fix build error with cmake macro --- CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a1c0eafd..e628b76f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,7 +10,6 @@ set(camp_VERSION_PATCH ${PROJECT_VERSION_PATCH}) include(CheckCXXCompilerFlag) include(FindPackageHandleStandardArgs) -include(AddCXXCompilerFlag) if(NOT DEFINED BLT_CXX_STD) set(CXX_VERSIONS 17 14) foreach(cxxver ${CXX_VERSIONS})