Merge pull request #9 from CNugteren/development
Updated to version 8.0
CNugteren authored Sep 27, 2016
2 parents 182e068 + ef8b220 commit 75d7f83
Showing 6 changed files with 88 additions and 8 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG
@@ -1,4 +1,10 @@

Version 8.0 (2016-09-27):
- Several minor fixes
- Added new methods to the API:
* GetAllPlatforms
* A new constructor for the Program class based on a binary or IR string (both OpenCL and CUDA)

Version 7.0 (2016-08-03):
- Re-wrote the OpenCL event implementation with proper memory management
- Updated some return types of device-query information to fix issues on 32-bit systems
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -31,7 +31,7 @@
# CMake project details
cmake_minimum_required(VERSION 2.8.10)
project("CLCudaAPI" CXX)
set(CLCudaAPI_VERSION_MAJOR 7)
set(CLCudaAPI_VERSION_MAJOR 8)
set(CLCudaAPI_VERSION_MINOR 0)

# ==================================================================================================
8 changes: 8 additions & 0 deletions doc/api.md
@@ -29,6 +29,11 @@ Constructor(s):
* `Platform(const size_t platform_id)`:
When using the OpenCL back-end, this initializes a new OpenCL platform (e.g. AMD SDK, Intel SDK, NVIDIA SDK) specified by the integer `platform_id`. When using the CUDA back-end, this initializes the CUDA driver API. The `platform_id` argument is ignored: there is only one platform.

Non-member function(s):

* `std::vector<Platform> GetAllPlatforms()`:
Retrieves a vector containing all available platforms.

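A minimal usage sketch of the new function (illustrative only; it assumes an OpenCL or CUDA installation with the `clpp11.h` or `cupp11.h` header on the include path, and uses `NumDevices()` as exercised by the unit tests below):

```cpp
#include <cstdio>
#include "clpp11.h"  // or "cupp11.h" for the CUDA back-end

int main() {
  // Enumerate every platform instead of hard-coding Platform(0), Platform(1), ...
  for (auto &platform : CLCudaAPI::GetAllPlatforms()) {
    std::printf("platform with %zu device(s)\n", platform.NumDevices());
  }
  return 0;
}
```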

CLCudaAPI::Device
-------------
@@ -128,6 +133,9 @@ Constructor(s):
* `Program(const Context &context, std::string source)`:
Creates a new OpenCL or CUDA program on a given context. A program is a collection of one or more device kernels which form a single compilation unit together. The device-code is passed as a string. Such a string can for example be generated, hard-coded, or read from file at run-time. If passed as an r-value (e.g. using `std::move`), the device-code string is moved instead of copied into the class' member variable.

* `Program(const Device &device, const Context &context, const std::string& binary)`:
As above, but now the program is constructed from an already-compiled IR or binary of the device kernels. This requires a context corresponding to the binary. For OpenCL, this constructor is based on the `clCreateProgramWithBinary` function.

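The two constructors combine into a simple compile-once, run-from-cache pattern. A hedged sketch (assuming `context`, `device`, `source`, and `options` are set up as elsewhere in this documentation, and that the caller stores the IR string between runs):

```cpp
// First run: compile from source and keep the IR (PTX for CUDA, a binary
// or IR string for OpenCL) for later reuse.
auto program = CLCudaAPI::Program(context, source);
program.Build(device, options);
auto ir = program.GetIR();

// Later run: construct directly from the cached IR, skipping compilation.
auto cached_program = CLCudaAPI::Program(device, context, ir);
cached_program.Build(device, options);
```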
Public method(s):

* `BuildStatus Build(const Device &device, std::vector<std::string> &options)`:
15 changes: 13 additions & 2 deletions include/clpp11.h
@@ -12,7 +12,7 @@
// Portability here means that a similar header exists for CUDA with the same classes and
// interfaces. In other words, moving from the OpenCL API to the CUDA API becomes a one-line change.
//
// This is version 7.0 of CLCudaAPI.
// This is version 8.0 of CLCudaAPI.
//
// =================================================================================================
//
@@ -149,6 +149,17 @@ class Platform {
cl_platform_id platform_;
};

// Retrieves a vector with all platforms
inline std::vector<Platform> GetAllPlatforms() {
auto num_platforms = cl_uint{0};
CheckError(clGetPlatformIDs(0, nullptr, &num_platforms));
auto all_platforms = std::vector<Platform>();
for (size_t platform_id = 0; platform_id < static_cast<size_t>(num_platforms); ++platform_id) {
all_platforms.push_back(Platform(platform_id));
}
return all_platforms;
}

// =================================================================================================

// C++11 version of 'cl_device_id'
@@ -199,7 +210,7 @@ class Device {
return GetInfoVector<size_t>(CL_DEVICE_MAX_WORK_ITEM_SIZES);
}
unsigned long LocalMemSize() const {
return GetInfo<cl_ulong>(CL_DEVICE_LOCAL_MEM_SIZE);
return static_cast<unsigned long>(GetInfo<cl_ulong>(CL_DEVICE_LOCAL_MEM_SIZE));
}
std::string Capabilities() const { return GetInfoString(CL_DEVICE_EXTENSIONS); }
size_t CoreClock() const {
23 changes: 21 additions & 2 deletions include/cupp11.h
@@ -12,7 +12,7 @@
// Portability here means that a similar header exists for OpenCL with the same classes and
// interfaces. In other words, moving from the CUDA API to the OpenCL API becomes a one-line change.
//
// This is version 6.0 of CLCudaAPI.
// This is version 8.0 of CLCudaAPI.
//
// =================================================================================================
//
@@ -139,6 +139,12 @@ class Platform {
size_t platform_id_;
};

// Retrieves a vector with all platforms. Note that there is just one platform in CUDA.
inline std::vector<Platform> GetAllPlatforms() {
auto all_platforms = std::vector<Platform>{ Platform(size_t{0}) };
return all_platforms;
}

// =================================================================================================

// C++11 version of 'CUdevice'
@@ -279,12 +285,22 @@ class Program {
program_(new nvrtcProgram, [](nvrtcProgram* p) { CheckError(nvrtcDestroyProgram(p));
delete p; }),
source_(std::move(source)),
source_ptr_(&source_[0]) {
source_ptr_(&source_[0]),
from_binary_(false) {
CheckError(nvrtcCreateProgram(program_.get(), source_ptr_, nullptr, 0, nullptr, nullptr));
}

// PTX-based constructor
explicit Program(const Device &device, const Context &context, const std::string& binary):
program_(nullptr), // not used
source_(binary),
source_ptr_(&source_[0]), // not used
from_binary_(true) {
}

// Compiles the device program and returns whether there were any warnings/errors
BuildStatus Build(const Device &, std::vector<std::string> &options) {
if (from_binary_) { return BuildStatus::kSuccess; }
auto raw_options = std::vector<const char*>();
for (const auto &option: options) {
raw_options.push_back(option.c_str());
@@ -304,6 +320,7 @@

// Retrieves the warning/error message from the compiler (if any)
std::string GetBuildInfo(const Device &) const {
if (from_binary_) { return std::string{}; }
auto bytes = size_t{0};
CheckError(nvrtcGetProgramLogSize(*program_, &bytes));
auto result = std::string{};
@@ -314,6 +331,7 @@

// Retrieves an intermediate representation of the compiled program (i.e. PTX)
std::string GetIR() const {
if (from_binary_) { return source_; } // holds the PTX
auto bytes = size_t{0};
CheckError(nvrtcGetPTXSize(*program_, &bytes));
auto result = std::string{};
@@ -328,6 +346,7 @@
std::shared_ptr<nvrtcProgram> program_;
std::string source_;
const char* source_ptr_;
const bool from_binary_;
};

// =================================================================================================
42 changes: 39 additions & 3 deletions test/unit_tests.cc
@@ -124,6 +124,17 @@ SCENARIO("platforms can be created and used", "[Platform]") {

// =================================================================================================

TEST_CASE("a list of all platforms can be retrieved", "[Platform]") {
auto all_platforms = CLCudaAPI::GetAllPlatforms();
REQUIRE(all_platforms.size() > 0);
for (auto &platform : all_platforms) {
auto num_devices = platform.NumDevices();
REQUIRE(num_devices > 0);
}
}

// =================================================================================================

SCENARIO("devices can be created and used", "[Device][Platform]") {
GIVEN("An example device on a platform") {
auto platform = CLCudaAPI::Platform(kPlatformID);
@@ -220,10 +231,35 @@ SCENARIO("programs can be created and used", "[Program][Context][Device][Platfor
auto platform = CLCudaAPI::Platform(kPlatformID);
auto device = CLCudaAPI::Device(platform, kDeviceID);
auto context = CLCudaAPI::Context(device);
auto source = std::string{""};
auto program = CLCudaAPI::Program(context, source);
#if USE_OPENCL
auto source = R"(
__kernel void add(__global const float* a, __global const float* b, __global float* c) {
unsigned idx = get_global_id(0);
c[idx] = a[idx] + b[idx];
})";

// ... or use CUDA instead
#else
auto source = R"(
extern "C" __global__ void add(const float* a, const float* b, float* c) {
unsigned idx = threadIdx.x + blockDim.x*blockIdx.x;
c[idx] = a[idx] + b[idx];
})";
#endif
auto options = std::vector<std::string>();

// TODO: Fill in
auto program = CLCudaAPI::Program(context, source);
auto build_result = program.Build(device, options);
REQUIRE(build_result == CLCudaAPI::BuildStatus::kSuccess);

WHEN("a compiled IR is generated from the compiled program") {
auto ir = program.GetIR();
THEN("a new program can be created based on the IR") {
auto new_program = CLCudaAPI::Program(device, context, ir);
auto new_build_result = new_program.Build(device, options);
REQUIRE(new_build_result == CLCudaAPI::BuildStatus::kSuccess);
}
}
}
}

