Merge pull request #5 from CNugteren/development
Updated to version 6.0
CNugteren authored Jun 29, 2016
2 parents 4a81c2b + dc02f78 commit 68a3882
Showing 7 changed files with 110 additions and 38 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG
@@ -1,4 +1,12 @@

Version 6.0 (2016-06-29):
- Added the possibility to use Event pointers, adjusted the Kernel::Launch function to do so
- Added a new constructor for Program based on a binary (OpenCL only)
- Fixed a bug when OpenCL 2.0 or newer is installed but the device doesn't support it
- Added new methods to the API:
* Device::VersionNumber (integer version of the string-getter Device::Version)
* Device::IsCPU, Device::IsGPU, Device::IsAMD, Device::IsNVIDIA, Device::IsIntel, Device::IsARM

Version 5.0 (2016-04-21):
- Buffers can now also be 'not owned' to disable automatic memory freeing afterwards
- Made 'Buffer::Read' and 'Buffer::ReadAsync' constant methods
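The 6.0 additions listed above can be exercised together. Below is a minimal sketch, not part of this commit, assuming the OpenCL back-end header from this repository and the same CLCudaAPI::Platform/Device/Context/Queue helpers used by the bundled samples; the no-op kernel source and the platform/device indices are illustrative.

    #include <cstdio>
    #include <string>
    #include <vector>
    #include "clpp11.h"

    int main() {
      // Selects a platform and device, then queries the new 6.0 device getters
      auto platform = CLCudaAPI::Platform(size_t{0});
      auto device = CLCudaAPI::Device(platform, size_t{0});
      printf("Device '%s': version %zu, GPU: %d, NVIDIA: %d\n",
             device.Name().c_str(), device.VersionNumber(),
             static_cast<int>(device.IsGPU()), static_cast<int>(device.IsNVIDIA()));

      // Compiles a trivial kernel (the source string is illustrative)
      auto context = CLCudaAPI::Context(device);
      auto queue = CLCudaAPI::Queue(context, device);
      auto event = CLCudaAPI::Event();
      auto program = CLCudaAPI::Program(context, "__kernel void noop() { }");
      auto options = std::vector<std::string>{};
      program.Build(device, options);
      auto kernel = CLCudaAPI::Kernel(program, "noop");

      // New in 6.0: Kernel::Launch takes an EventPointer instead of an Event reference
      kernel.Launch(queue, {128}, {128}, event.pointer());
      queue.Finish(event);
      printf("Kernel took %.3lf ms\n", event.GetElapsedTime());
      return 0;
    }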
28 changes: 14 additions & 14 deletions CMakeLists.txt
@@ -31,7 +31,7 @@
# CMake project details
cmake_minimum_required(VERSION 2.8.10)
project("CLCudaAPI" CXX)
set(CLCudaAPI_VERSION_MAJOR 5)
set(CLCudaAPI_VERSION_MAJOR 6)
set(CLCudaAPI_VERSION_MINOR 0)

# ==================================================================================================
@@ -51,32 +51,32 @@ endif()
# ==================================================================================================

# Compiler-version check (requires at least CMake 2.8.10)
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.7)
if(CMAKE_CXX_COMPILER_ID STREQUAL GNU)
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.7)
message(FATAL_ERROR "GCC version must be at least 4.7")
endif()
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 3.3)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL Clang)
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 3.3)
message(FATAL_ERROR "Clang version must be at least 3.3")
endif()
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL AppleClang)
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0)
message(FATAL_ERROR "AppleClang version must be at least 5.0")
endif()
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 14.0)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 14.0)
message(FATAL_ERROR "ICC version must be at least 14.0")
endif()
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 18.0)
elseif(MSVC)
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 18.0)
message(FATAL_ERROR "MS Visual Studio version must be at least 18.0")
endif()
endif()

# C++ compiler settings
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
if(MSVC)
set(FLAGS "/Ox")
else ()
else()
set(FLAGS "-O3 -std=c++11 -Wall -Wno-comment")
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS}")
@@ -127,7 +127,7 @@ endforeach()
# ==================================================================================================

# Optional: Enable inclusion of the test-suite
if (ENABLE_TESTS)
if(ENABLE_TESTS)
enable_testing()
include_directories(${CLCudaAPI_SOURCE_DIR}/test)
add_executable(unit_tests test/unit_tests.cc)
72 changes: 59 additions & 13 deletions include/clpp11.h
@@ -12,7 +12,7 @@
// Portability here means that a similar header exists for CUDA with the same classes and
// interfaces. In other words, moving from the OpenCL API to the CUDA API becomes a one-line change.
//
// This is version 5.0 of CLCudaAPI.
// This is version 6.0 of CLCudaAPI.
//
// =================================================================================================
//
@@ -162,6 +162,15 @@ class Device {

// Methods to retrieve device information
std::string Version() const { return GetInfoString(CL_DEVICE_VERSION); }
size_t VersionNumber() const
{
std::string version_string = Version().substr(7);
// Space separates the end of the OpenCL version number from the beginning of the
// vendor-specific information.
size_t next_whitespace = version_string.find(' ');
size_t version = (size_t) (100.0 * std::stod(version_string.substr(0, next_whitespace)));
return version;
}
std::string Vendor() const { return GetInfoString(CL_DEVICE_VENDOR); }
std::string Name() const { return GetInfoString(CL_DEVICE_NAME); }
std::string Type() const {
@@ -206,6 +215,14 @@ class Device {
return true;
}

// Query for a specific type of device or brand
bool IsCPU() const { return Type() == "CPU"; }
bool IsGPU() const { return Type() == "GPU"; }
bool IsAMD() const { return Vendor() == "AMD" || Vendor() == "Advanced Micro Devices, Inc."; }
bool IsNVIDIA() const { return Vendor() == "NVIDIA" || Vendor() == "NVIDIA Corporation"; }
bool IsIntel() const { return Vendor() == "Intel" || Vendor() == "GenuineIntel"; }
bool IsARM() const { return Vendor() == "ARM"; }

// Accessor to the private data-member
const cl_device_id& operator()() const { return device_; }
private:
@@ -268,10 +285,14 @@ class Context {

// Accessor to the private data-member
const cl_context& operator()() const { return *context_; }
cl_context* pointer() const { return &(*context_); }
private:
std::shared_ptr<cl_context> context_;
};

// Pointer to an OpenCL context
using ContextPointer = cl_context*;

// =================================================================================================

// Enumeration of build statuses of the run-time compilation process
@@ -282,7 +303,7 @@ class Program {
public:
// Note that there is no constructor based on the regular OpenCL data-type because of extra state

// Regular constructor with memory management
// Source-based constructor with memory management
explicit Program(const Context &context, std::string source):
program_(new cl_program, [](cl_program* p) { CheckError(clReleaseProgram(*p)); delete p; }),
length_(source.length()),
@@ -293,6 +314,22 @@ class Program {
CheckError(status);
}

// Binary-based constructor with memory management
explicit Program(const Device &device, const Context &context, const std::string& binary):
program_(new cl_program, [](cl_program* p) { CheckError(clReleaseProgram(*p)); delete p; }),
length_(binary.length()),
source_(binary),
source_ptr_(&source_[0]) {
auto status1 = CL_SUCCESS;
auto status2 = CL_SUCCESS;
const cl_device_id dev = device();
*program_ = clCreateProgramWithBinary(context(), 1, &dev, &length_,
reinterpret_cast<const unsigned char**>(&source_ptr_),
&status1, &status2);
CheckError(status1);
CheckError(status2);
}

// Compiles the device program and returns whether or not there were any warnings/errors
BuildStatus Build(const Device &device, std::vector<std::string> &options) {
auto options_string = std::accumulate(options.begin(), options.end(), std::string{" "});
@@ -321,7 +358,7 @@ class Program {
return result;
}

// Retrieves an intermediate representation of the compiled program
// Retrieves a binary or an intermediate representation of the compiled program
std::string GetIR() const {
auto bytes = size_t{0};
CheckError(clGetProgramInfo(*program_, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &bytes, nullptr));
@@ -337,7 +374,7 @@
private:
std::shared_ptr<cl_program> program_;
size_t length_;
std::string source_;
std::string source_; // Note: the source can also be a binary or IR
const char* source_ptr_;
};

@@ -359,8 +396,16 @@ class Queue {
delete s; }) {
auto status = CL_SUCCESS;
#ifdef CL_VERSION_2_0
cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
*queue_ = clCreateCommandQueueWithProperties(context(), device(), properties, &status);
size_t ocl_version = device.VersionNumber();
if (ocl_version >= 200)
{
cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
*queue_ = clCreateCommandQueueWithProperties(context(), device(), properties, &status);
}
else
{
*queue_ = clCreateCommandQueue(context(), device(), CL_QUEUE_PROFILING_ENABLE, &status);
}
#else
*queue_ = clCreateCommandQueue(context(), device(), CL_QUEUE_PROFILING_ENABLE, &status);
#endif
@@ -611,15 +656,15 @@ class Kernel {

// Launches a kernel onto the specified queue
void Launch(const Queue &queue, const std::vector<size_t> &global,
const std::vector<size_t> &local, Event &event) {
const std::vector<size_t> &local, EventPointer event) {
CheckError(clEnqueueNDRangeKernel(queue(), *kernel_, static_cast<cl_uint>(global.size()),
nullptr, global.data(), local.data(),
0, nullptr, &(event())));
0, nullptr, event));
}

// As above, but with an event waiting list
void Launch(const Queue &queue, const std::vector<size_t> &global,
const std::vector<size_t> &local, Event &event,
const std::vector<size_t> &local, EventPointer event,
std::vector<Event>& waitForEvents) {
if (waitForEvents.size() == 0) { return Launch(queue, global, local, event); }

@@ -632,15 +677,16 @@ class Kernel {
// Launches the kernel while waiting for other events
CheckError(clEnqueueNDRangeKernel(queue(), *kernel_, static_cast<cl_uint>(global.size()),
nullptr, global.data(), local.data(),
waitForEventsPlain.size(), waitForEventsPlain.data(),
&(event())));
static_cast<cl_uint>(waitForEventsPlain.size()),
waitForEventsPlain.data(),
event));
}

// As above, but with the default local workgroup size
void Launch(const Queue &queue, const std::vector<size_t> &global, Event &event) {
void Launch(const Queue &queue, const std::vector<size_t> &global, EventPointer event) {
CheckError(clEnqueueNDRangeKernel(queue(), *kernel_, static_cast<cl_uint>(global.size()),
nullptr, global.data(), nullptr,
0, nullptr, &(event())));
0, nullptr, event));
}

// Accessor to the private data-member
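The new binary-based Program constructor above pairs with the existing Program::GetIR() getter: a program can be compiled from source once, its device binary retrieved, and later reconstructed without recompiling from source (OpenCL back-end only, as noted in the changelog). A hedged sketch, assuming context, device, and a kernel source string already exist, with a kernel name "my_kernel" that is purely illustrative:

    // Builds from source once and caches the device binary
    auto program = CLCudaAPI::Program(context, source);
    auto options = std::vector<std::string>{};
    program.Build(device, options);
    auto binary = program.GetIR();

    // Later: recreates the program from the cached binary; note the extra Device
    // argument taken by the new constructor. A build step is still needed so the
    // OpenCL runtime links the binary before kernels can be created from it.
    auto cached_program = CLCudaAPI::Program(device, context, binary);
    cached_program.Build(device, options);
    auto kernel = CLCudaAPI::Kernel(cached_program, "my_kernel");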
34 changes: 26 additions & 8 deletions include/cupp11.h
@@ -12,7 +12,7 @@
// Portability here means that a similar header exists for OpenCL with the same classes and
// interfaces. In other words, moving from the CUDA API to the OpenCL API becomes a one-line change.
//
// This is version 5.0 of CLCudaAPI.
// This is version 6.0 of CLCudaAPI.
//
// =================================================================================================
//
@@ -107,13 +107,14 @@ class Event {
// Accessors to the private data-members
const CUevent& start() const { return *start_; }
const CUevent& end() const { return *end_; }
Event* pointer() { return this; }
private:
std::shared_ptr<CUevent> start_;
std::shared_ptr<CUevent> end_;
};

// Pointer to a CUDA event
using EventPointer = CUevent*;
using EventPointer = Event*;

// =================================================================================================

@@ -160,6 +161,11 @@ class Device {
CheckError(cuDriverGetVersion(&result));
return "CUDA driver "+std::to_string(result);
}
size_t VersionNumber() const {
auto result = 0;
CheckError(cuDriverGetVersion(&result));
return static_cast<size_t>(result);
}
std::string Vendor() const { return "NVIDIA Corporation"; }
std::string Name() const {
auto result = std::string{};
@@ -207,6 +213,14 @@ class Device {
return true;
}

// Query for a specific type of device or brand
bool IsCPU() const { return false; }
bool IsGPU() const { return true; }
bool IsAMD() const { return false; }
bool IsNVIDIA() const { return true; }
bool IsIntel() const { return false; }
bool IsARM() const { return false; }

// Accessor to the private data-member
const CUdevice& operator()() const { return device_; }
private:
@@ -240,10 +254,14 @@ class Context {

// Accessor to the private data-member
const CUcontext& operator()() const { return *context_; }
CUcontext* pointer() const { return &(*context_); }
private:
std::shared_ptr<CUcontext> context_;
};

// Pointer to a CUDA context
using ContextPointer = CUcontext*;

// =================================================================================================

// Enumeration of build statuses of the run-time compilation process
@@ -254,7 +272,7 @@ class Program {
public:
// Note that there is no constructor based on the regular CUDA data-type because of extra state

// Regular constructor with memory management
// Source-based constructor with memory management
explicit Program(const Context &, std::string source):
program_(new nvrtcProgram, [](nvrtcProgram* p) { CheckError(nvrtcDestroyProgram(p));
delete p; }),
@@ -556,7 +574,7 @@ class Kernel {

// Launches a kernel onto the specified queue
void Launch(const Queue &queue, const std::vector<size_t> &global,
const std::vector<size_t> &local, Event &event) {
const std::vector<size_t> &local, EventPointer event) {

// Creates the grid (number of threadblocks) and sets the block sizes (threads per block)
auto grid = std::vector<size_t>{1, 1, 1};
@@ -572,24 +590,24 @@
}

// Launches the kernel, its execution time is recorded by events
CheckError(cuEventRecord(event.start(), queue()));
CheckError(cuEventRecord(event->start(), queue()));
CheckError(cuLaunchKernel(kernel_, grid[0], grid[1], grid[2], block[0], block[1], block[2],
0, queue(), pointers.data(), nullptr));
CheckError(cuEventRecord(event.end(), queue()));
CheckError(cuEventRecord(event->end(), queue()));
}

// As above, but with an event waiting list
// TODO: Implement this function
void Launch(const Queue &queue, const std::vector<size_t> &global,
const std::vector<size_t> &local, Event &event,
const std::vector<size_t> &local, EventPointer event,
std::vector<Event>& waitForEvents) {
if (waitForEvents.size() == 0) { return Launch(queue, global, local, event); }
Error("launching with an event waiting list is not implemented for the CUDA back-end");
}

// As above, but with the default local workgroup size
// TODO: Implement this function
void Launch(const Queue &, const std::vector<size_t> &, Event &) {
void Launch(const Queue &, const std::vector<size_t> &, EventPointer) {
Error("launching with a default workgroup size is not implemented for the CUDA back-end");
}

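Note that the CUDA back-end answers the new queries by construction: IsGPU() and IsNVIDIA() always return true, the other Is* getters always return false, and VersionNumber() reports the driver version from cuDriverGetVersion (for example 7050 for CUDA 7.5), whereas the OpenCL back-end returns 100 times the device's OpenCL version (for example 200 for OpenCL 2.0). Portable code should therefore treat the number as back-end specific. A small sketch, not part of this commit, that compiles against either header:

    #include <cstdio>
    #include "clpp11.h"  // or "cupp11.h": both expose the same Device interface

    void PrintDeviceSummary(const CLCudaAPI::Device &device) {
      // Dispatches on the 6.0 vendor/type queries; the printed wording is illustrative
      if (device.IsNVIDIA()) {
        printf("NVIDIA %s, back-end version number %zu\n",
               device.IsGPU() ? "GPU" : "device", device.VersionNumber());
      } else if (device.IsAMD() || device.IsIntel() || device.IsARM()) {
        printf("%s %s, OpenCL version number %zu\n", device.Vendor().c_str(),
               device.IsCPU() ? "CPU" : "device", device.VersionNumber());
      } else {
        printf("Unrecognised vendor '%s'\n", device.Vendor().c_str());
      }
    }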
2 changes: 1 addition & 1 deletion samples/advanced.cc
@@ -179,7 +179,7 @@ int main() {
// Enqueues the kernel and waits for the result. Note that launching the kernel is always
// asynchronous and thus requires finishing the queue in order to complete the operation.
printf("## Running the kernel...\n");
kernel.Launch(queue, global, local, event);
kernel.Launch(queue, global, local, event.pointer());
queue.Finish(event);
printf(" > Took %.3lf ms\n", event.GetElapsedTime());

2 changes: 1 addition & 1 deletion samples/simple.cc
@@ -128,7 +128,7 @@ int main() {
// Enqueues the kernel and waits for the result. Note that launching the kernel is always
// asynchronous and thus requires finishing the queue in order to complete the operation.
printf("## Running the kernel...\n");
kernel.Launch(queue, global, local, event);
kernel.Launch(queue, global, local, event.pointer());
queue.Finish(event);
printf(" > Took %.3lf ms\n", event.GetElapsedTime());

2 changes: 1 addition & 1 deletion samples/smallest.cc
@@ -73,7 +73,7 @@ int main() {
program.Build(device, compiler_options);
auto kernel = CLCudaAPI::Kernel(program, "add");
kernel.SetArguments(a, b, c);
kernel.Launch(queue, {elements}, {128}, event);
kernel.Launch(queue, {elements}, {128}, event.pointer());
queue.Finish(event);

// Reads the results back to the host memory
