Skip to content

Commit

Permalink
Refactor runtime multi-threading/synchronization support
Browse files Browse the repository at this point in the history
First stage of multi-threading support refactor by providing a mutex wrapper
class around current routines and using std::scoped_lock to follow RAII.
  • Loading branch information
ericxu233 authored and pcolberg committed Oct 5, 2022
1 parent 1847ee9 commit fa0934f
Show file tree
Hide file tree
Showing 28 changed files with 1,416 additions and 1,568 deletions.
40 changes: 36 additions & 4 deletions include/acl_thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
#include <stdarg.h>
#include <stdio.h>

// System headers.
#include <mutex>
#include <thread>

#if defined(__cplusplus)
extern "C" {
#endif
Expand All @@ -23,10 +27,15 @@ extern "C" {
#define ACL_TLS __declspec(thread)
#endif

// Forward declaration
class acl_mutex_wrapper_t;

extern ACL_TLS int acl_global_lock_count;
extern ACL_TLS int acl_inside_sig_flag;
extern ACL_TLS int acl_inside_sig_old_lock_count;

extern acl_mutex_wrapper_t acl_mutex_wrapper;

// -- signal handler functions --
// When we enter a signal handler, we save "acl_global_lock_count" to
// "acl_inside_sig_old_lock_count" temporarily. This is because the signal
Expand Down Expand Up @@ -75,10 +84,6 @@ static inline void acl_sig_unblock_signals() {

// -- global lock functions --

void acl_lock();
void acl_unlock();
int acl_suspend_lock();
void acl_resume_lock(int lock_count);
void acl_wait_for_device_update(cl_context context);
void acl_signal_device_update();

Expand All @@ -105,4 +110,31 @@ void acl_yield_lock_and_thread();
} /* extern "C" */
#endif

// -- RAII wrapper classes --

// To follow RAII, provide a mutex class acl_mutex_wrapper_t which may be used
// with std::scoped_lock and std::unique_lock. Note that std::scoped_lock may
// only be constructed with a single instance of acl_mutex_wrapper_t since the
// latter only implements BasicLockable but not Lockable, due to a lack of
// try_lock() functionality in acl_threadsupport.
// Mutex-style wrapper around the runtime's global lock routines.
//
// lock()/unlock() give the type the BasicLockable interface, so a single
// instance can be handed to std::scoped_lock or std::unique_lock. There is no
// try_lock(), so the type is not Lockable.
class acl_mutex_wrapper_t {
public:
  // Acquire the lock.
  void lock();
  // Release the lock.
  void unlock();
  // Suspend the lock and return a lock count that must later be passed to
  // resume_lock().
  // NOTE(review): presumably this fully releases a recursively held lock and
  // returns the recursion depth -- confirm against the implementation.
  int suspend_lock();
  // Resume a lock previously suspended with suspend_lock(); `lock_count` is
  // the value that suspend_lock() returned.
  void resume_lock(int lock_count);
};

// Scope guard that suspends `mutex` on construction and resumes it on
// destruction, so every exit path of the enclosing scope resumes the lock
// exactly once.
//
// The guard is non-copyable: a copy would carry the same lock count and call
// resume_lock() a second time when it is destroyed.
class acl_suspend_lock_guard {
public:
  // Suspends `mutex` and remembers the lock count it reports. The guard keeps
  // a reference to `mutex`, which therefore must outlive the guard.
  explicit acl_suspend_lock_guard(acl_mutex_wrapper_t &mutex)
      : lock_count(mutex.suspend_lock()), mutex(mutex) {}

  // Resumes the lock with the count saved at construction.
  ~acl_suspend_lock_guard() { mutex.resume_lock(lock_count); }

  // Exactly one resume_lock() per suspend_lock(): forbid copies. Declaring
  // these also suppresses the implicit move operations.
  acl_suspend_lock_guard(const acl_suspend_lock_guard &) = delete;
  acl_suspend_lock_guard &operator=(const acl_suspend_lock_guard &) = delete;

private:
  // Value returned by suspend_lock(); handed back to resume_lock().
  int lock_count;
  // The suspended mutex (not owned).
  acl_mutex_wrapper_t &mutex;
};

#endif // ACL_THREAD_H
37 changes: 4 additions & 33 deletions include/acl_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,17 +116,6 @@ void acl_dump_mem(cl_mem mem);
#endif
/////////////////////

#define UNLOCK_RETURN(ret) \
do { \
acl_unlock(); \
return (ret); \
} while (0)
#define UNLOCK_RETURN_VOID \
do { \
acl_unlock(); \
return; \
} while (0)

// This macro is used to signal failure from a function via "errcode_ret"
// and return 0.
#define BAIL(STATUS) \
Expand All @@ -136,13 +125,6 @@ void acl_dump_mem(cl_mem mem);
} \
return 0; \
} while (0)
#define UNLOCK_BAIL(STATUS) \
do { \
if (errcode_ret) { \
*errcode_ret = (STATUS); \
} \
UNLOCK_RETURN(0); \
} while (0)

// This is used to callback for a context error, assuming C is an
// initialized context.
Expand All @@ -151,42 +133,31 @@ void acl_dump_mem(cl_mem mem);
acl_context_callback(C, STR); \
BAIL(STATUS); \
} while (0)
#define UNLOCK_BAIL_INFO(STATUS, C, STR) \
do { \
acl_context_callback(C, STR); \
UNLOCK_BAIL(STATUS); \
} while (0)

// This reports STR through acl_context_callback() for context C and then
// returns STATUS from the enclosing function.
#define ERR_RET(STATUS, C, STR) \
do { \
acl_context_callback(C, STR); \
return STATUS; \
} while (0)
#define UNLOCK_ERR_RET(STATUS, C, STR) \
do { \
acl_context_callback(C, STR); \
UNLOCK_RETURN(STATUS); \
} while (0)

// Validate the (buf_size, buf, answer_size_out) out-arguments. On failure the
// problem is reported via acl_context_callback(context, ...) and the
// enclosing function returns CL_INVALID_VALUE. The failure cases are:
//  - Caller only partly specified the buffer: buf is given but buf_size is
//    not positive, or buf_size is positive but buf is not given.
//  - Caller isn't asking for any info at all: buf and answer_size_out are
//    both zero.
#define UNLOCK_VALIDATE_ARRAY_OUT_ARGS(buf_size, buf, answer_size_out, \
context) \
#define VALIDATE_ARRAY_OUT_ARGS(buf_size, buf, answer_size_out, context) \
do { \
if (buf && buf_size <= 0) { \
acl_context_callback(context, \
#buf " is specified but " #buf_size " is zero"); \
UNLOCK_RETURN(CL_INVALID_VALUE); \
return CL_INVALID_VALUE; \
} \
if (buf == 0 && buf_size > 0) { \
acl_context_callback(context, #buf " is not specified but " #buf_size \
" is positive"); \
UNLOCK_RETURN(CL_INVALID_VALUE); \
return CL_INVALID_VALUE; \
} \
if (answer_size_out == 0 && buf == 0) { \
acl_context_callback(context, \
#buf " and " #answer_size_out " are both zero"); \
UNLOCK_RETURN(CL_INVALID_VALUE); \
return CL_INVALID_VALUE; \
} \
} while (0)

Expand Down
46 changes: 23 additions & 23 deletions src/acl_command.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,22 +38,22 @@
// Enqueue a barrier on `command_queue`.
//
// Returns CL_INVALID_COMMAND_QUEUE if the queue is not valid, CL_SUCCESS
// immediately for a queue without CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, and
// otherwise the status of clEnqueueBarrierWithWaitList() called with an empty
// wait list.
//
// NOTE(review): this listing appears to be a rendered diff without +/-
// markers; each acl_lock()/UNLOCK_RETURN(...) line looks like the removed
// form of the statement directly below it -- confirm against the repository.
ACL_EXPORT
CL_API_ENTRY cl_int CL_API_CALL
clEnqueueBarrierIntelFPGA(cl_command_queue command_queue) {
acl_lock();
// Scoped lock on acl_mutex_wrapper: unlocked automatically when `lock` goes
// out of scope, so the plain `return` statements below need no manual unlock.
std::scoped_lock<acl_mutex_wrapper_t> lock{acl_mutex_wrapper};

if (!acl_command_queue_is_valid(command_queue)) {
UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE);
return CL_INVALID_COMMAND_QUEUE;
}

// For in order queue, since every event is executed in sequence,
// there is an implicit barrier after each event.
// enqueue barrier does not need to do anything
if (!(command_queue->properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)) {
UNLOCK_RETURN(CL_SUCCESS);
return CL_SUCCESS;
}
// OpenCL 1.2 spec: If event_wait_list is NULL, then this particular command
// waits until all previous enqueued commands to command_queue have completed.
cl_int status = clEnqueueBarrierWithWaitList(command_queue, 0, 0, NULL);
UNLOCK_RETURN(status);
return status;
}

ACL_EXPORT
Expand All @@ -66,18 +66,18 @@ ACL_EXPORT
// Enqueue a marker command on `command_queue`.
//
// Returns CL_INVALID_COMMAND_QUEUE if the queue is not valid,
// CL_INVALID_VALUE if `event` is null, and otherwise the result of
// acl_create_event() for a CL_COMMAND_MARKER with no wait list. `event` is
// forwarded to acl_create_event() (presumably as the output event handle --
// confirm against its declaration).
//
// NOTE(review): this listing appears to be a rendered diff without +/-
// markers; each acl_lock()/UNLOCK_RETURN(...) line looks like the removed
// form of the statement directly below it -- confirm against the repository.
CL_API_ENTRY cl_int CL_API_CALL
clEnqueueMarkerIntelFPGA(cl_command_queue command_queue, cl_event *event) {
cl_int result;
acl_lock();
// Scoped lock on acl_mutex_wrapper: unlocked automatically when `lock` goes
// out of scope, so the plain `return` statements below need no manual unlock.
std::scoped_lock<acl_mutex_wrapper_t> lock{acl_mutex_wrapper};

if (!acl_command_queue_is_valid(command_queue)) {
UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE);
return CL_INVALID_COMMAND_QUEUE;
}

// A marker is only useful through the event it yields, so a null `event` is
// rejected.
if (!event)
UNLOCK_RETURN(CL_INVALID_VALUE);
return CL_INVALID_VALUE;

result = acl_create_event(command_queue, 0, 0, CL_COMMAND_MARKER, event);

UNLOCK_RETURN(result);
return result;
}

ACL_EXPORT
Expand All @@ -91,13 +91,13 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWaitForEventsIntelFPGA(
cl_command_queue command_queue, cl_uint num_event, const cl_event *events) {
cl_int result;

acl_lock();
std::scoped_lock<acl_mutex_wrapper_t> lock{acl_mutex_wrapper};

if (!acl_command_queue_is_valid(command_queue)) {
UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE);
return CL_INVALID_COMMAND_QUEUE;
}
if (num_event == 0 || events == 0) {
UNLOCK_RETURN(CL_INVALID_VALUE);
return CL_INVALID_VALUE;
}
cl_event event = NULL;
result = acl_create_event(command_queue, num_event, events,
Expand All @@ -110,7 +110,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWaitForEventsIntelFPGA(
result = CL_INVALID_EVENT;
}

UNLOCK_RETURN(result);
return result;
}

ACL_EXPORT
Expand All @@ -129,16 +129,16 @@ clWaitForEventsIntelFPGA(cl_uint num_events, const cl_event *event_list) {
cl_context context;
bool first_yield_to_hal = true;

acl_lock();
std::scoped_lock<acl_mutex_wrapper_t> lock{acl_mutex_wrapper};

if (num_events == 0 || event_list == 0) {
UNLOCK_RETURN(CL_INVALID_VALUE);
return CL_INVALID_VALUE;
}

#ifndef REMOVE_VALID_CHECKS
result = acl_check_events(num_events, event_list);
if (result != CL_SUCCESS) {
UNLOCK_RETURN(CL_INVALID_EVENT);
return CL_INVALID_EVENT;
}
#endif

Expand Down Expand Up @@ -193,12 +193,12 @@ clWaitForEventsIntelFPGA(cl_uint num_events, const cl_event *event_list) {
cl_uint i = 0;
for (i = 0; i < num_events; ++i) {
if (event_list[i]->execution_status < 0)
UNLOCK_RETURN(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST);
return CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
}
}
#endif

UNLOCK_RETURN(result);
return result;
}

ACL_EXPORT
Expand All @@ -214,10 +214,10 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMarkerWithWaitListIntelFPGA(
cl_int result;
cl_event ret_event = NULL;

acl_lock();
std::scoped_lock<acl_mutex_wrapper_t> lock{acl_mutex_wrapper};

if (!acl_command_queue_is_valid(command_queue)) {
UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE);
return CL_INVALID_COMMAND_QUEUE;
}

// Spec says:
Expand Down Expand Up @@ -248,7 +248,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMarkerWithWaitListIntelFPGA(
if (ret_event)
clReleaseEvent(ret_event); // free the ret event if the caller doesn't want
// to return it
UNLOCK_RETURN(result);
return result;
}

ACL_EXPORT
Expand All @@ -265,12 +265,12 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueBarrierWithWaitListIntelFPGA(
const cl_event *event_wait_list, cl_event *event) {
cl_int result;
cl_event local_event;
acl_lock();
std::scoped_lock<acl_mutex_wrapper_t> lock{acl_mutex_wrapper};

result = clEnqueueMarkerWithWaitList(command_queue, num_events_in_wait_list,
event_wait_list, &local_event);
if (result != CL_SUCCESS) {
UNLOCK_RETURN(result);
return result;
}

if (command_queue->properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) {
Expand All @@ -282,7 +282,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueBarrierWithWaitListIntelFPGA(
} else {
clReleaseEvent(local_event);
}
UNLOCK_RETURN(result);
return result;
}

ACL_EXPORT
Expand Down
Loading

0 comments on commit fa0934f

Please sign in to comment.