Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Multi-threading support refactor #152

Merged
merged 1 commit into from
Oct 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 36 additions & 4 deletions include/acl_thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
#include <stdarg.h>
#include <stdio.h>

// System headers.
#include <mutex>
#include <thread>

#if defined(__cplusplus)
extern "C" {
#endif
Expand All @@ -23,10 +27,15 @@ extern "C" {
#define ACL_TLS __declspec(thread)
#endif

// Forward declaration
class acl_mutex_wrapper_t;

// State declared with ACL_TLS storage (ACL_TLS expands to __declspec(thread)
// in the preprocessor branch visible above; other branches are not shown
// here).
// acl_global_lock_count: the count that is saved on entry to a signal handler
// (see the signal handler notes below).
extern ACL_TLS int acl_global_lock_count;
// NOTE(review): presumably non-zero while running inside a signal handler —
// confirm against the signal handler helpers.
extern ACL_TLS int acl_inside_sig_flag;
// Holds the acl_global_lock_count value saved while inside a signal handler
// (see the signal handler notes below).
extern ACL_TLS int acl_inside_sig_old_lock_count;

// The acl_mutex_wrapper_t instance shared by callers of this header. Unlike
// the variables above it is not declared with ACL_TLS. Its definition is not
// part of this header.
extern acl_mutex_wrapper_t acl_mutex_wrapper;

// -- signal handler functions --
// When we enter a signal handler, we save "acl_global_lock_count" to
// "acl_inside_sig_old_lock_count" temporarily. This is because the signal
Expand Down Expand Up @@ -75,10 +84,6 @@ static inline void acl_sig_unblock_signals() {

// -- global lock functions --

// Free-function lock API. acl_mutex_wrapper_t (declared further down in this
// header) has lock/unlock/suspend_lock/resume_lock member functions with the
// same return and parameter types.
void acl_lock();
void acl_unlock();
// Returns an int that is meant to be passed back to acl_resume_lock() as
// lock_count — presumably the lock count at the time of suspension; TODO
// confirm against the implementation.
int acl_suspend_lock();
void acl_resume_lock(int lock_count);
// NOTE(review): the semantics of the two device-update functions are not
// visible in this header; by their names they form a wait/signal pair —
// confirm in the implementation before relying on that.
void acl_wait_for_device_update(cl_context context);
void acl_signal_device_update();

Expand All @@ -105,4 +110,31 @@ void acl_yield_lock_and_thread();
} /* extern "C" */
#endif

// -- RAII wrapper classes --

// To follow RAII, provide a mutex class acl_mutex_wrapper_t which may be used
// with std::scoped_lock and std::unique_lock. Note that std::scoped_lock may
// only be constructed with a single instance of acl_mutex_wrapper_t since the
// latter only implements BasicLockable but not Lockable, due to a lack of
// try_lock() functionality in acl_threadsupport.
// Mutex-style wrapper class. It provides lock()/unlock() and no try_lock().
// Only declarations live here; the member function definitions are not part of
// this header.
class acl_mutex_wrapper_t {
public:
// Acquire / release pair: the two operations std::scoped_lock and
// std::unique_lock call on the mutex they manage.
void lock();
void unlock();
// Returns an int that acl_suspend_lock_guard stores and later hands back to
// resume_lock(). Presumably the lock count held when the lock was suspended —
// TODO confirm against the implementation.
int suspend_lock();
// Counterpart of suspend_lock(); takes the value suspend_lock() returned.
void resume_lock(int lock_count);
};

// Scope guard around acl_mutex_wrapper_t::suspend_lock()/resume_lock().
//
// The constructor calls mutex.suspend_lock() and remembers the returned
// value; the destructor passes that value back to mutex.resume_lock(). The
// guard keeps a reference to the mutex, so the mutex must outlive the guard.
//
// Copying is disabled: a copy would run resume_lock() a second time for a
// single suspend_lock() call.
class acl_suspend_lock_guard {
public:
  // `mutex` inside the initializer expressions names the constructor
  // parameter, so suspend_lock() is called on the caller's object even though
  // the lock_count member is initialized before the mutex member.
  explicit acl_suspend_lock_guard(acl_mutex_wrapper_t &mutex)
      : lock_count(mutex.suspend_lock()), mutex(mutex) {}
  ~acl_suspend_lock_guard() { mutex.resume_lock(lock_count); }

  acl_suspend_lock_guard(const acl_suspend_lock_guard &) = delete;
  acl_suspend_lock_guard &operator=(const acl_suspend_lock_guard &) = delete;

private:
  int lock_count;             // Value returned by suspend_lock().
  acl_mutex_wrapper_t &mutex; // Mutex to resume on destruction.
};

#endif // ACL_THREAD_H
37 changes: 4 additions & 33 deletions include/acl_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,17 +116,6 @@ void acl_dump_mem(cl_mem mem);
#endif
/////////////////////

// Calls acl_unlock() and then returns `ret` from the function in which the
// macro is expanded. The do { ... } while (0) wrapper makes the expansion
// behave as a single statement. Because it expands to a `return`, it can only
// be used inside a function whose return type accepts `ret`.
#define UNLOCK_RETURN(ret) \
do { \
acl_unlock(); \
return (ret); \
} while (0)
// Void counterpart of UNLOCK_RETURN: calls acl_unlock() and then executes a
// bare `return;` in the function in which the macro is expanded.
#define UNLOCK_RETURN_VOID \
do { \
acl_unlock(); \
return; \
} while (0)

// This macro is used to signal failure from a function via "errcode_ret"
// and return 0.
#define BAIL(STATUS) \
Expand All @@ -136,13 +125,6 @@ void acl_dump_mem(cl_mem mem);
} \
return 0; \
} while (0)
// Stores STATUS through `errcode_ret` when that pointer is non-null, then
// expands UNLOCK_RETURN(0), i.e. unlocks and returns 0 from the enclosing
// function. Requires a variable named `errcode_ret` to be in scope at the
// point of expansion.
#define UNLOCK_BAIL(STATUS) \
do { \
if (errcode_ret) { \
*errcode_ret = (STATUS); \
} \
UNLOCK_RETURN(0); \
} while (0)

// This is used to callback for a context error, assuming C is an
// initialized context.
Expand All @@ -151,42 +133,31 @@ void acl_dump_mem(cl_mem mem);
acl_context_callback(C, STR); \
BAIL(STATUS); \
} while (0)
// Reports STR through acl_context_callback(C, STR) and then expands
// UNLOCK_BAIL(STATUS), which sets `errcode_ret` (if non-null), unlocks and
// returns 0 from the enclosing function.
#define UNLOCK_BAIL_INFO(STATUS, C, STR) \
do { \
acl_context_callback(C, STR); \
UNLOCK_BAIL(STATUS); \
} while (0)

// Reports STR through acl_context_callback(C, STR) and then returns STATUS
// from the function in which the macro is expanded. Does not touch any lock.
#define ERR_RET(STATUS, C, STR) \
do { \
acl_context_callback(C, STR); \
return STATUS; \
} while (0)
// Same as ERR_RET, but leaves through UNLOCK_RETURN(STATUS): reports STR via
// acl_context_callback(C, STR), calls acl_unlock(), then returns STATUS from
// the enclosing function.
#define UNLOCK_ERR_RET(STATUS, C, STR) \
do { \
acl_context_callback(C, STR); \
UNLOCK_RETURN(STATUS); \
} while (0)

// Caller only partly specified the buffer?
// Caller isn't asking for any info at all?
//
// Validates the (buf_size, buf, answer_size_out) arguments of a query-style
// call. Three conditions are rejected, each by reporting a message through
// acl_context_callback(context, ...) and returning CL_INVALID_VALUE from the
// enclosing function:
//   1. buf is non-null but buf_size <= 0;
//   2. buf is null but buf_size > 0;
//   3. both answer_size_out and buf are null.
// The messages are built with the stringizing operator, so they contain the
// argument expressions exactly as written at the call site.
//
// NOTE(review): as it stands, this listing contains two `#define` header lines
// (UNLOCK_VALIDATE_ARRAY_OUT_ARGS and VALIDATE_ARRAY_OUT_ARGS) and pairs every
// UNLOCK_RETURN(CL_INVALID_VALUE) with a plain `return CL_INVALID_VALUE`. This
// looks like the before/after lines of a diff with the +/- markers stripped —
// confirm which variant is live before relying on this text.
#define UNLOCK_VALIDATE_ARRAY_OUT_ARGS(buf_size, buf, answer_size_out, \
context) \
#define VALIDATE_ARRAY_OUT_ARGS(buf_size, buf, answer_size_out, context) \
do { \
if (buf && buf_size <= 0) { \
acl_context_callback(context, \
#buf " is specified but " #buf_size " is zero"); \
UNLOCK_RETURN(CL_INVALID_VALUE); \
return CL_INVALID_VALUE; \
} \
if (buf == 0 && buf_size > 0) { \
acl_context_callback(context, #buf " is not specified but " #buf_size \
" is positive"); \
UNLOCK_RETURN(CL_INVALID_VALUE); \
return CL_INVALID_VALUE; \
} \
if (answer_size_out == 0 && buf == 0) { \
acl_context_callback(context, \
#buf " and " #answer_size_out " are both zero"); \
UNLOCK_RETURN(CL_INVALID_VALUE); \
return CL_INVALID_VALUE; \
} \
} while (0)

Expand Down
46 changes: 23 additions & 23 deletions src/acl_command.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,22 +38,22 @@
// Enqueues a barrier on `command_queue`.
//
// Returns:
//   CL_INVALID_COMMAND_QUEUE  if acl_command_queue_is_valid() rejects the
//                             queue;
//   CL_SUCCESS                immediately, for a queue without
//                             CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
//   otherwise the status of clEnqueueBarrierWithWaitList() called with an
//   empty wait list and no output event.
//
// Locking is done solely through the std::scoped_lock below; no manual
// acl_lock()/UNLOCK_RETURN pairing is needed on any return path.
ACL_EXPORT
CL_API_ENTRY cl_int CL_API_CALL
clEnqueueBarrierIntelFPGA(cl_command_queue command_queue) {
  // Locks acl_mutex_wrapper for the rest of the function; the scoped_lock
  // destructor unlocks it on every return path.
  std::scoped_lock lock{acl_mutex_wrapper};

  if (!acl_command_queue_is_valid(command_queue)) {
    return CL_INVALID_COMMAND_QUEUE;
  }

  // For in order queue, since every event is executed in sequence,
  // there is an implicit barrier after each event.
  // enqueue barrier does not need to do anything
  if (!(command_queue->properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)) {
    return CL_SUCCESS;
  }

  // OpenCL 1.2 spec: If event_wait_list is NULL, then this particular command
  // waits until all previous enqueued commands to command_queue have completed.
  return clEnqueueBarrierWithWaitList(command_queue, 0, 0, NULL);
}

ACL_EXPORT
Expand All @@ -66,18 +66,18 @@ ACL_EXPORT
// Enqueues a marker command on `command_queue` and hands the resulting event
// back through `event`.
//
// Returns:
//   CL_INVALID_COMMAND_QUEUE  if acl_command_queue_is_valid() rejects the
//                             queue;
//   CL_INVALID_VALUE          if `event` is null;
//   otherwise the status of acl_create_event() for a CL_COMMAND_MARKER with
//   an empty wait list.
//
// Locking is done solely through the std::scoped_lock below; no manual
// acl_lock()/UNLOCK_RETURN pairing is needed on any return path.
CL_API_ENTRY cl_int CL_API_CALL
clEnqueueMarkerIntelFPGA(cl_command_queue command_queue, cl_event *event) {
  // Locks acl_mutex_wrapper for the rest of the function; the scoped_lock
  // destructor unlocks it on every return path.
  std::scoped_lock lock{acl_mutex_wrapper};

  if (!acl_command_queue_is_valid(command_queue)) {
    return CL_INVALID_COMMAND_QUEUE;
  }

  if (!event) {
    return CL_INVALID_VALUE;
  }

  return acl_create_event(command_queue, 0, 0, CL_COMMAND_MARKER, event);
}

ACL_EXPORT
Expand All @@ -91,13 +91,13 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWaitForEventsIntelFPGA(
cl_command_queue command_queue, cl_uint num_event, const cl_event *events) {
cl_int result;

acl_lock();
std::scoped_lock lock{acl_mutex_wrapper};

if (!acl_command_queue_is_valid(command_queue)) {
UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE);
return CL_INVALID_COMMAND_QUEUE;
}
if (num_event == 0 || events == 0) {
UNLOCK_RETURN(CL_INVALID_VALUE);
return CL_INVALID_VALUE;
}
cl_event event = NULL;
result = acl_create_event(command_queue, num_event, events,
Expand All @@ -110,7 +110,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWaitForEventsIntelFPGA(
result = CL_INVALID_EVENT;
}

UNLOCK_RETURN(result);
return result;
}

ACL_EXPORT
Expand All @@ -129,16 +129,16 @@ clWaitForEventsIntelFPGA(cl_uint num_events, const cl_event *event_list) {
cl_context context;
bool first_yield_to_hal = true;

acl_lock();
std::scoped_lock lock{acl_mutex_wrapper};

if (num_events == 0 || event_list == 0) {
UNLOCK_RETURN(CL_INVALID_VALUE);
return CL_INVALID_VALUE;
}

#ifndef REMOVE_VALID_CHECKS
result = acl_check_events(num_events, event_list);
if (result != CL_SUCCESS) {
UNLOCK_RETURN(CL_INVALID_EVENT);
return CL_INVALID_EVENT;
}
#endif

Expand Down Expand Up @@ -193,12 +193,12 @@ clWaitForEventsIntelFPGA(cl_uint num_events, const cl_event *event_list) {
cl_uint i = 0;
for (i = 0; i < num_events; ++i) {
if (event_list[i]->execution_status < 0)
UNLOCK_RETURN(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST);
return CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
}
}
#endif

UNLOCK_RETURN(result);
return result;
}

ACL_EXPORT
Expand All @@ -214,10 +214,10 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMarkerWithWaitListIntelFPGA(
cl_int result;
cl_event ret_event = NULL;

acl_lock();
std::scoped_lock lock{acl_mutex_wrapper};

if (!acl_command_queue_is_valid(command_queue)) {
UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE);
return CL_INVALID_COMMAND_QUEUE;
}

// Spec says:
Expand Down Expand Up @@ -248,7 +248,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMarkerWithWaitListIntelFPGA(
if (ret_event)
clReleaseEvent(ret_event); // free the ret event if the caller doesn't want
// to return it
UNLOCK_RETURN(result);
return result;
}

ACL_EXPORT
Expand All @@ -265,12 +265,12 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueBarrierWithWaitListIntelFPGA(
const cl_event *event_wait_list, cl_event *event) {
cl_int result;
cl_event local_event;
acl_lock();
std::scoped_lock lock{acl_mutex_wrapper};

result = clEnqueueMarkerWithWaitList(command_queue, num_events_in_wait_list,
event_wait_list, &local_event);
if (result != CL_SUCCESS) {
UNLOCK_RETURN(result);
return result;
}

if (command_queue->properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) {
Expand All @@ -282,7 +282,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueBarrierWithWaitListIntelFPGA(
} else {
clReleaseEvent(local_event);
}
UNLOCK_RETURN(result);
return result;
}

ACL_EXPORT
Expand Down
Loading