diff --git a/include/acl_thread.h b/include/acl_thread.h index 753adf47..64a3f8b3 100644 --- a/include/acl_thread.h +++ b/include/acl_thread.h @@ -12,6 +12,10 @@ #include #include +// System headers. +#include +#include + #if defined(__cplusplus) extern "C" { #endif @@ -23,10 +27,15 @@ extern "C" { #define ACL_TLS __declspec(thread) #endif +// Forward declaration +class acl_mutex_wrapper_t; + extern ACL_TLS int acl_global_lock_count; extern ACL_TLS int acl_inside_sig_flag; extern ACL_TLS int acl_inside_sig_old_lock_count; +extern acl_mutex_wrapper_t acl_mutex_wrapper; + // -- signal handler functions -- // When we enter a signal handler, we save "acl_global_lock_count" to // "acl_inside_sig_old_lock_count" temporarily. This is because the signal @@ -75,10 +84,6 @@ static inline void acl_sig_unblock_signals() { // -- global lock functions -- -void acl_lock(); -void acl_unlock(); -int acl_suspend_lock(); -void acl_resume_lock(int lock_count); void acl_wait_for_device_update(cl_context context); void acl_signal_device_update(); @@ -105,4 +110,31 @@ void acl_yield_lock_and_thread(); } /* extern "C" */ #endif +// -- RAII wrapper classes -- + +// To follow RAII, provide a mutex class acl_mutex_wrapper_t which may be used +// with std::scoped_lock and std::unique_lock. Note that std::scoped_lock may +// only be constructed with a single instance of acl_mutex_wrapper_t since the +// latter only implements BasicLockable but not Lockable, due to a lack of +// try_lock() functionality in acl_threadsupport. 
+class acl_mutex_wrapper_t { +public: + void lock(); + void unlock(); + int suspend_lock(); + void resume_lock(int lock_count); +}; + +class acl_suspend_lock_guard { +public: + explicit acl_suspend_lock_guard(acl_mutex_wrapper_t &mutex) : mutex(mutex) { + lock_count = mutex.suspend_lock(); + }; + ~acl_suspend_lock_guard() { mutex.resume_lock(lock_count); } + +private: + int lock_count; + acl_mutex_wrapper_t &mutex; +}; + #endif // ACL_THREAD_H diff --git a/include/acl_util.h b/include/acl_util.h index 950716e1..d6af3a9c 100644 --- a/include/acl_util.h +++ b/include/acl_util.h @@ -116,17 +116,6 @@ void acl_dump_mem(cl_mem mem); #endif ///////////////////// -#define UNLOCK_RETURN(ret) \ - do { \ - acl_unlock(); \ - return (ret); \ - } while (0) -#define UNLOCK_RETURN_VOID \ - do { \ - acl_unlock(); \ - return; \ - } while (0) - // This macro is used to signal failure from a function via "errcode_ret" // and return 0. #define BAIL(STATUS) \ @@ -136,13 +125,6 @@ void acl_dump_mem(cl_mem mem); } \ return 0; \ } while (0) -#define UNLOCK_BAIL(STATUS) \ - do { \ - if (errcode_ret) { \ - *errcode_ret = (STATUS); \ - } \ - UNLOCK_RETURN(0); \ - } while (0) // This is used to callback for a context error, assuming C is an // initialized context. @@ -151,42 +133,31 @@ void acl_dump_mem(cl_mem mem); acl_context_callback(C, STR); \ BAIL(STATUS); \ } while (0) -#define UNLOCK_BAIL_INFO(STATUS, C, STR) \ - do { \ - acl_context_callback(C, STR); \ - UNLOCK_BAIL(STATUS); \ - } while (0) #define ERR_RET(STATUS, C, STR) \ do { \ acl_context_callback(C, STR); \ return STATUS; \ } while (0) -#define UNLOCK_ERR_RET(STATUS, C, STR) \ - do { \ - acl_context_callback(C, STR); \ - UNLOCK_RETURN(STATUS); \ - } while (0) // Caller only partly specified the buffer? // Caller isn't asking for any info at all? 
-#define UNLOCK_VALIDATE_ARRAY_OUT_ARGS(buf_size, buf, answer_size_out, \ - context) \ +#define VALIDATE_ARRAY_OUT_ARGS(buf_size, buf, answer_size_out, context) \ do { \ if (buf && buf_size <= 0) { \ acl_context_callback(context, \ #buf " is specified but " #buf_size " is zero"); \ - UNLOCK_RETURN(CL_INVALID_VALUE); \ + return CL_INVALID_VALUE; \ } \ if (buf == 0 && buf_size > 0) { \ acl_context_callback(context, #buf " is not specified but " #buf_size \ " is positive"); \ - UNLOCK_RETURN(CL_INVALID_VALUE); \ + return CL_INVALID_VALUE; \ } \ if (answer_size_out == 0 && buf == 0) { \ acl_context_callback(context, \ #buf " and " #answer_size_out " are both zero"); \ - UNLOCK_RETURN(CL_INVALID_VALUE); \ + return CL_INVALID_VALUE; \ } \ } while (0) diff --git a/src/acl_command.cpp b/src/acl_command.cpp index 8abf3575..6e71fc45 100644 --- a/src/acl_command.cpp +++ b/src/acl_command.cpp @@ -38,22 +38,22 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueBarrierIntelFPGA(cl_command_queue command_queue) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } // For in order queue, since every event is executed in sequence, // there is an implicit barrier after each event. // enqueue barrier does not need to do anything if (!(command_queue->properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)) { - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } // OpenCL 1.2 spec: If event_wait_list is NULL, then this particular command // waits until all previous enqueued commands to command_queue have completed. 
cl_int status = clEnqueueBarrierWithWaitList(command_queue, 0, 0, NULL); - UNLOCK_RETURN(status); + return status; } ACL_EXPORT @@ -66,18 +66,18 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueMarkerIntelFPGA(cl_command_queue command_queue, cl_event *event) { cl_int result; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (!event) - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; result = acl_create_event(command_queue, 0, 0, CL_COMMAND_MARKER, event); - UNLOCK_RETURN(result); + return result; } ACL_EXPORT @@ -91,13 +91,13 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWaitForEventsIntelFPGA( cl_command_queue command_queue, cl_uint num_event, const cl_event *events) { cl_int result; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (num_event == 0 || events == 0) { - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } cl_event event = NULL; result = acl_create_event(command_queue, num_event, events, @@ -110,7 +110,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWaitForEventsIntelFPGA( result = CL_INVALID_EVENT; } - UNLOCK_RETURN(result); + return result; } ACL_EXPORT @@ -129,16 +129,16 @@ clWaitForEventsIntelFPGA(cl_uint num_events, const cl_event *event_list) { cl_context context; bool first_yield_to_hal = true; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (num_events == 0 || event_list == 0) { - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } #ifndef REMOVE_VALID_CHECKS result = acl_check_events(num_events, event_list); if (result != CL_SUCCESS) { - UNLOCK_RETURN(CL_INVALID_EVENT); + return CL_INVALID_EVENT; } #endif @@ -193,12 +193,12 @@ clWaitForEventsIntelFPGA(cl_uint num_events, const cl_event *event_list) { cl_uint i = 0; for 
(i = 0; i < num_events; ++i) { if (event_list[i]->execution_status < 0) - UNLOCK_RETURN(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST); + return CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST; } } #endif - UNLOCK_RETURN(result); + return result; } ACL_EXPORT @@ -214,10 +214,10 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMarkerWithWaitListIntelFPGA( cl_int result; cl_event ret_event = NULL; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } // Spec says: @@ -248,7 +248,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMarkerWithWaitListIntelFPGA( if (ret_event) clReleaseEvent(ret_event); // free the ret event if the caller doesn't want // to return it - UNLOCK_RETURN(result); + return result; } ACL_EXPORT @@ -265,12 +265,12 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueBarrierWithWaitListIntelFPGA( const cl_event *event_wait_list, cl_event *event) { cl_int result; cl_event local_event; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; result = clEnqueueMarkerWithWaitList(command_queue, num_events_in_wait_list, event_wait_list, &local_event); if (result != CL_SUCCESS) { - UNLOCK_RETURN(result); + return result; } if (command_queue->properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) { @@ -282,7 +282,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueBarrierWithWaitListIntelFPGA( } else { clReleaseEvent(local_event); } - UNLOCK_RETURN(result); + return result; } ACL_EXPORT diff --git a/src/acl_command_queue.cpp b/src/acl_command_queue.cpp index e51b73f6..ed4e9774 100644 --- a/src/acl_command_queue.cpp +++ b/src/acl_command_queue.cpp @@ -114,17 +114,17 @@ clCreateCommandQueueWithPropertiesIntelFPGA( cl_command_queue result = 0; cl_command_queue_properties cq_properties = 0; cl_uint q_size_properties = 0, idx = 0; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_context_is_valid(context)) { - 
UNLOCK_BAIL(CL_INVALID_CONTEXT); + BAIL(CL_INVALID_CONTEXT); } if (!acl_device_is_valid(device)) { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid device"); + BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid device"); } if (!acl_context_uses_device(context, device)) { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, - "Device is not associated with the context"); + BAIL_INFO(CL_INVALID_DEVICE, context, + "Device is not associated with the context"); } // Get the properties. Only two possible properties: CL_QUEUE_PROPERTIES and @@ -138,9 +138,9 @@ clCreateCommandQueueWithPropertiesIntelFPGA( if (q_size_properties == 0) q_size_properties = (cl_uint)properties[idx + 1]; else // This property was already given. - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "Invalid queue properties"); + BAIL_INFO(CL_INVALID_VALUE, context, "Invalid queue properties"); } else { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "Invalid queue properties"); + BAIL_INFO(CL_INVALID_VALUE, context, "Invalid queue properties"); } idx += 2; } @@ -152,14 +152,14 @@ clCreateCommandQueueWithPropertiesIntelFPGA( CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT; if (cq_properties & ~(valid_properties)) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "Invalid queue properties"); + BAIL_INFO(CL_INVALID_VALUE, context, "Invalid queue properties"); } // Also check the dependency of options: if (((cq_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) == 0 && (cq_properties & CL_QUEUE_ON_DEVICE)) || ((cq_properties & CL_QUEUE_ON_DEVICE) == 0 && (cq_properties & CL_QUEUE_ON_DEVICE_DEFAULT))) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "Invalid queue properties"); + BAIL_INFO(CL_INVALID_VALUE, context, "Invalid queue properties"); } } { @@ -169,13 +169,11 @@ clCreateCommandQueueWithPropertiesIntelFPGA( // queried from current version of clGetDeviceInfo. So manually failing on // those properties for now. 
if (cq_properties & (CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT)) - UNLOCK_BAIL_INFO( - CL_INVALID_QUEUE_PROPERTIES, context, - "Device does not support the specified queue properties"); + BAIL_INFO(CL_INVALID_QUEUE_PROPERTIES, context, + "Device does not support the specified queue properties"); if (q_size_properties != 0) { // not supported yet. - UNLOCK_BAIL_INFO( - CL_INVALID_QUEUE_PROPERTIES, context, - "Device does not support the specified queue properties"); + BAIL_INFO(CL_INVALID_QUEUE_PROPERTIES, context, + "Device does not support the specified queue properties"); } // Internal user may want to turn off support for OOO Queues @@ -183,9 +181,8 @@ clCreateCommandQueueWithPropertiesIntelFPGA( acl_getenv("CL_CONTEXT_DISABLE_OOO_QUEUES_INTELFPGA"); if (disable_oooq && (cq_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)) { - UNLOCK_BAIL_INFO( - CL_INVALID_QUEUE_PROPERTIES, context, - "Device does not support the specified queue properties"); + BAIL_INFO(CL_INVALID_QUEUE_PROPERTIES, context, + "Device does not support the specified queue properties"); } // What does the device support? @@ -195,24 +192,23 @@ clCreateCommandQueueWithPropertiesIntelFPGA( clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, sizeof(device_props), &device_props, 0); if (cq_properties & ~(device_props)) { - UNLOCK_BAIL_INFO( - CL_INVALID_QUEUE_PROPERTIES, context, - "Device does not support the specified queue properties"); + BAIL_INFO(CL_INVALID_QUEUE_PROPERTIES, context, + "Device does not support the specified queue properties"); } } // Now actually allocate the command queue. 
result = acl_alloc_cl_command_queue(); if (result == 0) { - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate a command queue"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate a command queue"); } // Fail to double the capacity of the pointer array if (!l_init_queue(result, cq_properties, context, device)) { acl_free_cl_command_queue(result); - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate a command queue"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate a command queue"); } if (errcode_ret) { @@ -220,7 +216,7 @@ clCreateCommandQueueWithPropertiesIntelFPGA( } acl_track_object(ACL_OBJ_COMMAND_QUEUE, result); - UNLOCK_RETURN(result); + return result; } ACL_EXPORT @@ -255,13 +251,13 @@ CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueue( ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clRetainCommandQueueIntelFPGA(cl_command_queue command_queue) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } acl_retain(command_queue); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -273,10 +269,10 @@ clRetainCommandQueue(cl_command_queue command_queue) { ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clReleaseCommandQueueIntelFPGA(cl_command_queue command_queue) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } acl_release(command_queue); @@ -288,7 +284,7 @@ clReleaseCommandQueueIntelFPGA(cl_command_queue command_queue) { acl_delete_command_queue(command_queue); } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -302,10 +298,10 @@ CL_API_ENTRY cl_int CL_API_CALL clGetCommandQueueInfoIntelFPGA( cl_command_queue command_queue, cl_command_queue_info param_name, size_t param_value_size, void 
*param_value, size_t *param_value_size_ret) { acl_result_t result; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } RESULT_INIT; @@ -328,14 +324,14 @@ CL_API_ENTRY cl_int CL_API_CALL clGetCommandQueueInfoIntelFPGA( } if (result.size == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Invalid or unsupported command queue property"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Invalid or unsupported command queue property"); } if (param_value) { if (param_value_size < result.size) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Parameter return buffer is too small"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Parameter return buffer is too small"); } RESULT_COPY(param_value, param_value_size); } @@ -343,7 +339,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetCommandQueueInfoIntelFPGA( if (param_value_size_ret) { *param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -360,23 +356,23 @@ CL_API_ENTRY cl_int CL_API_CALL clSetCommandQueuePropertyIntelFPGA( cl_command_queue command_queue, cl_command_queue_properties properties, cl_bool enable, cl_command_queue_properties *old_properties) { cl_command_queue_properties bad_properties; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; bad_properties = ~((cl_command_queue_properties)CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | (cl_command_queue_properties)CL_QUEUE_PROFILING_ENABLE); if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } // Internal user may want to turn off support for OOO Queues const char *disable_oooq = acl_getenv("CL_CONTEXT_DISABLE_OOO_QUEUES_INTELFPGA"); if (disable_oooq && (properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)) { - UNLOCK_ERR_RET(CL_INVALID_QUEUE_PROPERTIES, 
command_queue->context, - "Can't set CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE property, " - "unsupported"); + ERR_RET(CL_INVALID_QUEUE_PROPERTIES, command_queue->context, + "Can't set CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE property, " + "unsupported"); } if (old_properties) { @@ -384,8 +380,8 @@ CL_API_ENTRY cl_int CL_API_CALL clSetCommandQueuePropertyIntelFPGA( } if (properties & bad_properties) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Invalid or unsupported command queue property"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Invalid or unsupported command queue property"); } if (enable) { @@ -397,7 +393,7 @@ CL_API_ENTRY cl_int CL_API_CALL clSetCommandQueuePropertyIntelFPGA( // No queue synchronization is required because we don't support // out-of-order execution. - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -415,10 +411,10 @@ CL_API_ENTRY cl_int CL_API_CALL clFlushIntelFPGA(cl_command_queue command_queue) { bool any_queued = false; const acl_hal_t *hal = acl_get_hal(); - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } // Context is valid too. Force a schedule update. 
@@ -428,7 +424,7 @@ clFlushIntelFPGA(cl_command_queue command_queue) { any_queued = 0; acl_idle_update(context); if (command_queue->num_commands == 0) { - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } // Find if at least one event is not SUBMITTED @@ -454,7 +450,7 @@ clFlushIntelFPGA(cl_command_queue command_queue) { } while (any_queued); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -467,10 +463,10 @@ CL_API_ENTRY cl_int CL_API_CALL clFinishIntelFPGA(cl_command_queue command_queue) { cl_event event = 0; cl_int result; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } // Spec says: @@ -483,7 +479,7 @@ clFinishIntelFPGA(cl_command_queue command_queue) { result = clWaitForEvents(1, &event); clReleaseEvent(event); } - UNLOCK_RETURN(result); + return result; } ACL_EXPORT diff --git a/src/acl_context.cpp b/src/acl_context.cpp index 405a8b2f..4e1001d3 100644 --- a/src/acl_context.cpp +++ b/src/acl_context.cpp @@ -83,24 +83,24 @@ CL_API_ENTRY cl_context CL_API_CALL clCreateContextIntelFPGA( cl_int *errcode_ret) { cl_context context; cl_int status; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; context = l_create_context(properties, pfn_notify, user_data, &status); if (context == NULL || status != CL_SUCCESS) { acl_free_cl_context(context); - UNLOCK_BAIL(status); + BAIL(status); } // Now check the devices. if (num_devices == 0) { acl_context_callback(context, "No devices specified"); acl_free_cl_context(context); - UNLOCK_BAIL(CL_INVALID_VALUE); + BAIL(CL_INVALID_VALUE); } if (devices == 0) { acl_context_callback(context, "No device array specified"); acl_free_cl_context(context); - UNLOCK_BAIL(CL_INVALID_VALUE); + BAIL(CL_INVALID_VALUE); } // Make sure all mentioned devices are valid. 
@@ -108,7 +108,7 @@ CL_API_ENTRY cl_context CL_API_CALL clCreateContextIntelFPGA( if (!acl_device_is_valid_ptr(devices[i])) { acl_context_callback(context, "Invalid device specified"); acl_free_cl_context(context); - UNLOCK_BAIL(CL_INVALID_DEVICE); + BAIL(CL_INVALID_DEVICE); } if (devices[i]->opened_count) { @@ -118,7 +118,7 @@ CL_API_ENTRY cl_context CL_API_CALL clCreateContextIntelFPGA( "device in the device list is currently in use in another " "context created with reprogramming disabled."); acl_free_cl_context(context); - UNLOCK_BAIL(CL_INVALID_VALUE); + BAIL(CL_INVALID_VALUE); } else if (!context->uses_dynamic_sysdef && devices[i]->mode_lock == BINARY) { acl_context_callback( @@ -126,7 +126,7 @@ CL_API_ENTRY cl_context CL_API_CALL clCreateContextIntelFPGA( "device in the device list is currently in use in another " "context created with reprogramming enabled."); acl_free_cl_context(context); - UNLOCK_BAIL(CL_INVALID_VALUE); + BAIL(CL_INVALID_VALUE); } } else { // Since this is the first time creating a context for this device, we @@ -142,7 +142,7 @@ CL_API_ENTRY cl_context CL_API_CALL clCreateContextIntelFPGA( status = l_finalize_context(context, num_devices, devices); if (status != CL_SUCCESS) { - UNLOCK_BAIL(status); + BAIL(status); } // Open the profiler output file after the first context creation @@ -153,7 +153,7 @@ CL_API_ENTRY cl_context CL_API_CALL clCreateContextIntelFPGA( } // the context is created successfully, add it to the set acl_platform.contexts_set.insert(context); - UNLOCK_RETURN(context); + return context; } ACL_EXPORT @@ -172,12 +172,12 @@ CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromTypeIntelFPGA( cl_uint num_devices = 0; cl_int status; cl_device_id devices[ACL_MAX_DEVICE]; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; context = l_create_context(properties, pfn_notify, user_data, &status); if (context == NULL || status != CL_SUCCESS) { acl_free_cl_context(context); - UNLOCK_BAIL(status); + BAIL(status); } // 
Determine device IDs. @@ -186,7 +186,7 @@ CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromTypeIntelFPGA( if (status != CL_SUCCESS || num_devices == 0) { acl_context_callback(context, "Device not found"); acl_free_cl_context(context); - UNLOCK_BAIL(CL_DEVICE_NOT_FOUND); + BAIL(CL_DEVICE_NOT_FOUND); } // Filter out devices. @@ -220,21 +220,21 @@ CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromTypeIntelFPGA( "devices of the given device type are currently in use in " "other contexts created with reprogramming disabled."); acl_free_cl_context(context); - UNLOCK_BAIL(CL_DEVICE_NOT_AVAILABLE); + BAIL(CL_DEVICE_NOT_AVAILABLE); } else { acl_context_callback( context, "Could not create context with reprogramming disabled. All " "devices of the given device type are currently in use in " "other contexts created with reprogramming enabled."); acl_free_cl_context(context); - UNLOCK_BAIL(CL_DEVICE_NOT_AVAILABLE); + BAIL(CL_DEVICE_NOT_AVAILABLE); } } status = l_finalize_context(context, num_devices, devices); if (status != CL_SUCCESS) { - UNLOCK_BAIL(status); + BAIL(status); } // Open the profiler output file after the first context creation @@ -243,7 +243,7 @@ CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromTypeIntelFPGA( if (errcode_ret) { *errcode_ret = CL_SUCCESS; } - UNLOCK_RETURN(context); + return context; } ACL_EXPORT @@ -256,7 +256,7 @@ CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType( ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clRetainContextIntelFPGA(cl_context context) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; // Note: Context creation uses acl_retain<> directly, but users must use // clRetainContext. @@ -264,10 +264,10 @@ CL_API_ENTRY cl_int CL_API_CALL clRetainContextIntelFPGA(cl_context context) { // That's why we use acl_context_is_valid() here instead of just // acl_is_valid_ptr(). 
if (!acl_context_is_valid(context)) { - UNLOCK_RETURN(CL_INVALID_CONTEXT); + return CL_INVALID_CONTEXT; } acl_retain(context); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -277,11 +277,11 @@ CL_API_ENTRY cl_int CL_API_CALL clRetainContext(cl_context context) { ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clReleaseContextIntelFPGA(cl_context context) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; // Error out if the reference count is already 0 if (!acl_context_is_valid(context)) { - UNLOCK_RETURN(CL_INVALID_CONTEXT); + return CL_INVALID_CONTEXT; } // Must mirror what is retained in clRetainContext. @@ -303,7 +303,7 @@ CL_API_ENTRY cl_int CL_API_CALL clReleaseContextIntelFPGA(cl_context context) { // recursively trying to delete them again. if (context->is_being_freed) { acl_release(context); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } context->is_being_freed = 1; @@ -358,7 +358,7 @@ CL_API_ENTRY cl_int CL_API_CALL clReleaseContextIntelFPGA(cl_context context) { acl_close_profiler_file(); } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -371,13 +371,13 @@ CL_API_ENTRY cl_int CL_API_CALL clGetContextInfoIntelFPGA( cl_context context, cl_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { acl_result_t result; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_context_is_valid(context)) { - UNLOCK_RETURN(CL_INVALID_CONTEXT); + return CL_INVALID_CONTEXT; } - UNLOCK_VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, - param_value_size_ret, context); + VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, param_value_size_ret, + context); RESULT_INIT; @@ -403,14 +403,14 @@ CL_API_ENTRY cl_int CL_API_CALL clGetContextInfoIntelFPGA( context->num_property_entries * sizeof(cl_context_properties)); break; default: - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Invalid or unsupported context info query"); + ERR_RET(CL_INVALID_VALUE, context, + 
"Invalid or unsupported context info query"); } if (param_value) { if (param_value_size < result.size) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Parameter return buffer is too small"); + ERR_RET(CL_INVALID_VALUE, context, + "Parameter return buffer is too small"); } RESULT_COPY(param_value, param_value_size); } @@ -418,7 +418,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetContextInfoIntelFPGA( if (param_value_size_ret) { *param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -464,10 +464,10 @@ static cl_context l_create_context(const cl_context_properties *properties, cl_context context = 0; cl_int status; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (user_data && !pfn_notify) { - UNLOCK_BAIL(CL_INVALID_VALUE); + BAIL(CL_INVALID_VALUE); } { @@ -478,12 +478,13 @@ static cl_context l_create_context(const cl_context_properties *properties, if (!allow_mp && platform_owner_pid != 0 && platform_owner_pid != acl_get_pid()) { if (pfn_notify) { - int lock_count = acl_suspend_lock(); - (pfn_notify)("Cannot create contexts in more than one process", 0, 0, - user_data); - acl_resume_lock(lock_count); + { + acl_suspend_lock_guard l(acl_mutex_wrapper); + (pfn_notify)("Cannot create contexts in more than one process", 0, 0, + user_data); + } } - UNLOCK_BAIL(CL_OUT_OF_RESOURCES); + BAIL(CL_OUT_OF_RESOURCES); } } @@ -491,11 +492,12 @@ static cl_context l_create_context(const cl_context_properties *properties, context = acl_alloc_cl_context(); if (context == 0) { if (pfn_notify) { - int lock_count = acl_suspend_lock(); - (pfn_notify)("Could not allocate a context object", 0, 0, user_data); - acl_resume_lock(lock_count); + { + acl_suspend_lock_guard l(acl_mutex_wrapper); + (pfn_notify)("Could not allocate a context object", 0, 0, user_data); + } } - UNLOCK_BAIL(CL_OUT_OF_HOST_MEMORY); + BAIL(CL_OUT_OF_HOST_MEMORY); } context->notify_fn = pfn_notify; @@ -505,26 +507,26 @@ static cl_context l_create_context(const 
cl_context_properties *properties, status = l_load_properties(context, properties); if (status != CL_SUCCESS) { acl_free_cl_context(context); - UNLOCK_BAIL(status); + BAIL(status); } // already called context error callback if (errcode_ret) { *errcode_ret = CL_SUCCESS; } - UNLOCK_RETURN(context); + return context; } static cl_int l_finalize_context(cl_context context, cl_uint num_devices, const cl_device_id *devices) { cl_int status; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; status = acl_get_hal()->try_devices(num_devices, devices, &acl_platform); if (status) { acl_context_callback(context, "Could not open devices"); acl_free_cl_context(context); - UNLOCK_RETURN(status); + return status; } acl_retain(context); @@ -533,12 +535,12 @@ static cl_int l_finalize_context(cl_context context, cl_uint num_devices, if (status != CL_SUCCESS) { l_forcibly_release_allocations(context); acl_free_cl_context(context); - UNLOCK_RETURN(status); // already signaled callback + return status; // already signaled callback } acl_track_object(ACL_OBJ_CONTEXT, context); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } // Analyze and load the context properties. 
@@ -1119,10 +1121,11 @@ void acl_update_context(cl_context context) { ++i) { CL_EXCEPTION_TYPE_INTEL exception_type = 1ULL << i; if (device->device_exception_status & exception_type) { - int lock_count = acl_suspend_lock(); - notify_fn(exception_type, device->exception_private_info[i], - device->exception_cb[i], notify_user_data); - acl_resume_lock(lock_count); + { + acl_suspend_lock_guard l(acl_mutex_wrapper); + notify_fn(exception_type, device->exception_private_info[i], + device->exception_cb[i], notify_user_data); + } } } @@ -1309,10 +1312,10 @@ void acl_context_callback(cl_context context, const std::string errinfo) { if (context && context->notify_fn) { acl_notify_fn_t notify_fn = context->notify_fn; void *notify_user_data = context->notify_user_data; - - int lock_count = acl_suspend_lock(); - notify_fn(errinfo.c_str(), 0, 0, notify_user_data); - acl_resume_lock(lock_count); + { + acl_suspend_lock_guard l(acl_mutex_wrapper); + notify_fn(errinfo.c_str(), 0, 0, notify_user_data); + } } } diff --git a/src/acl_device.cpp b/src/acl_device.cpp index 83151016..84a5e63b 100644 --- a/src/acl_device.cpp +++ b/src/acl_device.cpp @@ -41,12 +41,12 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsIntelFPGA( cl_device_id *devices, cl_uint *num_devices) { cl_int status = CL_SUCCESS; cl_uint num_matched = 0; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_platform_is_valid(platform)) { - UNLOCK_RETURN(CL_INVALID_PLATFORM); + return CL_INVALID_PLATFORM; } - UNLOCK_VALIDATE_ARRAY_OUT_ARGS(num_entries, devices, num_devices, 0); + VALIDATE_ARRAY_OUT_ARGS(num_entries, devices, num_devices, 0); switch (device_type) { case CL_DEVICE_TYPE_CPU: @@ -70,7 +70,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsIntelFPGA( } break; default: - UNLOCK_RETURN(CL_INVALID_DEVICE_TYPE); + return CL_INVALID_DEVICE_TYPE; break; } @@ -81,7 +81,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsIntelFPGA( *num_devices = num_matched; } - UNLOCK_RETURN(status); + return status; } 
ACL_EXPORT @@ -101,14 +101,14 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfoIntelFPGA( char name_buf[MAX_NAME_SIZE]; acl_result_t result; cl_context context = 0; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; #ifndef REMOVE_VALID_CHECKS if (!acl_device_is_valid_ptr(device)) { - UNLOCK_RETURN(CL_INVALID_DEVICE); + return CL_INVALID_DEVICE; } - UNLOCK_VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, - param_value_size_ret, 0); + VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, param_value_size_ret, + 0); #endif RESULT_INIT; @@ -130,7 +130,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfoIntelFPGA( case CL_DEVICE_VENDOR: context = clCreateContext(0, 1, &device, NULL, NULL, &status); if (status != CL_SUCCESS) { - UNLOCK_RETURN(status); + return status; } break; } @@ -159,7 +159,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfoIntelFPGA( if (param_name == CL_DEVICE_AVAILABLE) { // special case RESULT_BOOL(0); // it must not be available } else { - UNLOCK_RETURN(status); + return status; } } else if (param_name == CL_DEVICE_AVAILABLE) { RESULT_BOOL( @@ -568,14 +568,14 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfoIntelFPGA( if (result.size == 0) { // We didn't implement the enum. Error out semi-gracefully. - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } if (param_value) { // Actually try to return the string. if (param_value_size < result.size) { // Buffer is too small to hold the return value. 
- UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } RESULT_COPY(param_value, param_value_size); } @@ -583,7 +583,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfoIntelFPGA( if (param_value_size_ret) { *param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -613,7 +613,7 @@ CL_API_ENTRY cl_int CL_API_CALL clCreateSubDevicesIntelFPGA( // Since we don't support creating sub devices, we should follow the first // case if in_device is not valid, and the second case if it is. - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; // Suppress compiler warnings. partition_properties = partition_properties; num_entries = num_entries; @@ -621,10 +621,10 @@ CL_API_ENTRY cl_int CL_API_CALL clCreateSubDevicesIntelFPGA( num_devices = num_devices; if (!acl_device_is_valid(in_device)) { - UNLOCK_RETURN(CL_INVALID_DEVICE); + return CL_INVALID_DEVICE; } - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } ACL_EXPORT @@ -638,7 +638,7 @@ CL_API_ENTRY cl_int CL_API_CALL clCreateSubDevices( ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clRetainDeviceIntelFPGA(cl_device_id device) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; // Spec says: // "increments the device reference count if device is a valid sub-device @@ -664,9 +664,9 @@ CL_API_ENTRY cl_int CL_API_CALL clRetainDeviceIntelFPGA(cl_device_id device) { // Since we don't (currently) support sub-devices, valid devices must be // root-level: if (acl_device_is_valid(device)) { - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } else { - UNLOCK_RETURN(CL_INVALID_DEVICE); + return CL_INVALID_DEVICE; } } ACL_EXPORT @@ -676,7 +676,7 @@ CL_API_ENTRY cl_int CL_API_CALL clRetainDevice(cl_device_id device) { ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clReleaseDeviceIntelFPGA(cl_device_id device) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; // Spec says: // "decrements the device reference count if device is a valid sub-device @@ -695,9 
+695,9 @@ clReleaseDeviceIntelFPGA(cl_device_id device) { // Since we don't (currently) support sub-devices, valid devices must be // root-level: if (acl_device_is_valid(device)) { - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } else { - UNLOCK_RETURN(CL_INVALID_DEVICE); + return CL_INVALID_DEVICE; } } @@ -712,22 +712,22 @@ clReconfigurePLLIntelFPGA(cl_device_id device, const char *pll_settings_str) { // comments specified for struct pll_setting_t in include/acl_pll. const acl_hal_t *hal; cl_int configure_status; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_device_is_valid(device)) { - UNLOCK_RETURN(CL_INVALID_DEVICE); + return CL_INVALID_DEVICE; } if (!pll_settings_str) { - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } hal = acl_get_hal(); configure_status = hal->pll_reconfigure(device->def.physical_device_id, pll_settings_str); if (configure_status == 0) - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; else - UNLOCK_RETURN(CL_INVALID_OPERATION); + return CL_INVALID_OPERATION; } ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clSetDeviceExceptionCallback( @@ -745,16 +745,16 @@ clSetDeviceExceptionCallbackIntelFPGA( acl_exception_notify_fn_t pfn_exception_notify, void *user_data) { unsigned i; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!pfn_exception_notify) - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; if (!listen_mask) - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; if (!devices && num_devices > 0) - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; if (devices && num_devices == 0) - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; for (i = 0; i < num_devices; ++i) { devices[i]->exception_notify_fn = pfn_exception_notify; @@ -762,7 +762,7 @@ clSetDeviceExceptionCallbackIntelFPGA( devices[i]->listen_mask = listen_mask; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ////////////////////////////// diff --git a/src/acl_event.cpp b/src/acl_event.cpp index 
ffccdaf9..b94ed81a 100644 --- a/src/acl_event.cpp +++ b/src/acl_event.cpp @@ -81,12 +81,12 @@ static void l_record_milestone(cl_event event, cl_profiling_info milestone); ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clRetainEventIntelFPGA(cl_event event) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_event_is_valid(event)) { - UNLOCK_RETURN(CL_INVALID_EVENT); + return CL_INVALID_EVENT; } acl_retain(event); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -96,14 +96,14 @@ CL_API_ENTRY cl_int CL_API_CALL clRetainEvent(cl_event event) { ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clReleaseEventIntelFPGA(cl_event event) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_event_is_valid(event)) { - UNLOCK_RETURN(CL_INVALID_EVENT); + return CL_INVALID_EVENT; } if (!acl_is_retained(event)) { - UNLOCK_ERR_RET(CL_INVALID_EVENT, event->context, - "Trying to release an event that is not retained"); + ERR_RET(CL_INVALID_EVENT, event->context, + "Trying to release an event that is not retained"); } acl_release(event); @@ -134,7 +134,7 @@ CL_API_ENTRY cl_int CL_API_CALL clReleaseEventIntelFPGA(cl_event event) { } } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -147,10 +147,10 @@ CL_API_ENTRY cl_int CL_API_CALL clGetEventInfoIntelFPGA( cl_event event, cl_event_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { acl_result_t result; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_event_is_valid(event)) { - UNLOCK_RETURN(CL_INVALID_EVENT); + return CL_INVALID_EVENT; } // Give the scheduler a nudge. 
@@ -159,8 +159,8 @@ CL_API_ENTRY cl_int CL_API_CALL clGetEventInfoIntelFPGA( acl_idle_update(event->context); } - UNLOCK_VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, - param_value_size_ret, event->context); + VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, param_value_size_ret, + event->context); RESULT_INIT; @@ -193,14 +193,14 @@ CL_API_ENTRY cl_int CL_API_CALL clGetEventInfoIntelFPGA( } if (result.size == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, event->context, - "Invalid or unsupported event query"); + ERR_RET(CL_INVALID_VALUE, event->context, + "Invalid or unsupported event query"); } if (param_value) { if (param_value_size < result.size) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, event->context, - "Parameter return buffer is too small"); + ERR_RET(CL_INVALID_VALUE, event->context, + "Parameter return buffer is too small"); } RESULT_COPY(param_value, param_value_size); } @@ -208,7 +208,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetEventInfoIntelFPGA( if (param_value_size_ret) { *param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -226,21 +226,21 @@ CL_API_ENTRY cl_int CL_API_CALL clGetEventProfilingInfoIntelFPGA( cl_event event, cl_profiling_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { acl_result_t result; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_event_is_valid(event)) { - UNLOCK_RETURN(CL_INVALID_EVENT); + return CL_INVALID_EVENT; } - UNLOCK_VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, - param_value_size_ret, event->context); + VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, param_value_size_ret, + event->context); // check if the event supports the profiling and error out accordingly if (event->cmd.type == CL_COMMAND_USER) { - UNLOCK_ERR_RET(CL_PROFILING_INFO_NOT_AVAILABLE, event->context, - "Profiling information is not available for user events"); + ERR_RET(CL_PROFILING_INFO_NOT_AVAILABLE, event->context, + 
"Profiling information is not available for user events"); } else if (!event->support_profiling) { // since user event will not have command_queue set, no need to check again - UNLOCK_ERR_RET( + ERR_RET( CL_PROFILING_INFO_NOT_AVAILABLE, event->context, "Profiling information is not available because " "CL_QUEUE_PROFILING_ENABLE was not set on the event's command queue"); @@ -266,14 +266,13 @@ CL_API_ENTRY cl_int CL_API_CALL clGetEventProfilingInfoIntelFPGA( } if (result.size == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, event->context, - "Invalid event profiling query"); + ERR_RET(CL_INVALID_VALUE, event->context, "Invalid event profiling query"); } if (param_value) { if (param_value_size < result.size) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, event->context, - "Parameter return buffer is too small"); + ERR_RET(CL_INVALID_VALUE, event->context, + "Parameter return buffer is too small"); } RESULT_COPY(param_value, param_value_size); } @@ -281,7 +280,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetEventProfilingInfoIntelFPGA( if (param_value_size_ret) { *param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -297,10 +296,10 @@ CL_API_ENTRY cl_event CL_API_CALL clCreateUserEventIntelFPGA(cl_context context, cl_int *errcode_ret) { cl_event result = 0; cl_int status; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_context_is_valid(context)) - UNLOCK_BAIL(CL_INVALID_CONTEXT); + BAIL(CL_INVALID_CONTEXT); // Create the user event on the user_event_queue. // In our model, every event is attached to some command queue. @@ -310,7 +309,7 @@ clCreateUserEventIntelFPGA(cl_context context, cl_int *errcode_ret) { 0, // depends on nothing else. CL_COMMAND_USER, &result); if (status != CL_SUCCESS) - UNLOCK_BAIL(status); // already signaled error + BAIL(status); // already signaled error // As per spec. 
acl_set_execution_status(result, CL_SUBMITTED); @@ -318,7 +317,7 @@ clCreateUserEventIntelFPGA(cl_context context, cl_int *errcode_ret) { if (errcode_ret) *errcode_ret = CL_SUCCESS; - UNLOCK_RETURN(result); + return result; } ACL_EXPORT @@ -330,16 +329,16 @@ CL_API_ENTRY cl_event CL_API_CALL clCreateUserEvent(cl_context context, ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clSetUserEventStatusIntelFPGA(cl_event event, cl_int execution_status) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_event_is_valid(event)) { - UNLOCK_RETURN(CL_INVALID_EVENT); + return CL_INVALID_EVENT; } // Either negative, or CL_COMPLETE (which itself is 0) if (execution_status <= CL_COMPLETE) { if (event->execution_status <= CL_COMPLETE) { - UNLOCK_ERR_RET( + ERR_RET( CL_INVALID_OPERATION, event->context, "User event has already been completed or terminated with an error"); } @@ -349,10 +348,9 @@ clSetUserEventStatusIntelFPGA(cl_event event, cl_int execution_status) { // Nudge the scheduler. acl_idle_update(event->context); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } else { - UNLOCK_ERR_RET(CL_INVALID_VALUE, event->context, - "Invalid execution status"); + ERR_RET(CL_INVALID_VALUE, event->context, "Invalid execution status"); } } @@ -371,24 +369,24 @@ CL_API_ENTRY cl_int clSetEventCallbackIntelFPGA( void *user_data), void *user_data) { acl_event_user_callback *cb; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_event_is_valid(event)) { - UNLOCK_RETURN(CL_INVALID_EVENT); + return CL_INVALID_EVENT; } if (pfn_event_notify == NULL) { - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } if (command_exec_callback_type != CL_SUBMITTED && command_exec_callback_type != CL_RUNNING && command_exec_callback_type != CL_COMPLETE) { - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } cb = (acl_event_user_callback *)acl_malloc(sizeof(acl_event_user_callback)); if (!cb) - UNLOCK_RETURN(CL_OUT_OF_HOST_MEMORY); + return 
CL_OUT_OF_HOST_MEMORY; cb->notify_user_data = user_data; cb->event_notify_fn = pfn_event_notify; @@ -403,7 +401,7 @@ CL_API_ENTRY cl_int clSetEventCallbackIntelFPGA( // status is already passed. acl_event_callback(event, event->execution_status); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } // registers a user callback function for a specific command execution status. @@ -454,10 +452,10 @@ void acl_event_callback(cl_event event, cl_int event_command_exec_status) { temp = cb_head; cb_head = cb_head->next; acl_free(temp); - - lock_count = acl_suspend_lock(); - event_notify_fn(event, event_command_exec_status, notify_user_data); - acl_resume_lock(lock_count); + { + acl_suspend_lock_guard l(acl_mutex_wrapper); + event_notify_fn(event, event_command_exec_status, notify_user_data); + } release++; } else { pre = cb_head; @@ -669,8 +667,9 @@ void acl_set_execution_status(cl_event event, int new_status) { // signal handler, which can't lock mutexes, so we don't lock in that case. // All functions called from this one therefore have to use // acl_assert_locked_or_sig() instead of just acl_assert_locked(). + std::unique_lock lock{acl_mutex_wrapper, std::defer_lock}; if (!acl_is_inside_sig()) { - acl_lock(); + lock.lock(); } if (event) { // just being defensive @@ -742,10 +741,6 @@ void acl_set_execution_status(cl_event event, int new_status) { // Signal all waiters. acl_signal_device_update(); } - - if (!acl_is_inside_sig()) { - acl_unlock(); - } } static void l_record_milestone(cl_event event, cl_profiling_info milestone) { diff --git a/src/acl_hal.cpp b/src/acl_hal.cpp index 3b57892d..97071ad3 100644 --- a/src/acl_hal.cpp +++ b/src/acl_hal.cpp @@ -132,7 +132,7 @@ int acl_print_debug_msg(const char *msg, ...) 
{ extern CL_API_ENTRY void CL_API_CALL clSetBoardLibraryIntelFPGA(char *library_name) { acl_mmd_library_names_t *next_library = NULL; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; acl_print_debug_msg("Adding library '%s' to list of libraries to open\n", library_name); @@ -152,7 +152,7 @@ clSetBoardLibraryIntelFPGA(char *library_name) { insertion_point->next = next_library; } - UNLOCK_RETURN_VOID; + return; } #ifdef __GNUC__ diff --git a/src/acl_hostch.cpp b/src/acl_hostch.cpp index f9c9a19f..b51f2030 100644 --- a/src/acl_hostch.cpp +++ b/src/acl_hostch.cpp @@ -243,9 +243,10 @@ CL_API_ENTRY cl_int CL_API_CALL clReadPipeIntelFPGA(cl_mem pipe, void *ptr) { size_t buffer_size; cl_int status = 0; - acl_lock(); - acl_idle_update(pipe->context); - acl_unlock(); + { + std::scoped_lock lock{acl_mutex_wrapper}; + acl_idle_update(pipe->context); + } acl_mutex_lock(&(pipe->host_pipe_info->m_lock)); @@ -343,9 +344,10 @@ CL_API_ENTRY cl_int CL_API_CALL clWritePipeIntelFPGA(cl_mem pipe, void *ptr) { cl_int status = 0; cl_int ret; - acl_lock(); - acl_idle_update(pipe->context); - acl_unlock(); + { + std::scoped_lock lock{acl_mutex_wrapper}; + acl_idle_update(pipe->context); + } acl_mutex_lock(&(pipe->host_pipe_info->m_lock)); @@ -458,9 +460,10 @@ CL_API_ENTRY void *CL_API_CALL clMapHostPipeIntelFPGA(cl_mem pipe, void *buffer = 0; int status = 0; - acl_lock(); - acl_idle_update(pipe->context); - acl_unlock(); + { + std::scoped_lock lock{acl_mutex_wrapper}; + acl_idle_update(pipe->context); + } acl_mutex_lock(&(pipe->host_pipe_info->m_lock)); @@ -587,9 +590,10 @@ clUnmapHostPipeIntelFPGA(cl_mem pipe, void *mapped_ptr, size_t size_to_unmap, int status = 0; int first = 1; - acl_lock(); - acl_idle_update(pipe->context); - acl_unlock(); + { + std::scoped_lock lock{acl_mutex_wrapper}; + acl_idle_update(pipe->context); + } acl_mutex_lock(&(pipe->host_pipe_info->m_lock)); diff --git a/src/acl_icd_dispatch.cpp b/src/acl_icd_dispatch.cpp index b1700bc2..04d37f71 100644 --- 
a/src/acl_icd_dispatch.cpp +++ b/src/acl_icd_dispatch.cpp @@ -78,11 +78,11 @@ clGetExtensionFunctionAddressIntelFPGA(const char *func_name) { ACL_EXPORT CL_API_ENTRY void *CL_API_CALL clGetBoardExtensionFunctionAddressIntelFPGA( const char *func_name, cl_device_id device) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; { void *ret = acl_get_hal()->get_board_extension_function_address( func_name, device->def.physical_device_id); - UNLOCK_RETURN(ret); + return ret; } } @@ -97,11 +97,11 @@ CL_API_ENTRY void *CL_API_CALL clGetExtensionFunctionAddressForPlatformIntelFPGA(cl_platform_id platform, const char *func_name) { // We currently only have one platform - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_platform_is_valid(platform)) { - UNLOCK_RETURN(NULL); + return NULL; } - UNLOCK_RETURN(clGetExtensionFunctionAddressIntelFPGA(func_name)); + return clGetExtensionFunctionAddressIntelFPGA(func_name); } ACL_EXPORT diff --git a/src/acl_kernel.cpp b/src/acl_kernel.cpp index 2f6e5b85..7f2ee78e 100644 --- a/src/acl_kernel.cpp +++ b/src/acl_kernel.cpp @@ -125,12 +125,12 @@ ACL_DEFINE_CL_OBJECT_ALLOC_FUNCTIONS(cl_kernel); ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clRetainKernelIntelFPGA(cl_kernel kernel) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_kernel_is_valid(kernel)) { - UNLOCK_RETURN(CL_INVALID_KERNEL); + return CL_INVALID_KERNEL; } acl_retain(kernel); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -140,10 +140,10 @@ CL_API_ENTRY cl_int CL_API_CALL clRetainKernel(cl_kernel kernel) { ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clReleaseKernelIntelFPGA(cl_kernel kernel) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_kernel_is_valid(kernel)) { - UNLOCK_RETURN(CL_INVALID_KERNEL); + return CL_INVALID_KERNEL; } acl_print_debug_msg("Release kernel %p\n", kernel); @@ -179,7 +179,7 @@ CL_API_ENTRY cl_int CL_API_CALL clReleaseKernelIntelFPGA(cl_kernel kernel) { } else { 
acl_release(kernel); } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -193,14 +193,14 @@ CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernelIntelFPGA( cl_int status; cl_kernel kernel = 0; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; // Can't call the callback, because we have no valid context. if (!acl_program_is_valid(program)) - UNLOCK_BAIL(CL_INVALID_PROGRAM); + BAIL(CL_INVALID_PROGRAM); if (!kernel_name) - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, program->context, "kernel_name is NULL"); + BAIL_INFO(CL_INVALID_VALUE, program->context, "kernel_name is NULL"); // What device program is associated with this kernel? // Right now we only support one device per kernel. @@ -209,12 +209,12 @@ CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernelIntelFPGA( &status, program->context, 0); if (status != CL_SUCCESS) - UNLOCK_BAIL(status); // already signaled callback + BAIL(status); // already signaled callback kernel = acl_program_alloc_kernel(program); if (kernel == 0) { - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, program->context, - "Could not allocate a program object"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, program->context, + "Could not allocate a program object"); } l_init_kernel(kernel, program, accel_def, dev_bin, errcode_ret); @@ -223,7 +223,7 @@ CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernelIntelFPGA( *errcode_ret = CL_SUCCESS; } - UNLOCK_RETURN(kernel); + return kernel; } ACL_EXPORT @@ -237,10 +237,10 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clCreateKernelsInProgramIntelFPGA( cl_program program, cl_uint num_kernels, cl_kernel *kernels, cl_uint *num_kernels_ret) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_program_is_valid(program)) { - UNLOCK_RETURN(CL_INVALID_PROGRAM); + return CL_INVALID_PROGRAM; } auto context = program->context; @@ -251,23 +251,21 @@ CL_API_ENTRY cl_int CL_API_CALL clCreateKernelsInProgramIntelFPGA( l_load_consistently_built_kernels_in_program(program, accel_ret); if (status != CL_SUCCESS) { - 
UNLOCK_RETURN(status); // already signaled + return status; // already signaled } if (accel_ret.size() == 0) { - UNLOCK_ERR_RET( - CL_INVALID_PROGRAM_EXECUTABLE, context, - "No kernels were built across all devices with the same interface"); + ERR_RET(CL_INVALID_PROGRAM_EXECUTABLE, context, + "No kernels were built across all devices with the same interface"); } // Check return buffer spec if (num_kernels == 0 && kernels) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "num_kernels is zero but kernels array is specified"); + ERR_RET(CL_INVALID_VALUE, context, + "num_kernels is zero but kernels array is specified"); } if (num_kernels > 0 && kernels == 0) { - UNLOCK_ERR_RET( - CL_INVALID_VALUE, context, - "num_kernels is non-zero but kernels array is not specified"); + ERR_RET(CL_INVALID_VALUE, context, + "num_kernels is non-zero but kernels array is not specified"); } if (kernels) { @@ -275,7 +273,7 @@ CL_API_ENTRY cl_int CL_API_CALL clCreateKernelsInProgramIntelFPGA( // Result buffer isn't big enough. if (num_kernels < accel_ret.size()) { - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } // The definitions are in accel_ret. Create the kernels. 
@@ -300,7 +298,7 @@ CL_API_ENTRY cl_int CL_API_CALL clCreateKernelsInProgramIntelFPGA( if (num_kernels_ret) *num_kernels_ret = static_cast(accel_ret.size()); - UNLOCK_RETURN(status); + return status; } ACL_EXPORT @@ -320,17 +318,16 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgIntelFPGA(cl_kernel kernel, cl_context context; cl_bool is_pipe = CL_FALSE; cl_bool is_sampler = CL_FALSE; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_kernel_is_valid(kernel)) { - UNLOCK_RETURN(CL_INVALID_KERNEL); + return CL_INVALID_KERNEL; } context = kernel->program->context; if (arg_index >= kernel->accel_def->iface.args.size()) { - UNLOCK_ERR_RET(CL_INVALID_ARG_INDEX, context, - "Argument index is too large"); + ERR_RET(CL_INVALID_ARG_INDEX, context, "Argument index is too large"); } arg_info = &(kernel->accel_def->iface.args[arg_index]); @@ -341,14 +338,14 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgIntelFPGA(cl_kernel kernel, // representing buffers. if (arg_value && (*(cl_mem *)arg_value) && !acl_mem_is_valid(*(cl_mem *)arg_value)) - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, context, - "Non-memory object passed in as memory object argument"); + ERR_RET(CL_INVALID_MEM_OBJECT, context, + "Non-memory object passed in as memory object argument"); } else if (arg_info->category == ACL_ARG_SAMPLER) { if (arg_value && (arg_size != sizeof(cl_sampler) || !acl_sampler_is_valid(*(cl_sampler *)arg_value))) { - UNLOCK_ERR_RET(CL_INVALID_SAMPLER, context, - "Non-sampler object passed in as sampler object argument"); + ERR_RET(CL_INVALID_SAMPLER, context, + "Non-sampler object passed in as sampler object argument"); } is_sampler = CL_TRUE; } else if (arg_size != arg_info->size && arg_value && @@ -361,13 +358,12 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgIntelFPGA(cl_kernel kernel, switch (arg_info->addr_space) { case ACL_ARG_ADDR_LOCAL: /* Size is number of local bytes to allocate */ if (arg_size == 0) { - UNLOCK_ERR_RET(CL_INVALID_ARG_SIZE, context, - 
"Pointer-to-local argument specified zero size"); + ERR_RET(CL_INVALID_ARG_SIZE, context, + "Pointer-to-local argument specified zero size"); } if (arg_value != 0) { - UNLOCK_ERR_RET( - CL_INVALID_ARG_VALUE, context, - "Pointer-to-local argument specified with a non-null value"); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Pointer-to-local argument specified with a non-null value"); } /* We instantiated a specific mem capacity to handle this pointer. @@ -376,10 +372,9 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgIntelFPGA(cl_kernel kernel, { unsigned lmem_size_instantiated = arg_info->lmem_size_bytes; if (arg_size > lmem_size_instantiated) { - UNLOCK_ERR_RET( - CL_INVALID_ARG_SIZE, context, - "Pointer-to-local argument requested size is larger than " - "maximum specified at compile time"); + ERR_RET(CL_INVALID_ARG_SIZE, context, + "Pointer-to-local argument requested size is larger than " + "maximum specified at compile time"); } } break; @@ -387,19 +382,17 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgIntelFPGA(cl_kernel kernel, case ACL_ARG_ADDR_GLOBAL: case ACL_ARG_ADDR_CONSTANT: if (arg_size != sizeof(cl_mem)) { - UNLOCK_ERR_RET( - CL_INVALID_ARG_SIZE, context, - "Pointer-to-global or Pointer-to-constant argument size is " - "not the size of cl_mem"); + ERR_RET(CL_INVALID_ARG_SIZE, context, + "Pointer-to-global or Pointer-to-constant argument size is " + "not the size of cl_mem"); } // Can pass NULL or pointer to NULL in arg_value, or it must be a valid // memory object. 
if (arg_value && (*(cl_mem *)arg_value) && !acl_mem_is_valid(*(cl_mem *)arg_value)) { - UNLOCK_ERR_RET( - CL_INVALID_ARG_VALUE, context, - "Pointer-to-global or Pointer-to-constant argument value is " - "not a valid memory object"); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Pointer-to-global or Pointer-to-constant argument value is " + "not a valid memory object"); } if (arg_value && (*(cl_mem *)arg_value) && @@ -410,8 +403,8 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgIntelFPGA(cl_kernel kernel, // If this buffer is an SVM buffer, assume that the user wants the memory to // be in sync. Treat this the same as an SVM kernel arg and return. if (arg_value && (*(cl_mem *)arg_value) && (*(cl_mem *)arg_value)->is_svm) { - UNLOCK_RETURN(clSetKernelArgSVMPointerIntelFPGA( - kernel, arg_index, (*(cl_mem *)arg_value)->host_mem.aligned_ptr)); + return clSetKernelArgSVMPointerIntelFPGA( + kernel, arg_index, (*(cl_mem *)arg_value)->host_mem.aligned_ptr); } break; @@ -419,22 +412,21 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgIntelFPGA(cl_kernel kernel, if (is_sampler && arg_value != 0 && acl_sampler_is_valid_ptr(*((cl_sampler *)arg_value))) { if (arg_size != sizeof(cl_sampler)) { - UNLOCK_ERR_RET(CL_INVALID_ARG_SIZE, context, - "Sampler argument size is not the size of cl_sampler"); + ERR_RET(CL_INVALID_ARG_SIZE, context, + "Sampler argument size is not the size of cl_sampler"); } if (arg_info->size != sizeof(int)) { - UNLOCK_ERR_RET(CL_INVALID_ARG_SIZE, context, - "Argument size is the wrong size"); + ERR_RET(CL_INVALID_ARG_SIZE, context, + "Argument size is the wrong size"); } } else if (arg_size == sizeof(cl_mem) && acl_pipe_is_valid_pointer(*((cl_mem *)arg_value), kernel)) { is_pipe = CL_TRUE; } else if (arg_size != arg_info->size) { - UNLOCK_ERR_RET(CL_INVALID_ARG_SIZE, context, - "Argument size is the wrong size"); + ERR_RET(CL_INVALID_ARG_SIZE, context, "Argument size is the wrong size"); } if (arg_value == 0) { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, 
"Argument value is NULL"); + ERR_RET(CL_INVALID_ARG_VALUE, context, "Argument value is NULL"); } break; } @@ -452,9 +444,9 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgIntelFPGA(cl_kernel kernel, /* If this is a host pipe, create a host channel and bind them together */ if (arg_info->host_accessible && pipe_ptr->host_pipe_info != NULL) { if (pipe_ptr->host_pipe_info->m_binded_kernel != NULL) { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "This pipe has already been bound to a kernel. Cannot " - "rebind to a new kernel"); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "This pipe has already been bound to a kernel. Cannot " + "rebind to a new kernel"); } // Check to see if the kernel argument's width matches up with our cl_pipe @@ -475,23 +467,23 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgIntelFPGA(cl_kernel kernel, hostpipe_info.is_host_to_dev) { // Direction match } else { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "Host accessible pipe direction is not the same " - "of cl_pipe"); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Host accessible pipe direction is not the same " + "of cl_pipe"); } // Check width if (pipe_ptr->fields.pipe_objs.pipe_packet_size != hostpipe_info.data_width) { - UNLOCK_ERR_RET(CL_INVALID_ARG_SIZE, context, - "Host accessible pipe size is not the same size " - "of cl_pipe"); + ERR_RET(CL_INVALID_ARG_SIZE, context, + "Host accessible pipe size is not the same size " + "of cl_pipe"); } // Check max buffer size if (pipe_ptr->fields.pipe_objs.pipe_max_packets > hostpipe_info.max_buffer_depth) { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "Host accessible pipe max packets size is " - "smaller than cl_pipe requested size"); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Host accessible pipe max packets size is " + "smaller than cl_pipe requested size"); } found = true; } @@ -513,23 +505,23 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgIntelFPGA(cl_kernel kernel, hostpipe_info.is_host_to_dev) { // Direction match } else { - 
UNLOCK_ERR_RET( + ERR_RET( CL_INVALID_ARG_VALUE, context, "Host accessible pipe direction is not the same of cl_pipe"); } // Check width if (pipe_ptr->fields.pipe_objs.pipe_packet_size != hostpipe_info.data_width) { - UNLOCK_ERR_RET( + ERR_RET( CL_INVALID_ARG_SIZE, context, "Host accessible pipe size is not the same size of cl_pipe"); } // Check max buffer size if (pipe_ptr->fields.pipe_objs.pipe_max_packets > hostpipe_info.max_buffer_depth) { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "Host accessible pipe max packets size is smaller " - "than cl_pipe requested size"); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Host accessible pipe max packets size is smaller " + "than cl_pipe requested size"); } found = true; } @@ -547,7 +539,7 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgIntelFPGA(cl_kernel kernel, // figure out which device at enqueue time pipe_ptr->host_pipe_info->binded = false; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } // Now try saving the value. @@ -564,9 +556,8 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgIntelFPGA(cl_kernel kernel, // creation time, or at system initialization... 
#ifndef REMOVE_VALID_CHECKS if ((start_idx + iface_arg_size) > kernel->arg_value_size) { - UNLOCK_ERR_RET( - CL_INVALID_KERNEL, context, - "Argument overflows the space allocated for kernel arguments"); + ERR_RET(CL_INVALID_KERNEL, context, + "Argument overflows the space allocated for kernel arguments"); } #endif @@ -594,9 +585,9 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgIntelFPGA(cl_kernel kernel, .autodiscovery_def.num_global_mem_systems > 1 && !l_check_mem_type_support_on_kernel_arg( kernel, arg_index, ACL_GLOBAL_MEM_DEVICE_PRIVATE)) { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "cl_mem object was set on kernel argument that doesn't " - "have attribute to access device private memory"); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "cl_mem object was set on kernel argument that doesn't " + "have attribute to access device private memory"); } } @@ -724,7 +715,7 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgIntelFPGA(cl_kernel kernel, kernel->arg_defined[arg_index] = 1; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -739,22 +730,21 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgSVMPointerIntelFPGA( cl_kernel kernel, cl_uint arg_index, const void *arg_value) { cl_context context; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; #ifndef REMOVE_VALID_CHECKS if (!acl_kernel_is_valid(kernel)) { - UNLOCK_RETURN(CL_INVALID_KERNEL); + return CL_INVALID_KERNEL; } context = kernel->program->context; if (arg_index >= kernel->accel_def->iface.args.size()) { - UNLOCK_ERR_RET(CL_INVALID_ARG_INDEX, context, - "Argument index is too large"); + ERR_RET(CL_INVALID_ARG_INDEX, context, "Argument index is too large"); } if (arg_value == NULL) { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, "SVM argument is NULL"); + ERR_RET(CL_INVALID_ARG_VALUE, context, "SVM argument is NULL"); } unsigned expected_alignment = @@ -763,13 +753,12 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgSVMPointerIntelFPGA( expected_alignment ? 
expected_alignment : ACL_MEM_ALIGN; // For tests if ((uintptr_t)arg_value % expected_alignment != 0) { if (expected_alignment == ACL_MEM_ALIGN) { - UNLOCK_ERR_RET( - CL_INVALID_ARG_VALUE, context, - "SVM argument is not aligned correctly for type. Ensure the " - "kernel argument is targeting the correct buffer location."); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "SVM argument is not aligned correctly for type. Ensure the " + "kernel argument is targeting the correct buffer location."); } else { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "SVM argument is not aligned correctly for type."); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "SVM argument is not aligned correctly for type."); } } #endif @@ -786,9 +775,8 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgSVMPointerIntelFPGA( // creation time, or at system initialization... #ifndef REMOVE_VALID_CHECKS if ((start_idx + iface_arg_size) > kernel->arg_value_size) { - UNLOCK_ERR_RET( - CL_INVALID_KERNEL, context, - "Argument overflows the space allocated for kernel arguments"); + ERR_RET(CL_INVALID_KERNEL, context, + "Argument overflows the space allocated for kernel arguments"); } // If the board has both SVM and DGM, make sure kernel argument is SVM cl_bool context_has_device_with_physical_mem = CL_FALSE; @@ -812,9 +800,9 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgSVMPointerIntelFPGA( 1 && !l_check_mem_type_support_on_kernel_arg( kernel, arg_index, ACL_GLOBAL_MEM_SHARED_VIRTUAL)) { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "SVM pointer was set on kernel argument that doesn't have " - "attribute to access SVM"); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "SVM pointer was set on kernel argument that doesn't have " + "attribute to access SVM"); } #endif @@ -831,7 +819,7 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgSVMPointerIntelFPGA( kernel->arg_defined[arg_index] = 1; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -843,17 +831,16 @@ CL_API_ENTRY cl_int CL_API_CALL 
clSetKernelArgSVMPointer( ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgMemPointerINTEL( cl_kernel kernel, cl_uint arg_index, const void *arg_value) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_kernel_is_valid(kernel)) { - UNLOCK_RETURN(CL_INVALID_KERNEL); + return CL_INVALID_KERNEL; } cl_context context = kernel->program->context; if (arg_index >= kernel->accel_def->iface.args.size()) { - UNLOCK_ERR_RET(CL_INVALID_ARG_INDEX, context, - "Argument index is too large"); + ERR_RET(CL_INVALID_ARG_INDEX, context, "Argument index is too large"); } // Determine where to write the value. @@ -866,9 +853,8 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgMemPointerINTEL( // creation time, or at system initialization... #ifndef REMOVE_VALID_CHECKS if ((start_idx + iface_arg_size) > kernel->arg_value_size) { - UNLOCK_ERR_RET( - CL_INVALID_KERNEL, context, - "Argument overflows the space allocated for kernel arguments"); + ERR_RET(CL_INVALID_KERNEL, context, + "Argument overflows the space allocated for kernel arguments"); } unsigned expected_alignment = @@ -877,20 +863,20 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgMemPointerINTEL( expected_alignment ? expected_alignment : ACL_MEM_ALIGN; // For tests if ((uintptr_t)arg_value % expected_alignment != 0) { if (expected_alignment == ACL_MEM_ALIGN) { - UNLOCK_ERR_RET( + ERR_RET( CL_INVALID_ARG_VALUE, context, "Pointer argument is not aligned correctly for type. 
If you are " "using unified shared memory compile the kernel with the -usm flag."); } else { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "Pointer argument is not aligned correctly for type."); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Pointer argument is not aligned correctly for type."); } } if (!acl_usm_ptr_belongs_to_context(context, arg_value)) { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "Pointer argument is not allocated using USM or not " - "allocated in correct context."); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Pointer argument is not allocated using USM or not " + "allocated in correct context."); } // Ensure the USM allocation (arg_value) is compatible with what the kernel @@ -914,9 +900,9 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgMemPointerINTEL( acl_usm_allocation_t *usm_alloc = acl_get_usm_alloc_from_ptr(context, arg_value); if (usm_alloc == nullptr) { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "Pointer argument is not allocated using USM or not " - "allocated in correct context."); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Pointer argument is not allocated using USM or not " + "allocated in correct context."); } // Try to find the memory interface that corresponds to this allocation. @@ -962,29 +948,29 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgMemPointerINTEL( ACL_GLOBAL_MEM_DEVICE_ALLOCATION)) { if (kernel_arg_mem->allocation_type & ACL_GLOBAL_MEM_HOST_ALLOCATION) { // Host not compatible with device memory. - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "Argument expects host allocation but pointer is to " - "USM device memory"); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Argument expects host allocation but pointer is to " + "USM device memory"); } else if (kernel_arg_mem->allocation_type & ACL_GLOBAL_MEM_SHARED_ALLOCATION) { // Shared not compatible with device memory. 
- UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "Argument expects shared allocation but pointer is to " - "USM device memory"); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Argument expects shared allocation but pointer is to " + "USM device memory"); } } else if (allocation_mem != kernel_arg_mem) { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "Possibly incompatible interface used for device memory " - "allocation."); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Possibly incompatible interface used for device memory " + "allocation."); } } else if (usm_alloc->type == CL_MEM_TYPE_SHARED_INTEL) { if (!(kernel_arg_mem->allocation_type & ACL_GLOBAL_MEM_SHARED_ALLOCATION)) { if (kernel_arg_mem->allocation_type & ACL_GLOBAL_MEM_DEVICE_ALLOCATION) { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "Argument expects device allocation but pointer is to " - "USM shared memory"); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Argument expects device allocation but pointer is to " + "USM shared memory"); } else if (kernel_arg_mem->allocation_type & ACL_GLOBAL_MEM_HOST_ALLOCATION) { bool compatible = false; @@ -1000,23 +986,23 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgMemPointerINTEL( } } if (!compatible) { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "Argument expects host allocation but pointer is to " - "USM shared memory"); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Argument expects host allocation but pointer is to " + "USM shared memory"); } } } else if (allocation_mem != kernel_arg_mem) { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "Possibly incompatible interface used for shared memory " - "allocation."); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Possibly incompatible interface used for shared memory " + "allocation."); } } else if (usm_alloc->type == CL_MEM_TYPE_HOST_INTEL) { if (!(kernel_arg_mem->allocation_type & ACL_GLOBAL_MEM_HOST_ALLOCATION)) { if (kernel_arg_mem->allocation_type & ACL_GLOBAL_MEM_DEVICE_ALLOCATION) { - 
UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "Argument expects device allocation but pointer is to " - "USM host memory"); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Argument expects device allocation but pointer is to " + "USM host memory"); } else if (kernel_arg_mem->allocation_type & ACL_GLOBAL_MEM_SHARED_ALLOCATION) { bool compatible = false; @@ -1032,13 +1018,13 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgMemPointerINTEL( } } if (!compatible) { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "Argument expects shared allocation but pointer is " - "to USM host memory"); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Argument expects shared allocation but pointer is " + "to USM host memory"); } } } else if (allocation_mem != kernel_arg_mem) { - UNLOCK_ERR_RET( + ERR_RET( CL_INVALID_ARG_VALUE, context, "Possibly incompatible interface used for host memory allocation."); } @@ -1073,7 +1059,7 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgMemPointerINTEL( } kernel->ptr_arg_vector[arg_index] = usm_alloc->range.begin; - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -1083,77 +1069,72 @@ clSetKernelExecInfoIntelFPGA(cl_kernel kernel, cl_kernel_exec_info param_name, cl_context context; cl_int status = CL_SUCCESS; size_t iparam; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_kernel_is_valid(kernel)) { - UNLOCK_RETURN(CL_INVALID_KERNEL); + return CL_INVALID_KERNEL; } context = kernel->program->context; if (param_value == NULL) - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, "param_value cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, context, "param_value cannot be NULL"); switch (param_name) { case CL_KERNEL_EXEC_INFO_SVM_PTRS: { iparam = 0; // param_value_size must be a coefficient of sizeof(void*) if (param_value_size % sizeof(void *) != 0) - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "param_value_size is not valid"); + ERR_RET(CL_INVALID_VALUE, context, "param_value_size is not valid"); // The pointers must be valid svm 
pointers or svm pointers + offset into the // SVM region. for (iparam = 0; iparam < param_value_size / (sizeof(void *)); iparam++) { if (!acl_ptr_is_contained_in_context_svm( context, ((void **)param_value)[iparam])) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "param_value contains a pointer that is not contained " - "in the SVM region"); + ERR_RET(CL_INVALID_VALUE, context, + "param_value contains a pointer that is not contained " + "in the SVM region"); } } break; } case CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM: if (param_value_size != sizeof(cl_bool)) - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "param_value_size is not valid"); + ERR_RET(CL_INVALID_VALUE, context, "param_value_size is not valid"); // We currently don't support any fine-grain system SVM: if (*(cl_bool *)param_value == CL_TRUE) - UNLOCK_ERR_RET(CL_INVALID_OPERATION, context, - "No devices in context associated with " - "kernel support fine-grain system SVM allocations"); + ERR_RET(CL_INVALID_OPERATION, context, + "No devices in context associated with " + "kernel support fine-grain system SVM allocations"); break; case CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL: if (param_value_size != sizeof(cl_bool)) - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "param_value_size is not valid"); + ERR_RET(CL_INVALID_VALUE, context, "param_value_size is not valid"); if (*((cl_bool *)param_value) != CL_TRUE && *((cl_bool *)param_value) != CL_FALSE) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "param_value is not valid cl_bool value"); + ERR_RET(CL_INVALID_VALUE, context, + "param_value is not valid cl_bool value"); } break; case CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL: if (param_value_size != sizeof(cl_bool)) - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "param_value_size is not valid"); + ERR_RET(CL_INVALID_VALUE, context, "param_value_size is not valid"); if (*((cl_bool *)param_value) != CL_TRUE && *((cl_bool *)param_value) != CL_FALSE) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - 
"param_value is not valid cl_bool value"); + ERR_RET(CL_INVALID_VALUE, context, + "param_value is not valid cl_bool value"); } break; case CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL: if (param_value_size != sizeof(cl_bool)) - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "param_value_size is not valid"); + ERR_RET(CL_INVALID_VALUE, context, "param_value_size is not valid"); if (*((cl_bool *)param_value) != CL_TRUE && *((cl_bool *)param_value) != CL_FALSE) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "param_value is not valid cl_bool value"); + ERR_RET(CL_INVALID_VALUE, context, + "param_value is not valid cl_bool value"); } break; case CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL: @@ -1161,26 +1142,24 @@ clSetKernelExecInfoIntelFPGA(cl_kernel kernel, cl_kernel_exec_info param_name, kernel->ptr_hashtable.clear(); // param_value_size must be a coefficient of sizeof(void*) if (param_value_size % sizeof(void *) != 0) - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "param_value_size is not valid"); + ERR_RET(CL_INVALID_VALUE, context, "param_value_size is not valid"); // The pointers must be valid device pointer for (iparam = 0; iparam < param_value_size / (sizeof(void *)); iparam++) { acl_usm_allocation_t *usm_alloc = acl_get_usm_alloc_from_ptr(context, ((void **)param_value)[iparam]); if (!usm_alloc) { - UNLOCK_ERR_RET( - CL_INVALID_VALUE, context, - "param_value contains a pointer that is not part of context"); + ERR_RET(CL_INVALID_VALUE, context, + "param_value contains a pointer that is not part of context"); } kernel->ptr_hashtable.insert(usm_alloc->range.begin); } break; default: - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, "Invalid param_name"); + ERR_RET(CL_INVALID_VALUE, context, "Invalid param_name"); } - UNLOCK_RETURN(status); + return status; } ACL_EXPORT @@ -1202,19 +1181,19 @@ CL_API_ENTRY cl_int CL_API_CALL clGetKernelArgInfoIntelFPGA( cl_context context; cl_program program; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if 
(!acl_kernel_is_valid(kernel)) { - UNLOCK_RETURN(CL_INVALID_KERNEL); + return CL_INVALID_KERNEL; } program = kernel->program; context = program->context; - UNLOCK_VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, - param_value_size_ret, context); + VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, param_value_size_ret, + context); if (arg_indx >= kernel->accel_def->iface.args.size()) - UNLOCK_ERR_RET(CL_INVALID_ARG_INDEX, context, "Invalid kernel arg index."); + ERR_RET(CL_INVALID_ARG_INDEX, context, "Invalid kernel arg index."); // addr_space and type_qualifier is always available via autodiscovery, the // other three parameters are optionally loaded in the autodiscovery string, // therefore any one of the three parameter being empty infers information not @@ -1222,15 +1201,15 @@ CL_API_ENTRY cl_int CL_API_CALL clGetKernelArgInfoIntelFPGA( if ((kernel->accel_def->iface.args[arg_indx].name.empty()) && !(param_name == CL_KERNEL_ARG_ADDRESS_QUALIFIER || param_name == CL_KERNEL_ARG_TYPE_QUALIFIER)) - UNLOCK_ERR_RET(CL_KERNEL_ARG_INFO_NOT_AVAILABLE, context, - "Kernel arg info not available."); + ERR_RET(CL_KERNEL_ARG_INFO_NOT_AVAILABLE, context, + "Kernel arg info not available."); // filtering the arguments that are added by the compiler to handle printfs. // In such cases, the arguments won't have any type, hence the type_name is // empty. 
if (!kernel->accel_def->iface.args[arg_indx].name.empty() && kernel->accel_def->iface.args[arg_indx].type_name.empty()) - UNLOCK_ERR_RET(CL_INVALID_ARG_INDEX, context, "Invalid kernel arg index."); + ERR_RET(CL_INVALID_ARG_INDEX, context, "Invalid kernel arg index."); RESULT_INIT; @@ -1270,17 +1249,17 @@ CL_API_ENTRY cl_int CL_API_CALL clGetKernelArgInfoIntelFPGA( break; default: - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, "Invalid kernel arg info query"); + ERR_RET(CL_INVALID_VALUE, context, "Invalid kernel arg info query"); } if (result.size == 0) { - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } // should have already signaled if (param_value) { if (param_value_size < result.size) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Parameter return buffer is too small"); + ERR_RET(CL_INVALID_VALUE, context, + "Parameter return buffer is too small"); } RESULT_COPY(param_value, param_value_size); } @@ -1288,7 +1267,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetKernelArgInfoIntelFPGA( if (param_value_size_ret) { *param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -1306,15 +1285,15 @@ CL_API_ENTRY cl_int CL_API_CALL clGetKernelInfoIntelFPGA( void *param_value, size_t *param_value_size_ret) { acl_result_t result; cl_context context; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_kernel_is_valid(kernel)) { - UNLOCK_RETURN(CL_INVALID_KERNEL); + return CL_INVALID_KERNEL; } context = kernel->program->context; - UNLOCK_VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, - param_value_size_ret, context); + VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, param_value_size_ret, + context); RESULT_INIT; @@ -1339,17 +1318,17 @@ CL_API_ENTRY cl_int CL_API_CALL clGetKernelInfoIntelFPGA( break; default: - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, "Invalid kernel info query"); + ERR_RET(CL_INVALID_VALUE, context, "Invalid kernel info query"); } if (result.size == 0) { - 
UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } // should have already signaled if (param_value) { if (param_value_size < result.size) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Parameter return buffer is too small"); + ERR_RET(CL_INVALID_VALUE, context, + "Parameter return buffer is too small"); } RESULT_COPY(param_value, param_value_size); } @@ -1357,7 +1336,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetKernelInfoIntelFPGA( if (param_value_size_ret) { *param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -1376,10 +1355,10 @@ CL_API_ENTRY cl_int CL_API_CALL clGetKernelWorkGroupInfoIntelFPGA( size_t param_value_size, void *param_value, size_t *param_value_size_ret) { acl_result_t result; cl_context context; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_kernel_is_valid(kernel)) { - UNLOCK_RETURN(CL_INVALID_KERNEL); + return CL_INVALID_KERNEL; } context = kernel->program->context; @@ -1396,22 +1375,21 @@ CL_API_ENTRY cl_int CL_API_CALL clGetKernelWorkGroupInfoIntelFPGA( } } if (!matched) { - UNLOCK_ERR_RET(CL_INVALID_DEVICE, context, - "Kernel program is not built for the specified device"); + ERR_RET(CL_INVALID_DEVICE, context, + "Kernel program is not built for the specified device"); } } else { // Must only be one device for this kernel. 
if (kernel->program->num_devices != 1) { - UNLOCK_ERR_RET( - CL_INVALID_DEVICE, context, - "Device is not specified, but kernel is not built for a unique " - "device"); + ERR_RET(CL_INVALID_DEVICE, context, + "Device is not specified, but kernel is not built for a unique " + "device"); } device = kernel->program->device[0]; } - UNLOCK_VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, - param_value_size_ret, context); + VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, param_value_size_ret, + context); RESULT_INIT; @@ -1425,7 +1403,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetKernelWorkGroupInfoIntelFPGA( acl_platform.max_work_item_sizes); break; } else { - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } case CL_KERNEL_WORK_GROUP_SIZE: RESULT_SIZE_T(kernel->accel_def->max_work_group_size); @@ -1445,17 +1423,17 @@ CL_API_ENTRY cl_int CL_API_CALL clGetKernelWorkGroupInfoIntelFPGA( RESULT_ULONG(0); break; default: - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, "Invalid kernel info query"); + ERR_RET(CL_INVALID_VALUE, context, "Invalid kernel info query"); } if (result.size == 0) { - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } // already signalled. 
if (param_value) { if (param_value_size < result.size) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Parameter return buffer is too small"); + ERR_RET(CL_INVALID_VALUE, context, + "Parameter return buffer is too small"); } RESULT_COPY(param_value, param_value_size); } @@ -1463,7 +1441,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetKernelWorkGroupInfoIntelFPGA( if (param_value_size_ret) { *param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -1481,10 +1459,10 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueNativeKernelIntelFPGA( size_t cb_args, cl_uint num_mem_objects, const cl_mem *mem_list, const void **args_mem_loc, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } // Avoid warnings @@ -1500,8 +1478,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueNativeKernelIntelFPGA( event = event; // We don't support native kernels. 
- UNLOCK_ERR_RET(CL_INVALID_OPERATION, command_queue->context, - "Native kernels are not supported."); + ERR_RET(CL_INVALID_OPERATION, command_queue->context, + "Native kernels are not supported."); } ACL_EXPORT @@ -1523,7 +1501,7 @@ clEnqueueTaskIntelFPGA(cl_command_queue command_queue, cl_kernel kernel, size_t task_global_work_size = 1; size_t task_local_work_size = 1; cl_int ret; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; ret = l_enqueue_kernel_with_type( command_queue, kernel, @@ -1531,7 +1509,7 @@ clEnqueueTaskIntelFPGA(cl_command_queue command_queue, cl_kernel kernel, 0, // global work offset &task_global_work_size, &task_local_work_size, num_events_in_wait_list, event_wait_list, event, CL_COMMAND_TASK); - UNLOCK_RETURN(ret); + return ret; } ACL_EXPORT @@ -1551,14 +1529,14 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernelIntelFPGA( const size_t *local_work_size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) { cl_int ret; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; ret = l_enqueue_kernel_with_type( command_queue, kernel, work_dim, global_work_offset, global_work_size, local_work_size, num_events_in_wait_list, event_wait_list, event, CL_COMMAND_NDRANGE_KERNEL); - UNLOCK_RETURN(ret); + return ret; } ACL_EXPORT @@ -1577,13 +1555,13 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernel( ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clResetKernelsIntelFPGA( cl_context context, cl_uint num_devices, const cl_device_id *device_list) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_context_is_valid(context)) - UNLOCK_RETURN(CL_INVALID_CONTEXT); + return CL_INVALID_CONTEXT; if (num_devices == 0 && device_list != NULL) - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "num_devices is 0 while device list is not NULL"); + ERR_RET(CL_INVALID_VALUE, context, + "num_devices is 0 while device list is not NULL"); if (device_list) { // The supplied devices must be associated with the 
context. cl_uint idev, ictxdev; @@ -1596,8 +1574,8 @@ CL_API_ENTRY cl_int CL_API_CALL clResetKernelsIntelFPGA( saw_it = (context->device[ictxdev] == device_list[idev]); } if (!saw_it) { - UNLOCK_ERR_RET(CL_INVALID_DEVICE, context, - "A specified device is not associated with the context"); + ERR_RET(CL_INVALID_DEVICE, context, + "A specified device is not associated with the context"); } } // Ok, each device is associated with the context. @@ -1618,7 +1596,7 @@ CL_API_ENTRY cl_int CL_API_CALL clResetKernelsIntelFPGA( } acl_idle_update(context); // nudge the scheduler to take care of the rest. - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ////////////////////////////// @@ -3195,12 +3173,12 @@ void acl_receive_kernel_update(int activation_id, cl_int status) { // signal handler, which can't lock mutexes, so we don't lock in that case. // All functions called from this one therefore have to use // acl_assert_locked_or_sig() instead of just acl_assert_locked(). + std::unique_lock lock{acl_mutex_wrapper, std::defer_lock}; if (!acl_is_inside_sig()) { - acl_lock(); + lock.lock(); } if (activation_id >= 0 && activation_id < doq->max_ops) { - // This address is stable, given a fixed activation_id. // So we don't run into race conditions. acl_device_op_t *op = doq->op + activation_id; @@ -3219,10 +3197,6 @@ void acl_receive_kernel_update(int activation_id, cl_int status) { // Signal all waiters. acl_signal_device_update(); } - - if (!acl_is_inside_sig()) { - acl_unlock(); - } } // The kernel invocation has completed. diff --git a/src/acl_mem.cpp b/src/acl_mem.cpp index c9287536..00e0f4af 100644 --- a/src/acl_mem.cpp +++ b/src/acl_mem.cpp @@ -118,7 +118,7 @@ void CL_CALLBACK acl_free_allocation_after_event_completion( event_command_exec_status = event_command_exec_status; // Avoiding Windows warning. 
event = event; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (callback_ptrs[0]) { acl_mem_aligned_free(event->context, (acl_aligned_ptr_t *)callback_ptrs[0]); acl_free(callback_ptrs[0]); @@ -126,7 +126,6 @@ void CL_CALLBACK acl_free_allocation_after_event_completion( if (callback_ptrs[1]) clReleaseEvent(((cl_event)callback_ptrs[1])); acl_free(callback_data); - acl_unlock(); } ACL_DEFINE_CL_OBJECT_ALLOC_FUNCTIONS(cl_mem); @@ -136,17 +135,17 @@ ACL_DEFINE_CL_OBJECT_ALLOC_FUNCTIONS(cl_mem); ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clRetainMemObjectIntelFPGA(cl_mem mem) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_mem_is_valid(mem)) { - UNLOCK_RETURN(CL_INVALID_MEM_OBJECT); + return CL_INVALID_MEM_OBJECT; } acl_retain(mem); acl_print_debug_msg("Retain mem[%p] now %u\n", mem, acl_ref_count(mem)); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -156,13 +155,13 @@ CL_API_ENTRY cl_int CL_API_CALL clRetainMemObject(cl_mem mem) { ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObjectIntelFPGA(cl_mem mem) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; // In the double-free case, we'll error out here, for two reasons: // 1) the reference count will be 0. 
// 1) mem->region == 0 if (!acl_mem_is_valid(mem)) { - UNLOCK_RETURN(CL_INVALID_MEM_OBJECT); + return CL_INVALID_MEM_OBJECT; } acl_release(mem); @@ -249,7 +248,7 @@ CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObjectIntelFPGA(cl_mem mem) { clReleaseContext(context); } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -325,19 +324,19 @@ CL_API_ENTRY cl_int clSetMemObjectDestructorCallbackIntelFPGA( void(CL_CALLBACK *pfn_notify)(cl_mem memobj, void *user_data), void *user_data) { acl_mem_destructor_user_callback *cb; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_mem_is_valid(memobj)) { - UNLOCK_RETURN(CL_INVALID_MEM_OBJECT); + return CL_INVALID_MEM_OBJECT; } if (pfn_notify == NULL) { - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } cb = (acl_mem_destructor_user_callback *)acl_malloc( sizeof(acl_mem_destructor_user_callback)); if (!cb) - UNLOCK_RETURN(CL_OUT_OF_HOST_MEMORY); + return CL_OUT_OF_HOST_MEMORY; // Push to the front of the list. 
cb->notify_user_data = user_data; @@ -345,7 +344,7 @@ CL_API_ENTRY cl_int clSetMemObjectDestructorCallbackIntelFPGA( cb->next = memobj->destructor_callback_list; memobj->destructor_callback_list = cb; - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -421,7 +420,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( unsigned int idevice; cl_uint bank_id = 0; cl_uint tmp_mem_id = 0; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; #ifdef MEM_DEBUG_MSG printf("CreateBuffer\n"); @@ -431,8 +430,8 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( switch (*properties) { case CL_MEM_CHANNEL_INTEL: { if (flags & CL_CHANNEL_7_INTELFPGA) { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, - "Both channel flag and channel property are set"); + BAIL_INFO(CL_INVALID_DEVICE, context, + "Both channel flag and channel property are set"); } bank_id = (cl_uint) * (properties + 1); } break; @@ -440,7 +439,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( tmp_mem_id = (cl_uint) * (properties + 1); } break; default: { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid properties"); + BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid properties"); } } properties += 2; @@ -448,11 +447,10 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( #ifndef REMOVE_VALID_CHECKS if (!acl_context_is_valid(context)) - UNLOCK_BAIL(CL_INVALID_CONTEXT); + BAIL(CL_INVALID_CONTEXT); if (bank_id > 7) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "Invalid channel property value"); + BAIL_INFO(CL_INVALID_VALUE, context, "Invalid channel property value"); } // Check flags @@ -463,8 +461,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS | CL_CHANNEL_7_INTELFPGA | CL_MEM_HETEROGENEOUS_INTELFPGA)) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "Invalid or unsupported flags"); + BAIL_INFO(CL_INVALID_VALUE, context, "Invalid or unsupported flags"); } { @@ 
-478,8 +475,8 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( num_rw_specs++; // Default to CL_MEM_READ_WRITE. if (num_rw_specs > 1) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "More than one read/write flag is specified"); + BAIL_INFO(CL_INVALID_VALUE, context, + "More than one read/write flag is specified"); } if (num_rw_specs == 0) flags |= CL_MEM_READ_WRITE; @@ -493,7 +490,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( if (flags & CL_MEM_HOST_NO_ACCESS) num_rw_specs++; if (num_rw_specs > 1) { - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_INVALID_VALUE, context, "More than one host read/write/no_access flags are specified"); } @@ -501,65 +498,60 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( // Check exclusion between use-host-ptr and others if ((flags & CL_MEM_USE_HOST_PTR) && (flags & CL_MEM_ALLOC_HOST_PTR)) { - UNLOCK_BAIL_INFO( - CL_INVALID_VALUE, context, - "Flags CL_MEM_USE_HOST_PTR and CL_MEM_ALLOC_HOST_PTR are both " - "specified but are mutually exclusive"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Flags CL_MEM_USE_HOST_PTR and CL_MEM_ALLOC_HOST_PTR are both " + "specified but are mutually exclusive"); } if ((flags & CL_MEM_USE_HOST_PTR) && (flags & CL_MEM_COPY_HOST_PTR)) { - UNLOCK_BAIL_INFO( - CL_INVALID_VALUE, context, - "Flags CL_MEM_USE_HOST_PTR and CL_MEM_COPY_HOST_PTR are both " - "specified but are mutually exclusive"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Flags CL_MEM_USE_HOST_PTR and CL_MEM_COPY_HOST_PTR are both " + "specified but are mutually exclusive"); } } // Check host_ptr if (host_ptr == 0 && (flags & CL_MEM_USE_HOST_PTR)) { - UNLOCK_BAIL_INFO( - CL_INVALID_HOST_PTR, context, - "Flag CL_MEM_USE_HOST_PTR is specified, but no host pointer is " - "provided"); + BAIL_INFO(CL_INVALID_HOST_PTR, context, + "Flag CL_MEM_USE_HOST_PTR is specified, but no host pointer is " + "provided"); } if (host_ptr == 0 && (flags & CL_MEM_COPY_HOST_PTR)) { - UNLOCK_BAIL_INFO( - CL_INVALID_HOST_PTR, context, - "Flag 
CL_MEM_COPY_HOST_PTR is specified, but no host pointer is " - "provided"); + BAIL_INFO(CL_INVALID_HOST_PTR, context, + "Flag CL_MEM_COPY_HOST_PTR is specified, but no host pointer is " + "provided"); } if (host_ptr != 0 && !(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))) { - UNLOCK_BAIL_INFO( - CL_INVALID_HOST_PTR, context, - "A host pointer is provided without also specifying one of " - "CL_MEM_USE_HOST_PTR or CL_MEM_COPY_HOST_PTR"); + BAIL_INFO(CL_INVALID_HOST_PTR, context, + "A host pointer is provided without also specifying one of " + "CL_MEM_USE_HOST_PTR or CL_MEM_COPY_HOST_PTR"); } // Check size if (size == 0) { - UNLOCK_BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, - "Memory buffer cannot be of size zero"); + BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, + "Memory buffer cannot be of size zero"); } // If using host memory, then just accept any size. if (!(flags & CL_MEM_USE_HOST_PTR) && (size > context->max_mem_alloc_size)) { - UNLOCK_BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, - "Requested memory object size exceeds device limits"); + BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, + "Requested memory object size exceeds device limits"); } #endif auto *new_block = acl_new(); if (!new_block) { - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate a cl_mem object"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate a cl_mem object"); } // Now actually allocate the mem object. 
mem = acl_alloc_cl_mem(); if (!mem) { acl_delete(new_block); - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate a cl_mem object"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate a cl_mem object"); } mem->mem_id = tmp_mem_id; @@ -633,11 +625,10 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( context_has_device_with_physical_mem) { acl_delete(mem->block_allocation); acl_free_cl_mem(mem); - UNLOCK_BAIL_INFO( - CL_MEM_OBJECT_ALLOCATION_FAILURE, context, - "Detected devices with only SVM and on-board memory in the same " - "context. Altera does not currently support this combination and " - "cannot allocate requested memory object."); + BAIL_INFO(CL_MEM_OBJECT_ALLOCATION_FAILURE, context, + "Detected devices with only SVM and on-board memory in the same " + "context. Altera does not currently support this combination and " + "cannot allocate requested memory object."); } else { mem->is_svm = CL_FALSE; } @@ -659,7 +650,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( ptr.size = size; mem->host_mem = ptr; } else { - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_INVALID_HOST_PTR, context, "On a system that only supports SVM and does not support " "fine-grained system SVM, " @@ -743,8 +734,8 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( if (mem->host_mem.raw == 0) { acl_free_cl_mem(mem); - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate a buffer in host memory"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate a buffer in host memory"); } mem->block_allocation->range.begin = mem->host_mem.aligned_ptr; mem->block_allocation->range.next = @@ -784,9 +775,8 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( if (mem->host_mem.raw == 0) { acl_delete(mem->block_allocation); acl_free_cl_mem(mem); - UNLOCK_BAIL_INFO( - CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate backing store for a device buffer"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate 
backing store for a device buffer"); } } @@ -829,10 +819,9 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( : CL_OUT_OF_RESOURCES; acl_delete(mem->block_allocation); acl_free_cl_mem(mem); - UNLOCK_BAIL_INFO( - code, context, - "Could not allocate a buffer of the specified size due to " - "fragmentation or exhaustion"); + BAIL_INFO(code, context, + "Could not allocate a buffer of the specified size due to " + "fragmentation or exhaustion"); } } @@ -937,8 +926,8 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( acl_delete(mem->block_allocation); acl_free_cl_mem(mem); // Need an error status valid to return from this function - UNLOCK_BAIL_INFO(CL_OUT_OF_RESOURCES, context, - "Could not copy data into the allocated buffer"); + BAIL_INFO(CL_OUT_OF_RESOURCES, context, + "Could not copy data into the allocated buffer"); } } } @@ -953,7 +942,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( printf("CreateBuffer Finished: %zx\n", (size_t)result); #endif - UNLOCK_RETURN(result); + return result; } // Create a buffer. 
@@ -1003,18 +992,18 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBufferIntelFPGA( cl_mem mem; int num_rw_specs = 0; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; #ifdef MEM_DEBUG_MSG printf("CreateSubBuffer"); #endif if (!acl_mem_is_valid(buffer)) { - UNLOCK_BAIL(CL_INVALID_MEM_OBJECT); + BAIL(CL_INVALID_MEM_OBJECT); } if (buffer->mem_object_type != CL_MEM_OBJECT_BUFFER || buffer->fields.buffer_objs.is_subbuffer) { - UNLOCK_BAIL(CL_INVALID_MEM_OBJECT); + BAIL(CL_INVALID_MEM_OBJECT); } context = buffer->context; @@ -1025,7 +1014,7 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBufferIntelFPGA( ~(CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS | CL_CHANNEL_7_INTELFPGA | CL_MEM_HETEROGENEOUS_INTELFPGA)) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "Invalid or unsupported flags"); + BAIL_INFO(CL_INVALID_VALUE, context, "Invalid or unsupported flags"); } // Check for exactly one read/write spec @@ -1036,8 +1025,8 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBufferIntelFPGA( if (flags & CL_MEM_WRITE_ONLY) num_rw_specs++; if (num_rw_specs > 1) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "More than one read/write flag is specified"); + BAIL_INFO(CL_INVALID_VALUE, context, + "More than one read/write flag is specified"); } // Check for exactly one host read/write/no_access spec @@ -1049,9 +1038,8 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBufferIntelFPGA( if (flags & CL_MEM_HOST_NO_ACCESS) num_rw_specs++; if (num_rw_specs > 1) { - UNLOCK_BAIL_INFO( - CL_INVALID_VALUE, context, - "More than one host read/write/no_access flags are specified"); + BAIL_INFO(CL_INVALID_VALUE, context, + "More than one host read/write/no_access flags are specified"); } // If the parent buffer is write only then the sub-buffer cannot read. 
@@ -1066,30 +1054,27 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBufferIntelFPGA( (flags & CL_MEM_HOST_WRITE_ONLY)) || ((buffer->flags & CL_MEM_HOST_NO_ACCESS) && ((flags & CL_MEM_HOST_READ_ONLY) || (flags & CL_MEM_HOST_WRITE_ONLY)))) { - UNLOCK_BAIL_INFO( - CL_INVALID_VALUE, context, - "Read/write flags are incompatible with the parent buffer"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Read/write flags are incompatible with the parent buffer"); } if (buffer_create_type != CL_BUFFER_CREATE_TYPE_REGION) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "Invalid buffer_create_type value"); + BAIL_INFO(CL_INVALID_VALUE, context, "Invalid buffer_create_type value"); } if (buffer_create_info == NULL) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "Empty buffer_create_info"); + BAIL_INFO(CL_INVALID_VALUE, context, "Empty buffer_create_info"); } if (((cl_buffer_region *)buffer_create_info)->origin + ((cl_buffer_region *)buffer_create_info)->size > buffer->size) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "Origin plus size is out of bounds of parent buffer"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Origin plus size is out of bounds of parent buffer"); } if (((cl_buffer_region *)buffer_create_info)->size == 0) { - UNLOCK_BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, - "Sub-buffer size is zero"); + BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, "Sub-buffer size is zero"); } for (idevice = 0; idevice < context->num_devices; ++idevice) { @@ -1100,7 +1085,7 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBufferIntelFPGA( context->device[idevice], CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(int), &device_mem_base_addr_align, NULL); if (status_code != CL_SUCCESS) { - UNLOCK_BAIL(CL_OUT_OF_HOST_MEMORY); + BAIL(CL_OUT_OF_HOST_MEMORY); } if (!((((cl_buffer_region *)buffer_create_info)->origin * 8) & @@ -1111,22 +1096,21 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBufferIntelFPGA( } if (!aligns_with_any_device) { - UNLOCK_BAIL_INFO( - CL_MISALIGNED_SUB_BUFFER_OFFSET, context, - 
"Sub-buffer offset does not align with any device in context"); + BAIL_INFO(CL_MISALIGNED_SUB_BUFFER_OFFSET, context, + "Sub-buffer offset does not align with any device in context"); } acl_block_allocation_t *new_block = acl_new(); if (!new_block) { - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate a cl_mem object"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate a cl_mem object"); } // Now actually allocate the mem object. mem = acl_alloc_cl_mem(); if (!mem) { acl_delete(new_block); - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate a cl_mem object"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate a cl_mem object"); } mem->block_allocation = new_block; @@ -1176,7 +1160,7 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBufferIntelFPGA( buffer->host_mem = acl_mem_aligned_malloc(buffer->size); if (!buffer->host_mem.raw) { acl_free_cl_mem(mem); - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_OUT_OF_HOST_MEMORY, context, "Could not allocate backing store for a device buffer with sub " "buffers"); @@ -1275,9 +1259,8 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBufferIntelFPGA( cl_uint sub_bank_id = ((cl_uint)sub_flags & CL_CHANNEL_7_INTELFPGA) / CL_CHANNEL_1_INTELFPGA; if (sub_bank_id != buffer->bank_id) { - UNLOCK_BAIL_INFO( - CL_INVALID_VALUE, context, - "Sub-buffer bank id does not match parent buffer bank id"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Sub-buffer bank id does not match parent buffer bank id"); } } } @@ -1305,10 +1288,9 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBufferIntelFPGA( ? 
CL_OUT_OF_HOST_MEMORY : CL_OUT_OF_RESOURCES; acl_free_cl_mem(mem); - UNLOCK_BAIL_INFO( - code, context, - "Could not allocate a buffer of the specified size due to " - "fragmentation or exhaustion"); + BAIL_INFO(code, context, + "Could not allocate a buffer of the specified size due to " + "fragmentation or exhaustion"); } } @@ -1330,9 +1312,8 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBufferIntelFPGA( cl_uint sub_bank_id = ((cl_uint)sub_flags & CL_CHANNEL_7_INTELFPGA) / CL_CHANNEL_1_INTELFPGA; if (sub_bank_id != buffer->bank_id) { - UNLOCK_BAIL_INFO( - CL_INVALID_VALUE, context, - "Sub-buffer bank id does not match parent buffer bank id"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Sub-buffer bank id does not match parent buffer bank id"); } } } @@ -1352,7 +1333,7 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBufferIntelFPGA( printf(" %zx\n", (size_t)result); #endif - UNLOCK_RETURN(result); + return result; } ACL_EXPORT @@ -1369,12 +1350,12 @@ CL_API_ENTRY cl_int CL_API_CALL clGetMemObjectInfoIntelFPGA( void *param_value, size_t *param_value_size_ret) { acl_result_t result; cl_context context; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; RESULT_INIT; if (!acl_mem_is_valid(mem)) { - UNLOCK_RETURN(CL_INVALID_MEM_OBJECT); + return CL_INVALID_MEM_OBJECT; } context = mem->context; @@ -1454,14 +1435,14 @@ CL_API_ENTRY cl_int CL_API_CALL clGetMemObjectInfoIntelFPGA( } if (result.size == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Invalid or unsupported memory object query"); + ERR_RET(CL_INVALID_VALUE, context, + "Invalid or unsupported memory object query"); } if (param_value) { if (param_value_size < result.size) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Parameter return buffer is too small"); + ERR_RET(CL_INVALID_VALUE, context, + "Parameter return buffer is too small"); } RESULT_COPY(param_value, param_value_size); } @@ -1469,7 +1450,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetMemObjectInfoIntelFPGA( if (param_value_size_ret) { 
*param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -1497,10 +1478,10 @@ ACL_EXPORT CL_API_ENTRY cl_mem CL_API_CALL clCreateImageIntelFPGA( unsigned iformat; cl_bool found_image_format; unsigned int idevice; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_context_is_valid(context)) { - UNLOCK_BAIL(CL_INVALID_CONTEXT); + BAIL(CL_INVALID_CONTEXT); } // Check the maximum image sizes for all available devices in the context @@ -1530,20 +1511,20 @@ ACL_EXPORT CL_API_ENTRY cl_mem CL_API_CALL clCreateImageIntelFPGA( } if (image_format == NULL) { - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, context, - "image_format is NULL"); + BAIL_INFO(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, context, + "image_format is NULL"); } element_size = acl_get_image_element_size(context, image_format, &local_errcode_ret); if (local_errcode_ret != CL_SUCCESS) { - UNLOCK_BAIL(local_errcode_ret); + BAIL(local_errcode_ret); } local_errcode_ret = clGetSupportedImageFormats( context, flags, image_desc->image_type, 0, NULL, &num_image_formats); if (local_errcode_ret != CL_SUCCESS) { - UNLOCK_BAIL(local_errcode_ret); + BAIL(local_errcode_ret); } supported_image_formats = (cl_image_format *)acl_malloc( sizeof(cl_image_format) * num_image_formats); @@ -1565,11 +1546,11 @@ ACL_EXPORT CL_API_ENTRY cl_mem CL_API_CALL clCreateImageIntelFPGA( acl_free(supported_image_formats); if (local_errcode_ret != CL_SUCCESS) { - UNLOCK_BAIL(local_errcode_ret); + BAIL(local_errcode_ret); } if (!found_image_format) { - UNLOCK_BAIL_INFO(CL_IMAGE_FORMAT_NOT_SUPPORTED, context, - "Unsupported image format"); + BAIL_INFO(CL_IMAGE_FORMAT_NOT_SUPPORTED, context, + "Unsupported image format"); } // Allocate the memory for the image. 
This size (and sometimes the method) @@ -1580,17 +1561,17 @@ ACL_EXPORT CL_API_ENTRY cl_mem CL_API_CALL clCreateImageIntelFPGA( element_size * image_desc->image_width + get_offset_for_image_param(context, image_desc->image_type, "data"); if (image_desc->image_width <= 0) { - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, - "image width cannot be zero for a 1D object"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image width cannot be zero for a 1D object"); } if (image_size > context->max_mem_alloc_size) { - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, - "image size exceeds maximum alloc size"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image size exceeds maximum alloc size"); } return_buffer = clCreateBuffer(context, flags, image_size, host_ptr, errcode_ret); if (return_buffer == NULL) - UNLOCK_RETURN(NULL); + return NULL; return_buffer->fields.image_objs.image_format = (cl_image_format *)acl_malloc(sizeof(cl_image_format)); return_buffer->fields.image_objs.image_desc = @@ -1598,8 +1579,8 @@ ACL_EXPORT CL_API_ENTRY cl_mem CL_API_CALL clCreateImageIntelFPGA( break; case CL_MEM_OBJECT_IMAGE1D_BUFFER: // Need to actually allocate/assign the buffer data here - UNLOCK_BAIL_INFO(CL_IMAGE_FORMAT_NOT_SUPPORTED, context, - "Do not support images created from buffers"); + BAIL_INFO(CL_IMAGE_FORMAT_NOT_SUPPORTED, context, + "Do not support images created from buffers"); // Need to actually allocate/assign the buffer data here break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: @@ -1607,17 +1588,17 @@ ACL_EXPORT CL_API_ENTRY cl_mem CL_API_CALL clCreateImageIntelFPGA( element_size * image_desc->image_width * image_desc->image_array_size + get_offset_for_image_param(context, image_desc->image_type, "data"); if (image_desc->image_width <= 0) { - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, - "image width cannot be zero for a 1D object"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image width cannot be zero for a 1D object"); } if (image_size > 
context->max_mem_alloc_size) { - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, - "image size exceeds maximum alloc size"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image size exceeds maximum alloc size"); } return_buffer = clCreateBuffer(context, flags, image_size, host_ptr, errcode_ret); if (return_buffer == NULL) - UNLOCK_RETURN(NULL); + return NULL; return_buffer->fields.image_objs.image_format = (cl_image_format *)acl_malloc(sizeof(cl_image_format)); return_buffer->fields.image_objs.image_desc = @@ -1627,45 +1608,45 @@ ACL_EXPORT CL_API_ENTRY cl_mem CL_API_CALL clCreateImageIntelFPGA( // If we change this, need to actually allocate/assign the buffer data here if (image_desc->mem_object != NULL && image_desc->mem_object->mem_object_type == CL_MEM_OBJECT_BUFFER) { - UNLOCK_BAIL_INFO(CL_IMAGE_FORMAT_NOT_SUPPORTED, context, - "Do not support images created from buffers"); + BAIL_INFO(CL_IMAGE_FORMAT_NOT_SUPPORTED, context, + "Do not support images created from buffers"); // Copy information from the other image object } else if (image_desc->mem_object != NULL && image_desc->mem_object->mem_object_type == CL_MEM_OBJECT_BUFFER) { - UNLOCK_BAIL_INFO(CL_IMAGE_FORMAT_NOT_SUPPORTED, context, - "Do not support images created from other images"); + BAIL_INFO(CL_IMAGE_FORMAT_NOT_SUPPORTED, context, + "Do not support images created from other images"); // Allocate a new image object } else { image_size = element_size * image_desc->image_width * image_desc->image_height + get_offset_for_image_param(context, image_desc->image_type, "data"); if (image_desc->image_width <= 0) { - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, - "image width cannot be zero for a 2D object"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image width cannot be zero for a 2D object"); } if (image_desc->image_width > max_2d_image_width) { - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_INVALID_IMAGE_SIZE, context, "image width exceeds maximum width for all devices in context"); } if 
(image_desc->image_height <= 0) { - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, - "image height cannot be zero for a 2D object"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image height cannot be zero for a 2D object"); } if (image_desc->image_height > max_2d_image_height) { - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_INVALID_IMAGE_SIZE, context, "1 image height exceeds maximum height for all devices in context"); } if (image_size > context->max_mem_alloc_size) { - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, - "image size exceeds maximum alloc size"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image size exceeds maximum alloc size"); } return_buffer = clCreateBuffer(context, flags, image_size, host_ptr, errcode_ret); if (return_buffer == NULL) - UNLOCK_RETURN(NULL); + return NULL; return_buffer->fields.image_objs.image_format = (cl_image_format *)acl_malloc(sizeof(cl_image_format)); return_buffer->fields.image_objs.image_desc = @@ -1678,22 +1659,21 @@ ACL_EXPORT CL_API_ENTRY cl_mem CL_API_CALL clCreateImageIntelFPGA( image_desc->image_array_size + get_offset_for_image_param(context, image_desc->image_type, "data"); if (image_desc->image_width <= 0) - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, - "image width cannot be zero for a 2D object"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image width cannot be zero for a 2D object"); if (image_desc->image_width > max_2d_image_width) - UNLOCK_BAIL_INFO( - CL_INVALID_IMAGE_SIZE, context, - "image width exceeds maximum width for all devices in context"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image width exceeds maximum width for all devices in context"); if (image_desc->image_height <= 0) - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, - "image height cannot be zero for a 2D object"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image height cannot be zero for a 2D object"); if (image_desc->image_height > max_2d_image_height) - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_INVALID_IMAGE_SIZE, context, 
"2 image height exceeds maximum height for all devices in context"); if (image_size > context->max_mem_alloc_size) - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, - "image size exceeds maximum alloc size"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image size exceeds maximum alloc size"); return_buffer = clCreateBuffer(context, flags, image_size, host_ptr, errcode_ret); if (return_buffer == NULL) @@ -1709,41 +1689,38 @@ ACL_EXPORT CL_API_ENTRY cl_mem CL_API_CALL clCreateImageIntelFPGA( image_desc->image_depth + get_offset_for_image_param(context, image_desc->image_type, "data"); if (image_desc->image_width <= 0) - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, - "image width cannot be zero for a 3D object"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image width cannot be zero for a 3D object"); if (image_desc->image_width > max_3d_image_width) - UNLOCK_BAIL_INFO( - CL_INVALID_IMAGE_SIZE, context, - "image width exceeds maximum width for all devices in context"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image width exceeds maximum width for all devices in context"); if (image_desc->image_height <= 0) - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, - "image height cannot be zero for a 3D object"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image height cannot be zero for a 3D object"); if (image_desc->image_height > max_3d_image_height) - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_INVALID_IMAGE_SIZE, context, "image height exceeds maximum height for all devices in context"); if (image_desc->image_depth <= 0) - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, - "image depth cannot be zero for a 3D object"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image depth cannot be zero for a 3D object"); if (image_desc->image_depth > max_3d_image_depth) - UNLOCK_BAIL_INFO( - CL_INVALID_IMAGE_SIZE, context, - "image depth exceeds maximum depth for all devices in context"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image depth exceeds maximum depth for 
all devices in context"); if (image_size > context->max_mem_alloc_size) - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, - "image size exceeds maximum alloc size"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image size exceeds maximum alloc size"); return_buffer = clCreateBuffer(context, flags, image_size, host_ptr, errcode_ret); if (return_buffer == NULL) - UNLOCK_RETURN(NULL); + return NULL; return_buffer->fields.image_objs.image_format = (cl_image_format *)acl_malloc(sizeof(cl_image_format)); return_buffer->fields.image_objs.image_desc = (cl_image_desc *)acl_malloc(sizeof(cl_image_desc)); break; default: - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_DESCRIPTOR, context, - "invalid image type"); + BAIL_INFO(CL_INVALID_IMAGE_DESCRIPTOR, context, "invalid image type"); break; } @@ -1761,8 +1738,8 @@ ACL_EXPORT CL_API_ENTRY cl_mem CL_API_CALL clCreateImageIntelFPGA( if (!return_buffer->host_mem.aligned_ptr) { return_buffer->host_mem = acl_mem_aligned_malloc(image_size); if (return_buffer->host_mem.raw == 0) { - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate backing store for a device image"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate backing store for a device image"); } } @@ -1773,7 +1750,7 @@ ACL_EXPORT CL_API_ENTRY cl_mem CL_API_CALL clCreateImageIntelFPGA( &(acl_platform.host_user_mem))) { copy_image_metadata(return_buffer); } - UNLOCK_RETURN(return_buffer); + return return_buffer; } ACL_EXPORT CL_API_ENTRY cl_mem CL_API_CALL clCreateImage( @@ -1894,18 +1871,18 @@ CL_API_ENTRY cl_int CL_API_CALL clGetSupportedImageFormatsIntelFPGA( {CL_BGRA, CL_UNORM_INT8}, }; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_context_is_valid(context)) { - UNLOCK_RETURN(CL_INVALID_CONTEXT); + return CL_INVALID_CONTEXT; } if (num_entries == 0 && image_formats) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "num_entries is zero but image formats array is specified"); + ERR_RET(CL_INVALID_VALUE, context, + "num_entries 
is zero but image formats array is specified"); } if (num_entries > 0 && image_formats == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "num_entries is non-zero but image_formats array is NULL"); + ERR_RET(CL_INVALID_VALUE, context, + "num_entries is non-zero but image_formats array is NULL"); } switch (image_type) { case CL_MEM_OBJECT_IMAGE2D: @@ -1916,13 +1893,12 @@ CL_API_ENTRY cl_int CL_API_CALL clGetSupportedImageFormatsIntelFPGA( case CL_MEM_OBJECT_IMAGE1D_BUFFER: break; default: - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Invalid or unsupported image type"); + ERR_RET(CL_INVALID_VALUE, context, "Invalid or unsupported image type"); } if (flags & ~(CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR)) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, "Invalid flags"); + ERR_RET(CL_INVALID_VALUE, context, "Invalid flags"); } if (num_image_formats) { @@ -1938,7 +1914,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetSupportedImageFormatsIntelFPGA( supported_image_formats[i].image_channel_data_type; } } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -1957,19 +1933,18 @@ CL_API_ENTRY cl_int CL_API_CALL clGetImageInfoIntelFPGA( void *param_value, size_t *param_value_size_ret) { acl_result_t result; cl_context context; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; RESULT_INIT; if (!acl_mem_is_valid(image)) { - UNLOCK_RETURN(CL_INVALID_MEM_OBJECT); + return CL_INVALID_MEM_OBJECT; } context = image->context; if (!is_image(image)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, context, - "Memory object is not an image"); + ERR_RET(CL_INVALID_MEM_OBJECT, context, "Memory object is not an image"); } switch (param_name) { @@ -2000,14 +1975,14 @@ CL_API_ENTRY cl_int CL_API_CALL clGetImageInfoIntelFPGA( } if (result.size == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Invalid or unsupported memory object query"); + ERR_RET(CL_INVALID_VALUE, context, + "Invalid or 
unsupported memory object query"); } if (param_value) { if (param_value_size < result.size) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Parameter return buffer is too small"); + ERR_RET(CL_INVALID_VALUE, context, + "Parameter return buffer is too small"); } RESULT_COPY(param_value, param_value_size); } @@ -2015,7 +1990,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetImageInfoIntelFPGA( if (param_value_size_ret) { *param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -2040,22 +2015,22 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadImageIntelFPGA( size_t tmp_row_pitch, tmp_slice_pitch; cl_int errcode_ret; size_t src_element_size; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (ptr == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer argument cannot be NULL"); } if (image != NULL) { src_element_size = acl_get_image_element_size( image->context, image->fields.image_objs.image_format, &errcode_ret); if (errcode_ret != CL_SUCCESS) { - UNLOCK_RETURN(errcode_ret); + return errcode_ret; } } else { src_element_size = 0; @@ -2076,8 +2051,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadImageIntelFPGA( if (row_pitch != 0) { if (row_pitch < image->fields.image_objs.image_desc->image_width * src_element_size) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Invalid row pitch provided"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Invalid row pitch provided"); } tmp_row_pitch = row_pitch; } else { @@ -2096,20 +2071,20 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadImageIntelFPGA( // Allow the user to override the default slice pitch if (slice_pitch != 0) { if (slice_pitch < tmp_slice_pitch) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, 
command_queue->context, - "Invalid row pitch provided"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Invalid row pitch provided"); } tmp_slice_pitch = slice_pitch; } if (!is_image(image)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Memory object is not an image"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Memory object is not an image"); } if (!acl_bind_buffer_to_device(command_queue->device, image)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } { @@ -2119,7 +2094,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadImageIntelFPGA( tmp_dst_offset, // see creation of the unwrapped_host_mem tmp_row_pitch, tmp_slice_pitch, tmp_cb, num_events_in_wait_list, event_wait_list, event, CL_COMMAND_READ_BUFFER, 0); - UNLOCK_RETURN(ret); + return ret; } } @@ -2146,20 +2121,20 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteImageIntelFPGA( size_t tmp_row_pitch, tmp_slice_pitch; cl_int errcode_ret; size_t dst_element_size; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (image != NULL) { dst_element_size = acl_get_image_element_size( image->context, image->fields.image_objs.image_format, &errcode_ret); if (errcode_ret != CL_SUCCESS) { - UNLOCK_RETURN(errcode_ret); + return errcode_ret; } } else { - UNLOCK_RETURN(CL_INVALID_MEM_OBJECT); + return CL_INVALID_MEM_OBJECT; } if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } tmp_src_offset[0] = (size_t)((char *)ptr - (const char *)ACL_MEM_ALIGN); @@ -2177,8 +2152,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteImageIntelFPGA( if (input_row_pitch != 0) { if (input_row_pitch < image->fields.image_objs.image_desc->image_width * dst_element_size) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Invalid row pitch 
provided"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Invalid row pitch provided"); } tmp_row_pitch = input_row_pitch; } else { @@ -2197,20 +2172,20 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteImageIntelFPGA( // Allow the user to override the default slice pitch if (input_slice_pitch != 0) { if (input_slice_pitch < tmp_slice_pitch) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Invalid row pitch provided"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Invalid row pitch provided"); } tmp_slice_pitch = input_slice_pitch; } if (!is_image(image)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Memory object is not an image"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Memory object is not an image"); } if (!acl_bind_buffer_to_device(command_queue->device, image)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } { @@ -2220,7 +2195,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteImageIntelFPGA( tmp_row_pitch, tmp_slice_pitch, image, tmp_dst_offset, 0, 0, tmp_cb, num_events_in_wait_list, event_wait_list, event, CL_COMMAND_WRITE_BUFFER, 0); - UNLOCK_RETURN(ret); + return ret; } } @@ -2250,25 +2225,25 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueFillImageIntelFPGA( char converted_fill_color[16]; // Maximum number of bytes needed to keep a // pixel. 
cl_event tmp_event; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (image != NULL) { dst_element_size = acl_get_image_element_size( image->context, image->fields.image_objs.image_format, &errcode_ret); if (errcode_ret != CL_SUCCESS) { - UNLOCK_RETURN(errcode_ret); + return errcode_ret; } } else { - UNLOCK_RETURN(CL_INVALID_MEM_OBJECT); + return CL_INVALID_MEM_OBJECT; } if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (!is_image(image)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Memory object is not an image"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Memory object is not an image"); } // Replicating the color in the region allocated in host mem. @@ -2276,8 +2251,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueFillImageIntelFPGA( color_format.image_channel_order = CL_RGBA; if (fill_color == NULL) - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "fill_color cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "fill_color cannot be NULL"); size_t host_mem_size = region[0] * region[1] * region[2] * dst_element_size; @@ -2326,24 +2301,24 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueFillImageIntelFPGA( errcode_ret = -1; } if (errcode_ret != CL_SUCCESS) - UNLOCK_ERR_RET(CL_IMAGE_FORMAT_NOT_SUPPORTED, command_queue->context, - "Failed to convert fill_color to the appropriate image " - "channel format and order"); + ERR_RET(CL_IMAGE_FORMAT_NOT_SUPPORTED, command_queue->context, + "Failed to convert fill_color to the appropriate image " + "channel format and order"); // This array is passed to clSetEventCallback for releasing the // allocated memory and releasing the event, if *event is null. 
void **callback_data = (void **)acl_malloc(sizeof(void *) * 2); if (!callback_data) { - UNLOCK_ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, - "Out of host memory"); + ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, + "Out of host memory"); } acl_aligned_ptr_t *aligned_ptr = (acl_aligned_ptr_t *)acl_malloc(sizeof(acl_aligned_ptr_t)); if (!aligned_ptr) { acl_free(callback_data); - UNLOCK_ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, - "Out of host memory"); + ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, + "Out of host memory"); } *aligned_ptr = acl_mem_aligned_malloc(host_mem_size); @@ -2351,8 +2326,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueFillImageIntelFPGA( if (!ptr) { acl_free(aligned_ptr); acl_free(callback_data); - UNLOCK_ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, - "Out of host memory"); + ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, + "Out of host memory"); } for (cl_uint i = 0; i < region[0] * region[1] * region[2]; i++) { @@ -2391,8 +2366,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueFillImageIntelFPGA( acl_free(aligned_ptr); acl_free(callback_data); } - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } { @@ -2407,7 +2382,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueFillImageIntelFPGA( aligned_ptr); // Cleaning up before failing. 
acl_free(aligned_ptr); acl_free(callback_data); - UNLOCK_RETURN(ret); + return ret; } callback_data[0] = (void *)(aligned_ptr); @@ -2422,7 +2397,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueFillImageIntelFPGA( clSetEventCallback(tmp_event, CL_COMPLETE, acl_free_allocation_after_event_completion, (void *)callback_data); - UNLOCK_RETURN(ret); + return ret; } } @@ -2447,27 +2422,27 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImageIntelFPGA( size_t tmp_cb[3]; size_t element_size; cl_int errcode_ret; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (src_image == NULL || !is_image(src_image)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Source memory object is not an image"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Source memory object is not an image"); } if (dst_image == NULL || !is_image(dst_image)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Source memory object is not an image"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Source memory object is not an image"); } if ((src_image->fields.image_objs.image_format->image_channel_order != dst_image->fields.image_objs.image_format->image_channel_order) || (src_image->fields.image_objs.image_format->image_channel_data_type != dst_image->fields.image_objs.image_format->image_channel_data_type)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Source memory object is not an image"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Source memory object is not an image"); } // Doesn't matter if we look at src or dst, already verified that they are the @@ -2476,8 +2451,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImageIntelFPGA( command_queue->context, src_image->fields.image_objs.image_format, &errcode_ret); if (errcode_ret != CL_SUCCESS) { - 
UNLOCK_ERR_RET(errcode_ret, command_queue->context, - "Source memory object is not an image"); + ERR_RET(errcode_ret, command_queue->context, + "Source memory object is not an image"); } tmp_src_offset[0] = src_origin[0]; @@ -2496,17 +2471,16 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImageIntelFPGA( dst_image->fields.image_objs.image_format->image_channel_order) || (src_image->fields.image_objs.image_format->image_channel_data_type != dst_image->fields.image_objs.image_format->image_channel_data_type)) { - UNLOCK_ERR_RET( - CL_IMAGE_FORMAT_MISMATCH, command_queue->context, - "Mismatch in image format between source & destination image"); + ERR_RET(CL_IMAGE_FORMAT_MISMATCH, command_queue->context, + "Mismatch in image format between source & destination image"); } if (!acl_bind_buffer_to_device(command_queue->device, src_image)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } if (!acl_bind_buffer_to_device(command_queue->device, dst_image)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } { @@ -2514,7 +2488,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImageIntelFPGA( command_queue, 0, src_image, tmp_src_offset, 0, 0, dst_image, tmp_dst_offset, 0, 0, tmp_cb, num_events_in_wait_list, event_wait_list, event, CL_COMMAND_COPY_BUFFER, 0); - UNLOCK_RETURN(ret); + return ret; } } @@ -2541,18 +2515,18 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImageToBufferIntelFPGA( size_t tmp_row_pitch, tmp_slice_pitch; cl_int errcode_ret; size_t src_element_size; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if 
(!acl_mem_is_valid(src_image)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Source image is invalid"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Source image is invalid"); } if (!acl_mem_is_valid(dst_buffer)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Destination buffer is invalid"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Destination buffer is invalid"); } if (src_image != NULL) { @@ -2560,7 +2534,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImageToBufferIntelFPGA( src_image->context, src_image->fields.image_objs.image_format, &errcode_ret); if (errcode_ret != CL_SUCCESS) { - UNLOCK_RETURN(errcode_ret); + return errcode_ret; } } else { src_element_size = 0; @@ -2590,17 +2564,17 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImageToBufferIntelFPGA( } if (!is_image(src_image)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Memory object is not an image"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Memory object is not an image"); } if (!acl_bind_buffer_to_device(command_queue->device, src_image)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } if (!acl_bind_buffer_to_device(command_queue->device, dst_buffer)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } { @@ -2609,7 +2583,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImageToBufferIntelFPGA( tmp_dst_offset, // see creation of the unwrapped_host_mem tmp_row_pitch, tmp_slice_pitch, tmp_cb, num_events_in_wait_list, event_wait_list, event, CL_COMMAND_READ_BUFFER, 0); - UNLOCK_RETURN(ret); + return ret; } } @@ -2636,29 +2610,29 @@ CL_API_ENTRY cl_int 
CL_API_CALL clEnqueueCopyBufferToImageIntelFPGA( size_t tmp_row_pitch, tmp_slice_pitch; cl_int errcode_ret; size_t dst_element_size; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (dst_image != NULL) { dst_element_size = acl_get_image_element_size( dst_image->context, dst_image->fields.image_objs.image_format, &errcode_ret); if (errcode_ret != CL_SUCCESS) { - UNLOCK_RETURN(errcode_ret); + return errcode_ret; } } else { dst_element_size = 0; } if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (!acl_mem_is_valid(src_buffer)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Source buffer is invalid"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Source buffer is invalid"); } if (!acl_mem_is_valid(dst_image)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Destination buffer is invalid"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Destination buffer is invalid"); } tmp_src_offset[0] = src_offset; @@ -2685,17 +2659,17 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBufferToImageIntelFPGA( } if (!is_image(dst_image)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Memory object is not an image"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Memory object is not an image"); } if (!acl_bind_buffer_to_device(command_queue->device, src_buffer)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } if (!acl_bind_buffer_to_device(command_queue->device, dst_image)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } { @@ -2704,7 +2678,7 @@ CL_API_ENTRY cl_int 
CL_API_CALL clEnqueueCopyBufferToImageIntelFPGA( tmp_slice_pitch, dst_image, tmp_dst_offset, 0, 0, tmp_cb, num_events_in_wait_list, event_wait_list, event, CL_COMMAND_WRITE_BUFFER, 0); - UNLOCK_RETURN(ret); + return ret; } } @@ -2733,35 +2707,35 @@ CL_API_ENTRY void *CL_API_CALL clEnqueueMapImageIntelFPGA( size_t element_size; size_t tmp_row_pitch; size_t tmp_slice_pitch; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (image != NULL) { element_size = acl_get_image_element_size( image->context, image->fields.image_objs.image_format, errcode_ret); if (*errcode_ret != CL_SUCCESS) { - UNLOCK_RETURN(NULL); + return NULL; } } else { element_size = 0; } if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_BAIL(CL_INVALID_COMMAND_QUEUE); + BAIL(CL_INVALID_COMMAND_QUEUE); } context = command_queue->context; if (!acl_mem_is_valid(image)) { - UNLOCK_BAIL_INFO(CL_INVALID_MEM_OBJECT, context, "Invalid memory object"); + BAIL_INFO(CL_INVALID_MEM_OBJECT, context, "Invalid memory object"); } if (command_queue->context != image->context) { - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_INVALID_CONTEXT, context, "Command queue and memory object are not associated with the same " "context"); } if (!acl_bind_buffer_to_device(command_queue->device, image)) { - UNLOCK_BAIL_INFO(CL_MEM_OBJECT_ALLOCATION_FAILURE, context, - "Deferred Allocation Failed"); + BAIL_INFO(CL_MEM_OBJECT_ALLOCATION_FAILURE, context, + "Deferred Allocation Failed"); } // Check if we can physically map the data into place. @@ -2769,18 +2743,18 @@ CL_API_ENTRY void *CL_API_CALL clEnqueueMapImageIntelFPGA( // have backing store for it. 
if (!image->block_allocation->region->is_host_accessible && !image->host_mem.aligned_ptr) { - UNLOCK_BAIL_INFO(CL_MAP_FAILURE, context, - "Could not map the image into host memory"); + BAIL_INFO(CL_MAP_FAILURE, context, + "Could not map the image into host memory"); } if (!is_image(image)) { - UNLOCK_BAIL_INFO(CL_INVALID_MEM_OBJECT, command_queue->context, - "Memory object is not an image"); + BAIL_INFO(CL_INVALID_MEM_OBJECT, command_queue->context, + "Memory object is not an image"); } if (image_row_pitch == NULL) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, command_queue->context, - "Invalid row pitch provided"); + BAIL_INFO(CL_INVALID_VALUE, command_queue->context, + "Invalid row pitch provided"); } else { tmp_row_pitch = image->fields.image_objs.image_desc->image_width * element_size; @@ -2791,8 +2765,8 @@ CL_API_ENTRY void *CL_API_CALL clEnqueueMapImageIntelFPGA( image->mem_object_type == CL_MEM_OBJECT_IMAGE2D_ARRAY || image->mem_object_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) && image_slice_pitch == NULL) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, command_queue->context, - "Invalid slice pitch provided"); + BAIL_INFO(CL_INVALID_VALUE, command_queue->context, + "Invalid slice pitch provided"); } else { if (image->mem_object_type == CL_MEM_OBJECT_IMAGE2D || image->mem_object_type == CL_MEM_OBJECT_IMAGE1D || @@ -2858,7 +2832,7 @@ CL_API_ENTRY void *CL_API_CALL clEnqueueMapImageIntelFPGA( acl_print_debug_msg(" map: ref count is %u\n", acl_ref_count(image)); if (status != CL_SUCCESS) - UNLOCK_BAIL(status); // already signalled callback + BAIL(status); // already signalled callback // The enqueue of the mem transfer will retain the buffer. 
} else { @@ -2868,7 +2842,7 @@ CL_API_ENTRY void *CL_API_CALL clEnqueueMapImageIntelFPGA( acl_create_event(command_queue, num_events_in_wait_list, event_wait_list, CL_COMMAND_MAP_BUFFER, &local_event); if (status != CL_SUCCESS) - UNLOCK_BAIL(status); // already signalled callback + BAIL(status); // already signalled callback // Mark it as the trivial map buffer case. local_event->cmd.trivial = 1; local_event->cmd.info.trivial_mem_mapping.mem = image; @@ -2915,7 +2889,7 @@ CL_API_ENTRY void *CL_API_CALL clEnqueueMapImageIntelFPGA( } acl_dump_mem_internal(image); - UNLOCK_RETURN(result); + return result; } ACL_EXPORT @@ -2953,38 +2927,38 @@ CL_API_ENTRY void *CL_API_CALL clEnqueueMapBufferIntelFPGA( cl_event local_event = 0; // used for blocking cl_context context; cl_int status; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_BAIL(CL_INVALID_COMMAND_QUEUE); + BAIL(CL_INVALID_COMMAND_QUEUE); } context = command_queue->context; if (!acl_mem_is_valid(buffer)) { - UNLOCK_BAIL_INFO(CL_INVALID_MEM_OBJECT, context, "Invalid memory object"); + BAIL_INFO(CL_INVALID_MEM_OBJECT, context, "Invalid memory object"); } if (command_queue->context != buffer->context) { - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_INVALID_CONTEXT, context, "Command queue and memory object are not associated with the same " "context"); } if (!acl_bind_buffer_to_device(command_queue->device, buffer)) { - UNLOCK_BAIL_INFO(CL_MEM_OBJECT_ALLOCATION_FAILURE, context, - "Deferred Allocation Failed"); + BAIL_INFO(CL_MEM_OBJECT_ALLOCATION_FAILURE, context, + "Deferred Allocation Failed"); } // Check flags if (map_flags & ~(CL_MAP_READ | CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "Invalid or unsupported flags"); + BAIL_INFO(CL_INVALID_VALUE, context, "Invalid or unsupported flags"); } if (((map_flags & CL_MAP_READ) & (map_flags & CL_MAP_WRITE_INVALIDATE_REGION)) || ((map_flags & CL_MAP_WRITE) & 
(map_flags & CL_MAP_WRITE_INVALIDATE_REGION))) { - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_INVALID_VALUE, context, "CL_MAP_READ or CL_MAP_WRITE and CL_MAP_WRITE_INVALIDATE_REGION are " "specified but are mutually exclusive"); @@ -2996,13 +2970,13 @@ CL_API_ENTRY void *CL_API_CALL clEnqueueMapBufferIntelFPGA( cl_mem_flags flags = buffer->flags; if (!buffer->block_allocation->region->is_host_accessible && !buffer->host_mem.aligned_ptr && !(flags & CL_MEM_USE_HOST_PTR)) { - UNLOCK_BAIL_INFO(CL_MAP_FAILURE, context, - "Could not map the buffer into host memory"); + BAIL_INFO(CL_MAP_FAILURE, context, + "Could not map the buffer into host memory"); } if (offset + cb > buffer->size) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "Requested offset and byte count exceeds the buffer size"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Requested offset and byte count exceeds the buffer size"); } if (flags & CL_MEM_USE_HOST_PTR) { @@ -3031,7 +3005,7 @@ CL_API_ENTRY void *CL_API_CALL clEnqueueMapBufferIntelFPGA( acl_print_debug_msg(" map: ref count is %u\n", acl_ref_count(buffer)); if (status != CL_SUCCESS) - UNLOCK_BAIL(status); // already signalled callback + BAIL(status); // already signalled callback } else if (!buffer->block_allocation->region->is_host_accessible) { size_t tmp_src_offset[3]; @@ -3071,7 +3045,7 @@ CL_API_ENTRY void *CL_API_CALL clEnqueueMapBufferIntelFPGA( acl_print_debug_msg(" map: ref count is %u\n", acl_ref_count(buffer)); if (status != CL_SUCCESS) - UNLOCK_BAIL(status); // already signalled callback + BAIL(status); // already signalled callback // The enqueue of the mem transfer will retain the buffer. } else { @@ -3080,7 +3054,7 @@ CL_API_ENTRY void *CL_API_CALL clEnqueueMapBufferIntelFPGA( status = acl_create_event(command_queue, num_events, events, CL_COMMAND_MAP_BUFFER, &local_event); if (status != CL_SUCCESS) - UNLOCK_BAIL(status); // already signalled callback + BAIL(status); // already signalled callback // Mark it as the trivial map buffer case. 
local_event->cmd.trivial = 1; local_event->cmd.info.trivial_mem_mapping.mem = buffer; @@ -3131,7 +3105,7 @@ CL_API_ENTRY void *CL_API_CALL clEnqueueMapBufferIntelFPGA( } } acl_dump_mem_internal(buffer); - UNLOCK_RETURN(result); + return result; } ACL_EXPORT @@ -3152,28 +3126,27 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueUnmapMemObjectIntelFPGA( cl_context context; cl_int status; char *valid_base_ptr; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } context = command_queue->context; if (!acl_mem_is_valid(mem)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, context, "Memory object is invalid"); + ERR_RET(CL_INVALID_MEM_OBJECT, context, "Memory object is invalid"); } if (command_queue->context != mem->context) { - UNLOCK_ERR_RET( - CL_INVALID_CONTEXT, context, - "Command queue and memory object are not associated with the " - "same context"); + ERR_RET(CL_INVALID_CONTEXT, context, + "Command queue and memory object are not associated with the " + "same context"); } cl_mem_flags flags = mem->flags; if ((!mem->block_allocation->region->is_host_accessible && !mem->host_mem.aligned_ptr && !(flags & CL_MEM_USE_HOST_PTR)) || mem->allocation_deferred) { - UNLOCK_ERR_RET(CL_MAP_FAILURE, context, - "Could not have mapped the buffer into host memory"); + ERR_RET(CL_MAP_FAILURE, context, + "Could not have mapped the buffer into host memory"); } // Necessary sanity check on the pointer. 
@@ -3187,12 +3160,12 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueUnmapMemObjectIntelFPGA( : mem->block_allocation->range.begin); } if ((valid_base_ptr - (char *)mapped_ptr) > 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Invalid mapped_ptr argument: it lies outside the buffer"); + ERR_RET(CL_INVALID_VALUE, context, + "Invalid mapped_ptr argument: it lies outside the buffer"); } if (((char *)mapped_ptr - (valid_base_ptr + mem->size)) >= 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Invalid mapped_ptr argument: it lies outside the buffer"); + ERR_RET(CL_INVALID_VALUE, context, + "Invalid mapped_ptr argument: it lies outside the buffer"); } // This is the mirror image of mapping the buffer in the first place. @@ -3214,7 +3187,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueUnmapMemObjectIntelFPGA( size_t image_element_size = acl_get_image_element_size( mem->context, mem->fields.image_objs.image_format, &status); if (status != CL_SUCCESS) { - UNLOCK_RETURN(status); + return status; } tmp_cb[0] = @@ -3249,7 +3222,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueUnmapMemObjectIntelFPGA( // execution time. 0); if (status != CL_SUCCESS) - UNLOCK_RETURN(status); // already signalled callback + return status; // already signalled callback acl_print_debug_msg("mem[%p] enqueue unmap. refcount %u\n", mem, acl_ref_count(mem)); @@ -3271,7 +3244,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueUnmapMemObjectIntelFPGA( size_t image_element_size = acl_get_image_element_size( mem->context, mem->fields.image_objs.image_format, &status); if (status != CL_SUCCESS) { - UNLOCK_RETURN(status); + return status; } tmp_cb[0] = @@ -3306,14 +3279,14 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueUnmapMemObjectIntelFPGA( // execution time. 0); if (status != CL_SUCCESS) - UNLOCK_RETURN(status); // already signalled callback + return status; // already signalled callback acl_print_debug_msg("mem[%p] enqueue unmap. 
refcount %u\n", mem, acl_ref_count(mem)); } else { status = acl_create_event(command_queue, num_events, events, CL_COMMAND_UNMAP_MEM_OBJECT, &local_event); if (status != CL_SUCCESS) - UNLOCK_RETURN(status); // already signalled callback + return status; // already signalled callback local_event->cmd.trivial = 1; local_event->cmd.info.trivial_mem_mapping.mem = mem; // Should retain the memory object so that its metadata will stick around @@ -3334,7 +3307,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueUnmapMemObjectIntelFPGA( clReleaseEvent(local_event); acl_idle_update(command_queue->context); // Clean up early } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -3354,7 +3327,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBufferIntelFPGA( size_t tmp_src_offset[3]; size_t tmp_dst_offset[3]; size_t tmp_cb[3]; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; tmp_src_offset[0] = offset; tmp_src_offset[1] = 0; @@ -3367,15 +3340,15 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBufferIntelFPGA( tmp_cb[2] = 1; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (ptr == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer argument cannot be NULL"); } if (!acl_bind_buffer_to_device(command_queue->device, buffer)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } { @@ -3384,7 +3357,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBufferIntelFPGA( command_queue->context->unwrapped_host_mem, tmp_dst_offset, // see creation of the unwrapped_host_mem 0, 0, tmp_cb, num_events, events, event, CL_COMMAND_READ_BUFFER, 0); - UNLOCK_RETURN(ret); + return ret; } } @@ -3410,7 +3383,7 @@ CL_API_ENTRY cl_int 
CL_API_CALL clEnqueueReadBufferRectIntelFPGA( size_t tmp_src_offset[3]; size_t tmp_dst_offset[3]; size_t tmp_cb[3]; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (buffer_row_pitch == 0) { buffer_row_pitch = region[0]; @@ -3436,19 +3409,18 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBufferRectIntelFPGA( tmp_cb[2] = region[2]; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (!acl_mem_is_valid(buffer)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Buffer is invalid"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, "Buffer is invalid"); } if (ptr == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer argument cannot be NULL"); } if (!acl_bind_buffer_to_device(command_queue->device, buffer)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } { cl_int ret = l_enqueue_mem_transfer( @@ -3457,7 +3429,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBufferRectIntelFPGA( tmp_dst_offset, // see creation of the unwrapped_host_mem host_row_pitch, host_slice_pitch, tmp_cb, num_events_in_wait_list, event_wait_list, event, CL_COMMAND_READ_BUFFER, 0); - UNLOCK_RETURN(ret); + return ret; } } @@ -3483,7 +3455,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBufferIntelFPGA( size_t tmp_src_offset[3]; size_t tmp_dst_offset[3]; size_t tmp_cb[3]; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; tmp_src_offset[0] = (size_t)((char *)ptr - (const char *)ACL_MEM_ALIGN); tmp_src_offset[1] = 0; @@ -3496,11 +3468,11 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBufferIntelFPGA( tmp_cb[2] = 1; if (!acl_command_queue_is_valid(command_queue)) { - 
UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (!acl_bind_buffer_to_device(command_queue->device, buffer)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } { @@ -3509,7 +3481,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBufferIntelFPGA( command_queue->context->unwrapped_host_mem, tmp_src_offset, 0, 0, buffer, tmp_dst_offset, 0, 0, tmp_cb, num_events, events, event, CL_COMMAND_WRITE_BUFFER, 0); - UNLOCK_RETURN(ret); + return ret; } } @@ -3534,7 +3506,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBufferRectIntelFPGA( size_t tmp_src_offset[3]; size_t tmp_dst_offset[3]; size_t tmp_cb[3]; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (buffer_row_pitch == 0) { buffer_row_pitch = region[0]; @@ -3560,19 +3532,18 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBufferRectIntelFPGA( tmp_cb[2] = region[2]; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (!acl_mem_is_valid(buffer)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Buffer is invalid"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, "Buffer is invalid"); } if (ptr == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer argument cannot be NULL"); } if (!acl_bind_buffer_to_device(command_queue->device, buffer)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } { cl_int ret = l_enqueue_mem_transfer( @@ -3582,7 +3553,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBufferRectIntelFPGA( tmp_dst_offset, // see 
creation of the unwrapped_host_mem buffer_row_pitch, buffer_slice_pitch, tmp_cb, num_events_in_wait_list, event_wait_list, event, CL_COMMAND_WRITE_BUFFER, 0); - UNLOCK_RETURN(ret); + return ret; } } @@ -3612,53 +3583,50 @@ CL_API_ENTRY cl_int clEnqueueFillBufferIntelFPGA( char *ptr; cl_event tmp_event; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (!acl_mem_is_valid(buffer)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Buffer is invalid"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, "Buffer is invalid"); } // Pattern size can only be {1,2,4,8,...,1024 sizeof(double16)}. if (pattern_size == 0 || pattern_size > 1024 || (pattern_size & (pattern_size - 1))) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Invalid pattern size"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, "Invalid pattern size"); } if (offset % pattern_size != 0 || size % pattern_size != 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Offset and size must be a multiple of pattern size"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Offset and size must be a multiple of pattern size"); } if (pattern == NULL) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "pattern cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, "pattern cannot be NULL"); } if (!acl_bind_buffer_to_device(command_queue->device, buffer)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } // This array is passed to clSetEventCallback for releasing the // allocated memory and releasing the event, if *event is null. 
void **callback_data = (void **)acl_malloc(sizeof(void *) * 2); if (!callback_data) { - UNLOCK_ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, - "Out of host memory"); + ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, + "Out of host memory"); } acl_aligned_ptr_t *aligned_ptr = (acl_aligned_ptr_t *)acl_malloc(sizeof(acl_aligned_ptr_t)); if (!aligned_ptr) { acl_free(callback_data); - UNLOCK_ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, - "Out of host memory"); + ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, + "Out of host memory"); } // Replicating the pattern, size/pattern_size times. @@ -3667,8 +3635,8 @@ CL_API_ENTRY cl_int clEnqueueFillBufferIntelFPGA( if (!ptr) { acl_free(aligned_ptr); acl_free(callback_data); - UNLOCK_ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, - "Out of host memory"); + ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, + "Out of host memory"); } for (cl_uint i = 0; i < size / pattern_size; i++) { @@ -3697,7 +3665,7 @@ CL_API_ENTRY cl_int clEnqueueFillBufferIntelFPGA( acl_mem_aligned_free(command_queue->context, aligned_ptr); acl_free(aligned_ptr); acl_free(callback_data); - UNLOCK_RETURN(ret); + return ret; } callback_data[0] = (void *)(aligned_ptr); if (event) { @@ -3711,7 +3679,7 @@ CL_API_ENTRY cl_int clEnqueueFillBufferIntelFPGA( clSetEventCallback(tmp_event, CL_COMPLETE, acl_free_allocation_after_event_completion, (void *)callback_data); - UNLOCK_RETURN(ret); + return ret; } } @@ -3736,7 +3704,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBufferIntelFPGA( size_t tmp_src_offset[3]; size_t tmp_dst_offset[3]; size_t tmp_cb[3]; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; tmp_src_offset[0] = src_offset; tmp_src_offset[1] = 0; @@ -3749,23 +3717,23 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBufferIntelFPGA( tmp_cb[2] = 1; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if 
(!acl_mem_is_valid(src_buffer)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Source buffer is invalid"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Source buffer is invalid"); } if (!acl_mem_is_valid(dst_buffer)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Destination buffer is invalid"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Destination buffer is invalid"); } if (!acl_bind_buffer_to_device(command_queue->device, src_buffer)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } if (!acl_bind_buffer_to_device(command_queue->device, dst_buffer)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } { @@ -3773,7 +3741,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBufferIntelFPGA( command_queue, 0, src_buffer, tmp_src_offset, 0, 0, dst_buffer, tmp_dst_offset, 0, 0, tmp_cb, num_events, events, event, CL_COMMAND_COPY_BUFFER, 0); - UNLOCK_RETURN(ret); + return ret; } } @@ -3797,7 +3765,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBufferRectIntelFPGA( size_t tmp_src_offset[3]; size_t tmp_dst_offset[3]; size_t tmp_cb[3]; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (src_row_pitch == 0) { src_row_pitch = region[0]; @@ -3823,44 +3791,41 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBufferRectIntelFPGA( tmp_cb[2] = region[2]; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (!acl_mem_is_valid(src_buffer)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Source buffer is invalid"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Source 
buffer is invalid"); } if (!acl_mem_is_valid(dst_buffer)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Destination buffer is invalid"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Destination buffer is invalid"); } if (!acl_bind_buffer_to_device(command_queue->device, src_buffer)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } if (!acl_bind_buffer_to_device(command_queue->device, dst_buffer)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } if (src_buffer == dst_buffer) { if (src_row_pitch != dst_row_pitch) { - UNLOCK_ERR_RET( - CL_INVALID_VALUE, command_queue->context, - "Source buffer and destination buffer are the same, but row " - "pitches do not match"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Source buffer and destination buffer are the same, but row " + "pitches do not match"); } if (src_slice_pitch != dst_slice_pitch) { - UNLOCK_ERR_RET( - CL_INVALID_VALUE, command_queue->context, - "Source buffer and destination buffer are the same, but slice " - "pitches do not match"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Source buffer and destination buffer are the same, but slice " + "pitches do not match"); } if (check_copy_overlap(tmp_src_offset, tmp_dst_offset, tmp_cb, src_row_pitch, src_slice_pitch)) { - UNLOCK_ERR_RET( - CL_MEM_COPY_OVERLAP, command_queue->context, - "Source buffer and destination buffer are the same and regions " - "overlaps"); + ERR_RET(CL_MEM_COPY_OVERLAP, command_queue->context, + "Source buffer and destination buffer are the same and regions " + "overlaps"); } } { @@ -3870,7 +3835,7 @@ CL_API_ENTRY cl_int CL_API_CALL 
clEnqueueCopyBufferRectIntelFPGA( tmp_dst_offset, // see creation of the unwrapped_host_mem dst_row_pitch, dst_slice_pitch, tmp_cb, num_events_in_wait_list, event_wait_list, event, CL_COMMAND_COPY_BUFFER, 0); - UNLOCK_RETURN(ret); + return ret; } } @@ -3893,10 +3858,10 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreatePipeIntelFPGA( cl_uint pipe_max_packets, const cl_pipe_properties *properties, cl_int *errcode_ret) { cl_mem mem; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_context_is_valid(context)) { - UNLOCK_BAIL(CL_INVALID_CONTEXT); + BAIL(CL_INVALID_CONTEXT); } // Check flags @@ -3904,8 +3869,7 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreatePipeIntelFPGA( // Check for invalid enum bits if (flags & ~(CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY)) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "Invalid or unsupported flags"); + BAIL_INFO(CL_INVALID_VALUE, context, "Invalid or unsupported flags"); } { @@ -3925,12 +3889,12 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreatePipeIntelFPGA( // Check for exactly one read/write spec if (num_rw_specs > 1) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "More than one read/write flag is specified"); + BAIL_INFO(CL_INVALID_VALUE, context, + "More than one read/write flag is specified"); } if (num_hostrw_specs > 1) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "More than one host read/write flag is specified"); + BAIL_INFO(CL_INVALID_VALUE, context, + "More than one host read/write flag is specified"); } // Default to CL_MEM_READ_WRITE. 
@@ -3941,29 +3905,28 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreatePipeIntelFPGA( if (((flags & CL_MEM_HOST_READ_ONLY) && (flags & CL_MEM_READ_ONLY)) || ((flags & CL_MEM_HOST_WRITE_ONLY) && (flags & CL_MEM_WRITE_ONLY)) || (num_hostrw_specs && (flags & CL_MEM_READ_WRITE))) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "Conflicting read/write flags specified"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Conflicting read/write flags specified"); } } } if (pipe_packet_size == 0) { - UNLOCK_BAIL_INFO(CL_INVALID_PIPE_SIZE, context, "Pipe packet size is zero"); + BAIL_INFO(CL_INVALID_PIPE_SIZE, context, "Pipe packet size is zero"); } if (pipe_packet_size > acl_platform.pipe_max_packet_size) { - UNLOCK_BAIL_INFO(CL_INVALID_PIPE_SIZE, context, - "Pipe packet size exceeds maximum allowed"); + BAIL_INFO(CL_INVALID_PIPE_SIZE, context, + "Pipe packet size exceeds maximum allowed"); } if (properties != NULL) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "Properties must be NULL for pipes"); + BAIL_INFO(CL_INVALID_VALUE, context, "Properties must be NULL for pipes"); } mem = acl_alloc_cl_mem(); if (!mem) { - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate a cl_mem object"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate a cl_mem object"); } acl_reset_ref_count(mem); @@ -3994,8 +3957,8 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreatePipeIntelFPGA( host_pipe_info = acl_new(); if (!host_pipe_info) { acl_free_cl_mem(mem); - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate memory for internal data structure"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate memory for internal data structure"); } host_pipe_info->m_physical_device_id = 0; host_pipe_info->m_channel_handle = -1; @@ -4022,7 +3985,7 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreatePipeIntelFPGA( acl_track_object(ACL_OBJ_MEM_OBJECT, mem); - UNLOCK_RETURN(mem); + return mem; } ACL_EXPORT @@ -4038,17 +4001,17 @@ ACL_EXPORT CL_API_ENTRY cl_int 
CL_API_CALL clGetPipeInfoIntelFPGA( cl_mem pipe, cl_pipe_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { acl_result_t result; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; RESULT_INIT; if (!acl_mem_is_valid(pipe)) { - UNLOCK_RETURN(CL_INVALID_MEM_OBJECT); + return CL_INVALID_MEM_OBJECT; } // Wrong object type if (pipe->mem_object_type != CL_MEM_OBJECT_PIPE) { - UNLOCK_RETURN(CL_INVALID_MEM_OBJECT); + return CL_INVALID_MEM_OBJECT; } switch (param_name) { @@ -4064,14 +4027,14 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clGetPipeInfoIntelFPGA( if (result.size == 0) { // We didn't implement the enum. Error out semi-gracefully. - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } if (param_value) { // Actually try to return the string. if (param_value_size < result.size) { // Buffer is too small to hold the return value. - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } RESULT_COPY(param_value, param_value_size); } @@ -4079,7 +4042,7 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clGetPipeInfoIntelFPGA( if (param_value_size_ret) { *param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL @@ -4101,35 +4064,34 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjectsIntelFPGA( unsigned int mem_id; int *needs_release_on_fail; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (num_mem_objects == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Number of memory objects is zero"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Number of memory objects is zero"); } if (mem_objects == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Array of memory objects is NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + 
"Array of memory objects is NULL"); } for (i = 0; i < num_mem_objects; ++i) { if (!acl_mem_is_valid(mem_objects[i])) { - UNLOCK_RETURN(CL_INVALID_MEM_OBJECT); + return CL_INVALID_MEM_OBJECT; } if (command_queue->context != mem_objects[i]->context) { - UNLOCK_RETURN(CL_INVALID_CONTEXT); + return CL_INVALID_CONTEXT; } } if (flags != 0 && (flags & ~(CL_MIGRATE_MEM_OBJECT_HOST | CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED))) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Invalid flags provided"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, "Invalid flags provided"); } physical_id = command_queue->device->def.physical_device_id; @@ -4138,8 +4100,8 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjectsIntelFPGA( int tmp_mem_id = acl_get_default_device_global_memory(command_queue->device->def); if (tmp_mem_id < 0) { - UNLOCK_ERR_RET(CL_OUT_OF_RESOURCES, command_queue->context, - "Can not find default global memory system"); + ERR_RET(CL_OUT_OF_RESOURCES, command_queue->context, + "Can not find default global memory system"); } mem_id = (unsigned int)tmp_mem_id; @@ -4180,7 +4142,7 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjectsIntelFPGA( mem_objects[i]->reserved_allocations_count[physical_id][mem_id]--; } free(needs_release_on_fail); - UNLOCK_RETURN(status); + return status; } // All space is reserved, create an event/command to actually move the data at @@ -4191,7 +4153,7 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjectsIntelFPGA( if (status != CL_SUCCESS) { free(needs_release_on_fail); - UNLOCK_RETURN(status); // already signalled callback + return status; // already signalled callback } local_event->cmd.info.memory_migration.num_mem_objects = num_mem_objects; @@ -4203,7 +4165,7 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjectsIntelFPGA( num_mem_objects * sizeof(acl_mem_migrate_wrapper_t)); if (!new_src_mem_list) { - UNLOCK_RETURN(CL_OUT_OF_RESOURCES); + return 
CL_OUT_OF_RESOURCES; } local_event->cmd.info.memory_migration.src_mem_list = new_src_mem_list; @@ -4213,7 +4175,7 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjectsIntelFPGA( num_mem_objects * sizeof(acl_mem_migrate_wrapper_t)); if (!local_event->cmd.info.memory_migration.src_mem_list) { - UNLOCK_RETURN(CL_OUT_OF_RESOURCES); + return CL_OUT_OF_RESOURCES; } } @@ -4238,7 +4200,7 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjectsIntelFPGA( free(needs_release_on_fail); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjects( @@ -4592,10 +4554,10 @@ void acl_mem_destructor_callback(cl_mem memobj) { cb_head = cb_head->next; memobj->destructor_callback_list = cb_head; acl_free(temp); - - lock_count = acl_suspend_lock(); - mem_destructor_notify_fn(memobj, notify_user_data); - acl_resume_lock(lock_count); + { + acl_suspend_lock_guard l(acl_mutex_wrapper); + mem_destructor_notify_fn(memobj, notify_user_data); + } } } diff --git a/src/acl_platform.cpp b/src/acl_platform.cpp index 0a6ba6e5..687aa093 100644 --- a/src/acl_platform.cpp +++ b/src/acl_platform.cpp @@ -89,7 +89,7 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clGetPlatformIDsIntelFPGA(cl_uint num_entries, cl_platform_id *platforms, cl_uint *num_platforms_ret) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; // Set this in case of early return due to error in other arguments. if (num_platforms_ret) { @@ -97,10 +97,10 @@ clGetPlatformIDsIntelFPGA(cl_uint num_entries, cl_platform_id *platforms, } if (platforms && num_entries <= 0) { - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } if (num_platforms_ret == 0 && platforms == 0) { - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } // We want to support two kinds of flows: @@ -133,21 +133,21 @@ clGetPlatformIDsIntelFPGA(cl_uint num_entries, cl_platform_id *platforms, // acl_platform.initialized = 1. 
result = acl_init_from_hal_discovery(); if (!result) { - UNLOCK_RETURN(CL_PLATFORM_NOT_FOUND_KHR); + return CL_PLATFORM_NOT_FOUND_KHR; } } if (!acl_get_hal()) { - UNLOCK_RETURN(CL_PLATFORM_NOT_FOUND_KHR); + return CL_PLATFORM_NOT_FOUND_KHR; } if (!acl_platform.initialized) { - UNLOCK_RETURN(CL_PLATFORM_NOT_FOUND_KHR); + return CL_PLATFORM_NOT_FOUND_KHR; } // Return some data if (platforms) { platforms[0] = &acl_platform; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -170,14 +170,14 @@ CL_API_ENTRY cl_int CL_API_CALL clGetPlatformInfoIntelFPGA( size_t param_value_size, void *param_value, size_t *param_value_size_ret) { const char *str = 0; size_t result_len; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_platform_is_valid(platform)) { - UNLOCK_RETURN(CL_INVALID_PLATFORM); + return CL_INVALID_PLATFORM; } - UNLOCK_VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, - param_value_size_ret, 0); + VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, param_value_size_ret, + 0); switch (param_name) { // We don't offer an online compiler. @@ -200,7 +200,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetPlatformInfoIntelFPGA( str = acl_platform.suffix; break; default: - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; break; } assert(str); @@ -210,7 +210,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetPlatformInfoIntelFPGA( // Actually try to return the string. if (param_value_size < result_len) { // Buffer is too small to hold the return value. - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } strncpy((char *)param_value, str, result_len); } @@ -219,7 +219,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetPlatformInfoIntelFPGA( *param_value_size_ret = result_len; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -234,13 +234,13 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clUnloadPlatformCompilerIntelFPGA(cl_platform_id platform) { // Not fully implemented yet. 
- acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_platform_is_valid(platform)) { - UNLOCK_RETURN(CL_INVALID_PLATFORM); + return CL_INVALID_PLATFORM; } // For the sake of MSVC compiler warnings. // We don't have any platform compilers, so unloading is successful! - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -896,8 +896,9 @@ void acl_receive_device_exception(unsigned physical_device_id, // acl_assert_locked_or_sig() instead of just acl_assert_locked(). CL_EXCEPTION_TYPE_INTEL current_exception, listen_mask; + std::unique_lock lock{acl_mutex_wrapper, std::defer_lock}; if (!acl_is_inside_sig()) { - acl_lock(); + lock.lock(); } current_exception = acl_platform.device[physical_device_id].device_exception_status; @@ -950,22 +951,16 @@ void acl_receive_device_exception(unsigned physical_device_id, // callback acl_signal_device_update(); } - - if (!acl_is_inside_sig()) { - acl_unlock(); - } } ACL_EXPORT CL_API_ENTRY void CL_API_CALL clTrackLiveObjectsIntelFPGA(cl_platform_id platform) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (platform == &acl_platform) { acl_platform.track_leaked_objects = 1; } - - acl_unlock(); } ACL_EXPORT @@ -973,7 +968,7 @@ CL_API_ENTRY void CL_API_CALL clReportLiveObjectsIntelFPGA( cl_platform_id platform, void(CL_CALLBACK *report_fn)(void *, void *, const char *, cl_uint), void *user_data) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (platform == &acl_platform) { acl_cl_object_node_t *node = acl_platform.cl_obj_head; @@ -1010,16 +1005,15 @@ CL_API_ENTRY void CL_API_CALL clReportLiveObjectsIntelFPGA( } if (report_fn) { void *object = node->object; - int lock_count = acl_suspend_lock(); - report_fn(user_data, object, name, refcount); - acl_resume_lock(lock_count); + { + acl_suspend_lock_guard l(acl_mutex_wrapper); + report_fn(user_data, object, name, refcount); + } } node = next; } } - - acl_unlock(); } #ifdef __GNUC__ diff --git a/src/acl_printf.cpp b/src/acl_printf.cpp index 
fa66a8b7..60280fcf 100644 --- a/src/acl_printf.cpp +++ b/src/acl_printf.cpp @@ -997,8 +997,9 @@ void acl_schedule_printf_buffer_pickup(int activation_id, int size, // signal handler, which can't lock mutexes, so we don't lock in that case. // All functions called from this one therefore have to use // acl_assert_locked_or_sig() instead of just acl_assert_locked(). + std::unique_lock lock{acl_mutex_wrapper, std::defer_lock}; if (!acl_is_inside_sig()) { - acl_lock(); + lock.lock(); } #ifdef DEBUG @@ -1016,10 +1017,6 @@ void acl_schedule_printf_buffer_pickup(int activation_id, int size, } // Signal all waiters. acl_signal_device_update(); - - if (!acl_is_inside_sig()) { - acl_unlock(); - } } void acl_process_printf_buffer(void *user_data, acl_device_op_t *op) { diff --git a/src/acl_profiler.cpp b/src/acl_profiler.cpp index 22079c84..b5a090ae 100644 --- a/src/acl_profiler.cpp +++ b/src/acl_profiler.cpp @@ -293,8 +293,9 @@ int write_profile_info_to_file(unsigned num_profile_counters, if (!profile_enable) return 0; + std::unique_lock lock{acl_mutex_wrapper, std::defer_lock}; if (!acl_is_inside_sig()) { - acl_lock(); + lock.lock(); } acl_open_profiler_file(); @@ -303,9 +304,6 @@ int write_profile_info_to_file(unsigned num_profile_counters, if (opened_count < 1) { acl_print_debug_msg("Profiler output file is not opened: " STR( ACL_PROFILER_OUTPUT_FILENAME) "\n"); - if (!acl_is_inside_sig()) { - acl_unlock(); - } return 0; } @@ -339,17 +337,8 @@ int write_profile_info_to_file(unsigned num_profile_counters, if (bytes_written < 0 || (unsigned long int)bytes_written != temp_buf.size()) { acl_print_debug_msg("Could not write profile data to file!\n"); - - if (!acl_is_inside_sig()) { - acl_unlock(); - } return 0; } - - if (!acl_is_inside_sig()) { - acl_unlock(); - } - return 1; } @@ -467,9 +456,8 @@ unsigned long is_profile_enabled() { return profile_enable; } unsigned long is_profile_timer_on() { return profile_timer_on; } void acl_set_autorun_start_time() { - acl_lock(); + 
std::scoped_lock lock{acl_mutex_wrapper}; autorun_start_time = acl_get_hal()->get_timestamp(); - acl_unlock(); } CL_API_ENTRY cl_int CL_API_CALL clGetProfileInfoIntelFPGA(cl_event event) { @@ -482,16 +470,16 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProfileInfoIntelFPGA(cl_event event) { int i; _cl_command_queue *command_queue; cl_device_id device_id; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_event_is_valid(event)) { acl_print_debug_msg("clGetProfileInfoIntelFPGA is called for NULL event\n"); - UNLOCK_RETURN(CL_INVALID_EVENT); + return CL_INVALID_EVENT; } if (event->execution_status != CL_RUNNING) { acl_print_debug_msg( "clGetProfileInfoIntelFPGA is called for non-running event\n"); - UNLOCK_RETURN(CL_INVALID_EVENT); + return CL_INVALID_EVENT; } context = event->context; @@ -499,29 +487,27 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProfileInfoIntelFPGA(cl_event event) { if (!acl_context_is_valid(context)) { acl_print_debug_msg( "clGetProfileInfoIntelFPGA is called for NULL context\n"); - UNLOCK_RETURN(CL_INVALID_CONTEXT); + return CL_INVALID_CONTEXT; } command_queue = event->command_queue; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_ERR_RET( - CL_INVALID_COMMAND_QUEUE, context, - "clGetProfileInfoIntelFPGA is called for NULL command_queue"); + ERR_RET(CL_INVALID_COMMAND_QUEUE, context, + "clGetProfileInfoIntelFPGA is called for NULL command_queue"); } device_id = command_queue->device; if (!acl_device_is_valid(device_id)) { - UNLOCK_ERR_RET(CL_INVALID_DEVICE, context, - "clGetProfileInfoIntelFPGA is called for NULL device_id"); + ERR_RET(CL_INVALID_DEVICE, context, + "clGetProfileInfoIntelFPGA is called for NULL device_id"); } profile_data = 0; kernel = event->cmd.info.ndrange_kernel.kernel; if (!acl_kernel_is_valid(kernel)) { - UNLOCK_ERR_RET(CL_INVALID_KERNEL, context, - "Invalid kernel attached to event"); + ERR_RET(CL_INVALID_KERNEL, context, "Invalid kernel attached to event"); } // use autodiscovery info to find out how many 
words will be read from the @@ -531,8 +517,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProfileInfoIntelFPGA( if (num_profile_counters == 0) { // there is not profiler data and we are not printing timers // nothing to print - UNLOCK_ERR_RET(CL_PROFILING_INFO_NOT_AVAILABLE, context, - "No profile information"); + ERR_RET(CL_PROFILING_INFO_NOT_AVAILABLE, context, "No profile information"); } // this kernel has profiling data, get it @@ -569,11 +554,10 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProfileInfoIntelFPGA( profile_data, ACL_DEVICE_OP_KERNEL, (unsigned long long)event->timestamp[CL_RUNNING], (unsigned long long)0, curr_shared_counters)) { - UNLOCK_ERR_RET(CL_OUT_OF_RESOURCES, context, - "Unabled to dump profile data"); + ERR_RET(CL_OUT_OF_RESOURCES, context, "Unable to dump profile data"); } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clGetProfileDataDeviceIntelFPGA( @@ -597,16 +581,16 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProfileDataDeviceIntelFPGA( param_value_size_ret = param_value_size_ret; errcode_ret = errcode_ret; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; // Check if valid device_id if (!acl_device_is_valid(device_id)) { - UNLOCK_RETURN(CL_INVALID_DEVICE); + return CL_INVALID_DEVICE; } // Check if valid program if (!acl_program_is_valid(program)) { - UNLOCK_RETURN(CL_INVALID_PROGRAM); + return CL_INVALID_PROGRAM; } // If program is valid, then context is valid because acl_program_is_valid
Make sure " "the .aocx was compiled with " "autorun kernel profiling enabled"; - UNLOCK_ERR_RET(status, program->context, message); + ERR_RET(status, program->context, message); } // use autodiscovery info to find out how many words will be read from the @@ -642,8 +626,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProfileDataDeviceIntelFPGA( const char *message = "No profile information for kernel " STR( ACL_PROFILE_AUTORUN_KERNEL_NAME) " for reading back autorun profile " "data"; - UNLOCK_ERR_RET(CL_PROFILING_INFO_NOT_AVAILABLE, program->context, - message); + ERR_RET(CL_PROFILING_INFO_NOT_AVAILABLE, program->context, message); } else { uint64_t *readback_profile_data; readback_profile_data = (uint64_t *)acl_malloc( @@ -677,12 +660,12 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProfileDataDeviceIntelFPGA( device_id, context, accel_def->iface.name.c_str(), readback_profile_data, ACL_DEVICE_OP_KERNEL, autorun_start_time, profiled_time, num_profile_counters, curr_shared_counters)) { - UNLOCK_RETURN(CL_OUT_OF_RESOURCES); + return CL_OUT_OF_RESOURCES; } } } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } int acl_process_autorun_profiler_scan_chain(unsigned int physical_device_id, @@ -694,25 +677,20 @@ int acl_process_autorun_profiler_scan_chain(unsigned int physical_device_id, uint64_t *profile_data = nullptr; std::string name = ""; + std::unique_lock lock{acl_mutex_wrapper, std::defer_lock}; if (!acl_is_inside_sig()) { - acl_lock(); + lock.lock(); } const acl_device_binary_t *binary = acl_get_platform()->device[physical_device_id].loaded_bin; if (binary == nullptr) { - if (!acl_is_inside_sig()) { - acl_unlock(); - } return 0; } const acl_accel_def_t *accel_def = binary->get_dev_prog()->get_kernel_accel_def( ACL_PROFILE_AUTORUN_KERNEL_NAME); if (accel_def == nullptr) { - if (!acl_is_inside_sig()) { - acl_unlock(); - } return 0; } @@ -730,9 +708,6 @@ int acl_process_autorun_profiler_scan_chain(unsigned int physical_device_id, num_profile_counters); } else { // There is no 
profiler data - nothing to print - if (!acl_is_inside_sig()) { - acl_unlock(); - } return 0; } @@ -750,9 +725,6 @@ int acl_process_autorun_profiler_scan_chain(unsigned int physical_device_id, acl_free(profile_data); } - if (!acl_is_inside_sig()) { - acl_unlock(); - } return 1; } @@ -770,8 +742,9 @@ int acl_process_profiler_scan_chain(acl_device_op_t *op) { cl_command_type op_type; _cl_command_queue *command_queue; cl_device_id device_id; + std::unique_lock lock{acl_mutex_wrapper, std::defer_lock}; if (!acl_is_inside_sig()) { - acl_lock(); + lock.lock(); } char name[MAX_NAME_SIZE]; @@ -787,25 +760,16 @@ int acl_process_profiler_scan_chain(acl_device_op_t *op) { if (!acl_event_is_valid(event)) { acl_print_debug_msg( "acl_process_profiler_scan_chain is called for an invalid event\n"); - if (!acl_is_inside_sig()) { - acl_unlock(); - } return 0; } if (!acl_command_queue_is_valid(event->command_queue)) { acl_print_debug_msg("acl_process_profiler_scan_chain is called for an " "event with an invalid command_queue\n"); - if (!acl_is_inside_sig()) { - acl_unlock(); - } return 0; } if (acl_event_is_done(event)) { acl_print_debug_msg( "acl_process_profiler_scan_chain is called for a completed event\n"); - if (!acl_is_inside_sig()) { - acl_unlock(); - } return 0; } } @@ -816,18 +780,12 @@ int acl_process_profiler_scan_chain(acl_device_op_t *op) { if (!device_id) { acl_print_debug_msg( "acl_process_profiler_scan_chain is called for NULL device_id\n"); - if (!acl_is_inside_sig()) { - acl_unlock(); - } return 0; } // this is not a kernel event and we are not printing timers // so nothing to print if (op_type != ACL_DEVICE_OP_KERNEL && profile_timer_on != 1) { - if (!acl_is_inside_sig()) { - acl_unlock(); - } return 0; } @@ -842,9 +800,6 @@ int acl_process_profiler_scan_chain(acl_device_op_t *op) { if (num_profile_counters == 0 && profile_timer_on != 1) { // there is not profiler data and we are not printing timers // nothing to print - if (!acl_is_inside_sig()) { - acl_unlock(); - 
} return 0; } @@ -867,9 +822,6 @@ int acl_process_profiler_scan_chain(acl_device_op_t *op) { } else if (profile_timer_on != 1) { // if ACL_PROFILE_TIMER is not set, do not print info about the rest of // the events - if (!acl_is_inside_sig()) { - acl_unlock(); - } return 0; } else if (op_type == ACL_DEVICE_OP_MEM_TRANSFER_COPY) { snprintf(name, MAX_NAME_SIZE, ".mem_transfer_copy"); @@ -887,9 +839,6 @@ int acl_process_profiler_scan_chain(acl_device_op_t *op) { // Ignore unknown op_type (don't attempt to extract any profiling from it or // get timestamps) acl_print_debug_msg("Unknown device op type: '%d'\n", int(op_type)); - if (!acl_is_inside_sig()) { - acl_unlock(); - } return 0; } @@ -916,9 +865,6 @@ int acl_process_profiler_scan_chain(acl_device_op_t *op) { dump_profile_buffer_to_file(); } - if (!acl_is_inside_sig()) { - acl_unlock(); - } return 1; } diff --git a/src/acl_program.cpp b/src/acl_program.cpp index d7a55f7d..00201865 100644 --- a/src/acl_program.cpp +++ b/src/acl_program.cpp @@ -112,12 +112,12 @@ l_device_memory_definition_copy(acl_device_def_autodiscovery_t *dest_dev, ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clRetainProgramIntelFPGA(cl_program program) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_program_is_valid(program)) { - UNLOCK_RETURN(CL_INVALID_PROGRAM); + return CL_INVALID_PROGRAM; } acl_retain(program); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -127,9 +127,9 @@ CL_API_ENTRY cl_int CL_API_CALL clRetainProgram(cl_program program) { ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clReleaseProgramIntelFPGA(cl_program program) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_program_is_valid(program)) { - UNLOCK_RETURN(CL_INVALID_PROGRAM); + return CL_INVALID_PROGRAM; } acl_release(program); if (!acl_ref_count(program)) { @@ -145,7 +145,7 @@ CL_API_ENTRY cl_int CL_API_CALL clReleaseProgramIntelFPGA(cl_program program) { if (program) l_free_program(program); } - 
UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -172,28 +172,28 @@ CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithSourceIntelFPGA( int pass; cl_program program = 0; struct acl_file_handle_t *capture_fp = NULL; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_context_is_valid(context)) - UNLOCK_BAIL(CL_INVALID_CONTEXT); + BAIL(CL_INVALID_CONTEXT); if (count == 0) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "Count parameter is zero"); + BAIL_INFO(CL_INVALID_VALUE, context, "Count parameter is zero"); } if (strings == 0) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "No source strings specified"); + BAIL_INFO(CL_INVALID_VALUE, context, "No source strings specified"); } for (i = 0; i < count; i++) { if (strings[i] == 0) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "A string pointers is NULL"); + BAIL_INFO(CL_INVALID_VALUE, context, "A string pointer is NULL"); } } // Go ahead and allocate it. program = acl_alloc_cl_program(); if (program == 0) { - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate a program object"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate a program object"); } l_init_program(program, context); @@ -233,8 +233,8 @@ CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithSourceIntelFPGA( if (capture_fp) { acl_fclose(capture_fp); } - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate memory to store program source"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate memory to store program source"); } program->source_text = buffer; } @@ -273,7 +273,7 @@ CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithSourceIntelFPGA( acl_track_object(ACL_OBJ_PROGRAM, program); - UNLOCK_RETURN(program); + return program; } ACL_EXPORT @@ -292,38 +292,38 @@ CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinaryIntelFPGA( cl_uint i; cl_uint idev; cl_program program = 0; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if
(!acl_context_is_valid(context)) - UNLOCK_BAIL(CL_INVALID_CONTEXT); + BAIL(CL_INVALID_CONTEXT); if (num_devices == 0 || device_list == 0) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "Invalid device list"); + BAIL_INFO(CL_INVALID_VALUE, context, "Invalid device list"); } for (i = 0; i < num_devices; i++) { if (!acl_device_is_valid(device_list[i])) { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid device"); + BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid device"); } if (!acl_context_uses_device(context, device_list[i])) { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, - "Device is not associated with the context"); + BAIL_INFO(CL_INVALID_DEVICE, context, + "Device is not associated with the context"); } if (lengths[i] == 0 || binaries[i] == 0) { if (binary_status) { binary_status[i] = CL_INVALID_VALUE; } - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - lengths[i] == 0 ? "A binary length is zero" - : "A binary pointer is NULL"); + BAIL_INFO(CL_INVALID_VALUE, context, + lengths[i] == 0 ? "A binary length is zero" + : "A binary pointer is NULL"); } } // Go ahead and allocate it. 
program = acl_alloc_cl_program(); if (program == 0) { - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate a program object"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate a program object"); } l_init_program(program, context); @@ -347,7 +347,7 @@ CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinaryIntelFPGA( if (binary_status) { binary_status[idev] = CL_INVALID_BINARY; } - UNLOCK_BAIL_INFO(CL_INVALID_BINARY, context, "Invalid binary"); + BAIL_INFO(CL_INVALID_BINARY, context, "Invalid binary"); } } else { assert(context->uses_dynamic_sysdef); @@ -384,7 +384,7 @@ CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinaryIntelFPGA( if (binary_status) { binary_status[idev] = CL_INVALID_BINARY; } - UNLOCK_BAIL_INFO(CL_INVALID_BINARY, context, "Invalid binary"); + BAIL_INFO(CL_INVALID_BINARY, context, "Invalid binary"); } // Need to unload the binary and only load it on an as needed @@ -410,8 +410,8 @@ CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinaryIntelFPGA( if (binary_status) { binary_status[idev] = CL_INVALID_VALUE; } - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate memory to store program binaries"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate memory to store program binaries"); } // Wait to set status until after failures may have occurred for this @@ -431,7 +431,7 @@ CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinaryIntelFPGA( l_try_to_eagerly_program_device(program); - UNLOCK_RETURN(program); + return program; } ACL_EXPORT @@ -453,40 +453,40 @@ clCreateProgramWithBinaryAndProgramDeviceIntelFPGA( cl_uint i; cl_uint idev; cl_program program = 0; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_context_is_valid(context)) - UNLOCK_BAIL(CL_INVALID_CONTEXT); + BAIL(CL_INVALID_CONTEXT); // split_kernel mode is not supported in this special extension API which is // not part of the OpenCL standard. 
assert(context->split_kernel == 0); if (num_devices == 0 || device_list == 0) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "Invalid device list"); + BAIL_INFO(CL_INVALID_VALUE, context, "Invalid device list"); } for (i = 0; i < num_devices; i++) { if (!acl_device_is_valid(device_list[i])) { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid device"); + BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid device"); } if (!acl_context_uses_device(context, device_list[i])) { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, - "Device is not associated with the context"); + BAIL_INFO(CL_INVALID_DEVICE, context, + "Device is not associated with the context"); } if (lengths[i] == 0 || binaries[i] == 0) { if (binary_status) { binary_status[i] = CL_INVALID_VALUE; } - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - lengths[i] == 0 ? "A binary length is zero" - : "A binary pointer is NULL"); + BAIL_INFO(CL_INVALID_VALUE, context, + lengths[i] == 0 ? "A binary length is zero" + : "A binary pointer is NULL"); } } program = acl_alloc_cl_program(); if (program == 0) { - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate a program object"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate a program object"); } l_init_program(program, context); @@ -509,7 +509,7 @@ clCreateProgramWithBinaryAndProgramDeviceIntelFPGA( if (binary_status) { binary_status[idev] = CL_INVALID_BINARY; } - UNLOCK_BAIL_INFO(CL_INVALID_BINARY, context, "Invalid binary"); + BAIL_INFO(CL_INVALID_BINARY, context, "Invalid binary"); } } else { // Copy memory definition from initial device def to program in @@ -526,8 +526,8 @@ clCreateProgramWithBinaryAndProgramDeviceIntelFPGA( if (binary_status) { binary_status[idev] = CL_INVALID_VALUE; } - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate memory to store program binaries"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate memory to store program binaries"); } // Wait to set status until 
after failures may have occurred for this @@ -583,16 +583,16 @@ clCreateProgramWithBinaryAndProgramDeviceIntelFPGA( acl_program_device(NULL, &reprogram_op); if (reprogram_op.execution_status != CL_SUCCESS) { - UNLOCK_BAIL_INFO(CL_DEVICE_NOT_AVAILABLE, context, - "Reprogram of device failed"); + BAIL_INFO(CL_DEVICE_NOT_AVAILABLE, context, + "Reprogram of device failed"); } } else { - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, "Invalid binary"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, "Invalid binary"); } } else { - UNLOCK_BAIL_INFO(CL_BUILD_PROGRAM_FAILURE, context, - "Program is not built correctly"); + BAIL_INFO(CL_BUILD_PROGRAM_FAILURE, context, + "Program is not built correctly"); } } @@ -600,24 +600,29 @@ clCreateProgramWithBinaryAndProgramDeviceIntelFPGA( *errcode_ret = CL_SUCCESS; } - UNLOCK_RETURN(program); + return program; } ACL_EXPORT CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBuiltInKernelsIntelFPGA( cl_context context, cl_uint num_devices, const cl_device_id *device_list, const char *kernel_names, cl_int *errcode_ret) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_context_is_valid(context)) - UNLOCK_BAIL(CL_INVALID_CONTEXT); + BAIL(CL_INVALID_CONTEXT); if (num_devices == 0 || device_list == 0) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "Invalid device list"); + BAIL_INFO(CL_INVALID_VALUE, context, "Invalid device list"); } if (kernel_names == NULL) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "kernel_names is NULL"); + BAIL_INFO(CL_INVALID_VALUE, context, "kernel_names is NULL"); + } + + if (num_devices >= ACL_MAX_DEVICE) { + BAIL_INFO(CL_INVALID_VALUE, context, + "num_devices specified is greater than or equal to ACL_MAX_DEVICE"); } // list of semicolon delimited string of kernel names @@ -630,12 +635,12 @@ CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBuiltInKernelsIntelFPGA( for (cl_uint i = 0; i < num_devices; i++) { if (!acl_device_is_valid(device_list[i])) { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, 
context, "Invalid device"); + BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid device"); } if (!acl_context_uses_device(context, device_list[i])) { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, - "Device is not associated with the context"); + BAIL_INFO(CL_INVALID_DEVICE, context, + "Device is not associated with the context"); } // make sure current device contains all the builtin kernels @@ -651,17 +656,17 @@ CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBuiltInKernelsIntelFPGA( break; } if (find_count != 0) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "kernel_names contains a kernel name that is not " - "supported by all of the devices in device_list"); + BAIL_INFO(CL_INVALID_VALUE, context, + "kernel_names contains a kernel name that is not " + "supported by all of the devices in device_list"); } } // Go ahead and allocate it. cl_program program = acl_alloc_cl_program(); if (program == 0) { - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate a program object"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate a program object"); } l_init_program(program, context); @@ -676,8 +681,7 @@ CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBuiltInKernelsIntelFPGA( l_create_dev_prog(program, device_list[idev], 0, NULL); if (program->dev_prog[idev]) { if (context->programs_devices || context->uses_dynamic_sysdef) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "No builtin kernels available\n"); + BAIL_INFO(CL_INVALID_VALUE, context, "No builtin kernels available\n"); } else { // i put this here since dla flow makes call to clGetProgramInfo which @@ -695,8 +699,8 @@ CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBuiltInKernelsIntelFPGA( } else { // Release all the memory we've allocated. 
l_free_program(program); - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate memory to store program binaries"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate memory to store program binaries"); } } @@ -710,7 +714,7 @@ CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBuiltInKernelsIntelFPGA( acl_track_object(ACL_OBJ_PROGRAM, program); - UNLOCK_RETURN(program); + return program; } ACL_EXPORT @@ -727,9 +731,9 @@ CL_API_ENTRY cl_int CL_API_CALL clCompileProgramIntelFPGA( const char *options, cl_uint num_input_headers, const cl_program *input_headers, const char **header_include_names, acl_program_build_notify_fn_t pfn_notify, void *user_data) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_program_is_valid(program)) - UNLOCK_RETURN(CL_INVALID_PROGRAM); + return CL_INVALID_PROGRAM; // Suppress compiler warnings. num_devices = num_devices; @@ -741,8 +745,8 @@ CL_API_ENTRY cl_int CL_API_CALL clCompileProgramIntelFPGA( pfn_notify = pfn_notify; user_data = user_data; - UNLOCK_ERR_RET(CL_COMPILER_NOT_AVAILABLE, program->context, - "Device compiler is not available"); + ERR_RET(CL_COMPILER_NOT_AVAILABLE, program->context, + "Device compiler is not available"); } ACL_EXPORT @@ -762,9 +766,9 @@ CL_API_ENTRY cl_program CL_API_CALL clLinkProgramIntelFPGA( const char *options, cl_uint num_input_programs, const cl_program *input_programs, acl_program_build_notify_fn_t pfn_notify, void *user_data, cl_int *errcode_ret) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_context_is_valid(context)) - UNLOCK_BAIL(CL_INVALID_CONTEXT); + BAIL(CL_INVALID_CONTEXT); // For the sake of MSVC compiler warnings. 
num_devices = num_devices; device_list = device_list; @@ -774,8 +778,7 @@ CL_API_ENTRY cl_program CL_API_CALL clLinkProgramIntelFPGA( pfn_notify = pfn_notify; user_data = user_data; - UNLOCK_BAIL_INFO(CL_LINKER_NOT_AVAILABLE, context, - "Device linker is not available"); + BAIL_INFO(CL_LINKER_NOT_AVAILABLE, context, "Device linker is not available"); } ACL_EXPORT @@ -795,14 +798,14 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProgramInfoIntelFPGA( void *param_value, size_t *param_value_size_ret) { cl_context context; acl_result_t result; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_program_is_valid(program)) { - UNLOCK_RETURN(CL_INVALID_PROGRAM); + return CL_INVALID_PROGRAM; } context = program->context; - UNLOCK_VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, - param_value_size_ret, context); + VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, param_value_size_ret, + context); RESULT_INIT; @@ -838,8 +841,8 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProgramInfoIntelFPGA( // They actually want the values if (param_value_size < (program->num_devices * sizeof(size_t))) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Parameter return buffer is too small"); + ERR_RET(CL_INVALID_VALUE, context, + "Parameter return buffer is too small"); } for (unsigned i = 0; i < program->num_devices; i++) { // program->dev_prog[] could be NULL if a compile failed. @@ -848,7 +851,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProgramInfoIntelFPGA( dev_prog ? 
dev_prog->device_binary.get_binary_len() : 0; } } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } case CL_PROGRAM_BINARIES: { @@ -863,8 +866,8 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProgramInfoIntelFPGA( // They actually want the values unsigned char **dest = (unsigned char **)param_value; if (param_value_size < (program->num_devices * sizeof(char *))) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Parameter return buffer is too small"); + ERR_RET(CL_INVALID_VALUE, context, + "Parameter return buffer is too small"); } for (unsigned i = 0; i < program->num_devices; ++i) { auto *dev_prog = program->dev_prog[i]; @@ -886,7 +889,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProgramInfoIntelFPGA( } } } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } case CL_PROGRAM_NUM_KERNELS: { @@ -908,10 +911,9 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProgramInfoIntelFPGA( } } if (!exists_built_dev_prog) - UNLOCK_ERR_RET( - CL_INVALID_PROGRAM_EXECUTABLE, context, - "A successfully built program executable was not found for any " - "device in the list of devices associated with program"); + ERR_RET(CL_INVALID_PROGRAM_EXECUTABLE, context, + "A successfully built program executable was not found for any " + "device in the list of devices associated with program"); RESULT_SIZE_T(kernel_cnt); break; @@ -952,10 +954,10 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProgramInfoIntelFPGA( } if (!exists_built_dev_prog) - UNLOCK_ERR_RET(CL_INVALID_PROGRAM_EXECUTABLE, context, - "A successfully built program executable was not " - "found for any device in the list of devices " - "associated with program"); + ERR_RET(CL_INVALID_PROGRAM_EXECUTABLE, context, + "A successfully built program executable was not " + "found for any device in the list of devices " + "associated with program"); // Based on the OpenCL 1.2 CTS api test, total_ret_len must include the // space for the null terminator. 
@@ -966,8 +968,8 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProgramInfoIntelFPGA( if (param_value) { if (total_ret_len > param_value_size) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Parameter return buffer is too small"); + ERR_RET(CL_INVALID_VALUE, context, + "Parameter return buffer is too small"); } std::stringstream ss; @@ -985,17 +987,17 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProgramInfoIntelFPGA( total_ret_len); } } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; default: - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, "Invalid program info query"); + ERR_RET(CL_INVALID_VALUE, context, "Invalid program info query"); } // zero size result is valid! if (param_value) { if (param_value_size < result.size) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Parameter return buffer is too small"); + ERR_RET(CL_INVALID_VALUE, context, + "Parameter return buffer is too small"); } RESULT_COPY(param_value, param_value_size); } @@ -1003,7 +1005,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProgramInfoIntelFPGA( if (param_value_size_ret) { *param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -1024,14 +1026,14 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProgramBuildInfoIntelFPGA( cl_context context; acl_device_program_info_t *dev_prog; acl_result_t result; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_program_is_valid(program)) { - UNLOCK_RETURN(CL_INVALID_PROGRAM); + return CL_INVALID_PROGRAM; } context = program->context; - UNLOCK_VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, - param_value_size_ret, context); + VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, param_value_size_ret, + context); RESULT_INIT; @@ -1041,8 +1043,8 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProgramBuildInfoIntelFPGA( } } if (dev_idx >= program->num_devices) { - UNLOCK_ERR_RET(CL_INVALID_DEVICE, context, - "The specified device is not associated with the program"); + ERR_RET(CL_INVALID_DEVICE, context, + "The specified 
device is not associated with the program"); } dev_prog = program->dev_prog[dev_idx]; @@ -1068,18 +1070,17 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProgramBuildInfoIntelFPGA( } break; default: - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Invalid program build info query"); + ERR_RET(CL_INVALID_VALUE, context, "Invalid program build info query"); } if (result.size == 0) { - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } // should already have signalled if (param_value) { if (param_value_size < result.size) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Parameter return buffer is too small"); + ERR_RET(CL_INVALID_VALUE, context, + "Parameter return buffer is too small"); } RESULT_COPY(param_value, param_value_size); } @@ -1087,7 +1088,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProgramBuildInfoIntelFPGA( if (param_value_size_ret) { *param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -1106,32 +1107,32 @@ CL_API_ENTRY cl_int CL_API_CALL clBuildProgramIntelFPGA( void *user_data) { cl_context context; cl_int status = CL_SUCCESS; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_program_is_valid(program)) { - UNLOCK_RETURN(CL_INVALID_PROGRAM); + return CL_INVALID_PROGRAM; } context = program->context; acl_print_debug_msg("Building program...\n"); if (program->num_kernels > 0) { - UNLOCK_ERR_RET(CL_INVALID_OPERATION, context, - "At least one kernel is still attached to the program"); + ERR_RET(CL_INVALID_OPERATION, context, + "At least one kernel is still attached to the program"); } if (device_list && num_devices == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Invalid device list: num_devices is zero but device_list " - "is specified"); + ERR_RET(CL_INVALID_VALUE, context, + "Invalid device list: num_devices is zero but device_list " + "is specified"); } if (0 == device_list && num_devices > 0) { - UNLOCK_ERR_RET( + ERR_RET( CL_INVALID_VALUE, context, "Invalid device list: 
num_devices is non-zero but device_list is NULL"); } if (pfn_notify == 0 && user_data != 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "user_data is set but pfn_notify is not"); + ERR_RET(CL_INVALID_VALUE, context, + "user_data is set but pfn_notify is not"); } if (device_list) { @@ -1148,8 +1149,8 @@ CL_API_ENTRY cl_int CL_API_CALL clBuildProgramIntelFPGA( saw_it = (program->device[iprogdev] == device_list[idev]); } if (!saw_it) { - UNLOCK_ERR_RET(CL_INVALID_DEVICE, context, - "A specified device is not associated with the program"); + ERR_RET(CL_INVALID_DEVICE, context, + "A specified device is not associated with the program"); } } // Ok, each device is associated with the program. @@ -1186,7 +1187,6 @@ CL_API_ENTRY cl_int CL_API_CALL clBuildProgramIntelFPGA( if (status == CL_SUCCESS) l_try_to_eagerly_program_device(program); - acl_unlock(); // Call the notification callback. if (pfn_notify) pfn_notify(program, user_data); diff --git a/src/acl_sampler.cpp b/src/acl_sampler.cpp index 5d9d4c74..07a41c11 100644 --- a/src/acl_sampler.cpp +++ b/src/acl_sampler.cpp @@ -40,7 +40,7 @@ CL_API_ENTRY cl_sampler clCreateSamplerWithPropertiesIntelFPGA( cl_sampler sampler; int next_free_sampler_head; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; sampler_id = acl_platform.free_sampler_head; sampler = &(acl_platform.sampler[sampler_id]); @@ -53,7 +53,7 @@ CL_API_ENTRY cl_sampler clCreateSamplerWithPropertiesIntelFPGA( sampler->filter_mode = 0xFFFFFFFF; if (!acl_context_is_valid(context)) { - UNLOCK_BAIL(CL_INVALID_CONTEXT); + BAIL(CL_INVALID_CONTEXT); } sampler->context = context; @@ -73,8 +73,8 @@ CL_API_ENTRY cl_sampler clCreateSamplerWithPropertiesIntelFPGA( } } if (!some_device_supports_images) { - UNLOCK_BAIL_INFO(CL_INVALID_OPERATION, context, - "No devices in context support images"); + BAIL_INFO(CL_INVALID_OPERATION, context, + "No devices in context support images"); } iprop = 0; @@ -82,21 +82,20 @@ CL_API_ENTRY cl_sampler 
clCreateSamplerWithPropertiesIntelFPGA( if (sampler_properties[iprop] == CL_SAMPLER_NORMALIZED_COORDS) { ++iprop; if (sampler->normalized_coords != 0xFFFFFFFF) { - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_INVALID_VALUE, context, "Normalized coords property specified more than once for sampler"); } if (sampler_properties[iprop] != CL_FALSE && sampler_properties[iprop] != CL_TRUE) { - UNLOCK_BAIL_INFO( - CL_INVALID_VALUE, context, - "Invalid value for normalized coords property of sampler"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Invalid value for normalized coords property of sampler"); } sampler->normalized_coords = sampler_properties[iprop]; } else if (sampler_properties[iprop] == CL_SAMPLER_ADDRESSING_MODE) { ++iprop; if (sampler->addressing_mode != 0xFFFFFFFF) { - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_INVALID_VALUE, context, "Addressing mode property specified more than once for sampler"); } @@ -105,29 +104,27 @@ CL_API_ENTRY cl_sampler clCreateSamplerWithPropertiesIntelFPGA( sampler_properties[iprop] != CL_ADDRESS_CLAMP_TO_EDGE && sampler_properties[iprop] != CL_ADDRESS_CLAMP && sampler_properties[iprop] != CL_ADDRESS_NONE) { - UNLOCK_BAIL_INFO( - CL_INVALID_VALUE, context, - "Invalid value for addressing mode property of sampler"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Invalid value for addressing mode property of sampler"); } sampler->addressing_mode = sampler_properties[iprop]; } else if (sampler_properties[iprop] == CL_SAMPLER_FILTER_MODE) { ++iprop; if (sampler->filter_mode != 0xFFFFFFFF) { - UNLOCK_BAIL_INFO( - CL_INVALID_VALUE, context, - "Filter mode property specified more than once for sampler"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Filter mode property specified more than once for sampler"); } if (sampler_properties[iprop] != CL_FILTER_NEAREST && sampler_properties[iprop] != CL_FILTER_LINEAR) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "Invalid value for filter mode property of sampler"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Invalid value 
for filter mode property of sampler"); } sampler->filter_mode = sampler_properties[iprop]; } else { std::stringstream msg; msg << "Invalid sampler property name " << sampler_properties[iprop] << "\n"; - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, msg.str().c_str()); + BAIL_INFO(CL_INVALID_VALUE, context, msg.str().c_str()); } ++iprop; } @@ -155,7 +152,7 @@ CL_API_ENTRY cl_sampler clCreateSamplerWithPropertiesIntelFPGA( acl_track_object(ACL_OBJ_MEM_OBJECT, result); - UNLOCK_RETURN(result); + return result; } ACL_EXPORT @@ -196,9 +193,9 @@ clCreateSampler(cl_context context, cl_bool normalized_coords, ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clRetainSamplerIntelFPGA(cl_sampler sampler) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_sampler_is_valid(sampler)) { - UNLOCK_RETURN(CL_INVALID_SAMPLER); + return CL_INVALID_SAMPLER; } acl_retain(sampler); @@ -207,7 +204,6 @@ CL_API_ENTRY cl_int CL_API_CALL clRetainSamplerIntelFPGA(cl_sampler sampler) { acl_ref_count(sampler)); #endif - acl_unlock(); return CL_SUCCESS; } @@ -218,11 +214,11 @@ CL_API_ENTRY cl_int CL_API_CALL clRetainSampler(cl_sampler sampler) { ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clReleaseSamplerIntelFPGA(cl_sampler sampler) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; // In the double-free case, we'll error out here because the reference count // will be 0. 
if (!acl_sampler_is_valid(sampler)) { - UNLOCK_RETURN(CL_INVALID_SAMPLER); + return CL_INVALID_SAMPLER; } acl_release(sampler); @@ -250,7 +246,7 @@ CL_API_ENTRY cl_int CL_API_CALL clReleaseSamplerIntelFPGA(cl_sampler sampler) { clReleaseContext(context); } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -266,9 +262,9 @@ CL_API_ENTRY cl_int CL_API_CALL clGetSamplerInfoIntelFPGA( cl_context context; RESULT_INIT; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_sampler_is_valid(sampler)) { - UNLOCK_RETURN(CL_INVALID_SAMPLER); + return CL_INVALID_SAMPLER; } context = sampler->context; @@ -295,8 +291,8 @@ CL_API_ENTRY cl_int CL_API_CALL clGetSamplerInfoIntelFPGA( } if (result.size == 0) - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Invalid or unsupported sampler object query"); + ERR_RET(CL_INVALID_VALUE, context, + "Invalid or unsupported sampler object query"); if (param_value) { if (param_value_size < result.size) @@ -308,7 +304,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetSamplerInfoIntelFPGA( if (param_value_size_ret) { *param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT diff --git a/src/acl_svm.cpp b/src/acl_svm.cpp index acb36459..79aac8a1 100644 --- a/src/acl_svm.cpp +++ b/src/acl_svm.cpp @@ -42,18 +42,18 @@ CL_API_ENTRY void *CL_API_CALL clSVMAllocIntelFPGA(cl_context context, // this context supports SVM cl_bool context_has_svm; #endif - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; // Valid context #ifndef REMOVE_VALID_CHECKS if (!acl_context_is_valid(context)) - UNLOCK_RETURN(NULL); + return NULL; // Check for invalid enum bits if (flags & ~(CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS)) { - UNLOCK_RETURN(NULL); + return NULL; } // Check for exactly one read/write spec @@ -65,32 +65,32 @@ CL_API_ENTRY void *CL_API_CALL clSVMAllocIntelFPGA(cl_context context, num_rw_specs++; // Default to 
CL_MEM_READ_WRITE. if (num_rw_specs > 1) - UNLOCK_RETURN(NULL); + return NULL; if (num_rw_specs == 0) flags |= CL_MEM_READ_WRITE; // Cannot specify SVM atomics without fine grain if ((flags & CL_MEM_SVM_ATOMICS) && !(flags & CL_MEM_SVM_FINE_GRAIN_BUFFER)) { - UNLOCK_RETURN(NULL); + return NULL; } // If SVM atomics specified, check if any device in context supports SVM // atomics Right now though, we don't support SVM atomics so just return NULL if (flags & CL_MEM_SVM_ATOMICS) { - UNLOCK_RETURN(NULL); + return NULL; } // If fine grain specified, check if any device in context supports fine grain // Right now though, we don't support SVM fine grain so just return NULL if (flags & CL_MEM_SVM_FINE_GRAIN_BUFFER) { - UNLOCK_RETURN(NULL); + return NULL; } // size is 0 or > CL_DEVICE_MAX_MEM_ALLOC_SIZE value for any device in context if (size == 0) - UNLOCK_RETURN(NULL); + return NULL; if (size > context->max_mem_alloc_size) { - UNLOCK_RETURN(NULL); + return NULL; } // alignment is not a power of two or the OpenCL implementation cannot support @@ -101,7 +101,7 @@ CL_API_ENTRY void *CL_API_CALL clSVMAllocIntelFPGA(cl_context context, if (alignment == 0) alignment = ACL_MEM_ALIGN; if (alignment != ACL_MEM_ALIGN) - UNLOCK_RETURN(NULL); + return NULL; #endif // !REMOVE_VALID_CHECKS @@ -111,7 +111,7 @@ CL_API_ENTRY void *CL_API_CALL clSVMAllocIntelFPGA(cl_context context, #else // LINUX mem_result = posix_memalign(&result, alignment, size); if (mem_result != 0) { - UNLOCK_RETURN(NULL); + return NULL; } #endif #else @@ -135,7 +135,7 @@ CL_API_ENTRY void *CL_API_CALL clSVMAllocIntelFPGA(cl_context context, #else // LINUX mem_result = posix_memalign(&result, alignment, size); if (mem_result != 0) { - UNLOCK_RETURN(NULL); + return NULL; } #endif } else { @@ -161,7 +161,7 @@ CL_API_ENTRY void *CL_API_CALL clSVMAllocIntelFPGA(cl_context context, context->svm_list->ptr = result; context->svm_list->size = size; - UNLOCK_RETURN(result); + return result; } ACL_EXPORT @@ -177,7 +177,7 @@ 
CL_API_ENTRY void clSVMFreeIntelFPGA(cl_context context, void *svm_pointer) { acl_svm_entry_t *next_entry; unsigned int idevice; cl_bool context_has_svm; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; context_has_svm = CL_FALSE; if (acl_get_hal()) { for (idevice = 0; idevice < context->num_devices; ++idevice) { @@ -189,15 +189,15 @@ CL_API_ENTRY void clSVMFreeIntelFPGA(cl_context context, void *svm_pointer) { } #ifndef REMOVE_VALID_CHECKS if (!acl_context_is_valid(context)) - UNLOCK_RETURN_VOID; + return; if (svm_pointer == NULL) - UNLOCK_RETURN_VOID; + return; #endif // !REMOVE_VALID_CHECKS // Only free the SVM pointer if it is from this context if (context->svm_list == NULL) - UNLOCK_RETURN_VOID; + return; last_entry = NULL; next_entry = context->svm_list; @@ -233,8 +233,6 @@ CL_API_ENTRY void clSVMFreeIntelFPGA(cl_context context, void *svm_pointer) { last_entry = next_entry; next_entry = next_entry->next; } - - acl_unlock(); } ACL_EXPORT @@ -249,31 +247,31 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMemcpyIntelFPGA( const cl_event *event_wait_list, cl_event *event) { cl_event local_event = 0; // used for blocking cl_int status; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (src_ptr == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer argument cannot be NULL"); } if (dst_ptr == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer argument cannot be NULL"); } if (size == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer size cannot be 0"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer size cannot be 0"); } if (((char *)src_ptr < (char 
*)dst_ptr && (char *)src_ptr + size > (char *)dst_ptr) || ((char *)dst_ptr < (char *)src_ptr && (char *)dst_ptr + size > (char *)src_ptr)) { - UNLOCK_ERR_RET(CL_MEM_COPY_OVERLAP, command_queue->context, - "Source and destination memory overlaps"); + ERR_RET(CL_MEM_COPY_OVERLAP, command_queue->context, + "Source and destination memory overlaps"); } // Create an event/command to actually move the data at the appropriate @@ -282,7 +280,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMemcpyIntelFPGA( acl_create_event(command_queue, num_events_in_wait_list, event_wait_list, CL_COMMAND_SVM_MEMCPY, &local_event); if (status != CL_SUCCESS) - UNLOCK_RETURN(status); // already signalled callback + return status; // already signalled callback local_event->cmd.info.svm_xfer.src_ptr = src_ptr; local_event->cmd.info.svm_xfer.dst_ptr = dst_ptr; local_event->cmd.info.svm_xfer.src_size = size; @@ -303,7 +301,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMemcpyIntelFPGA( clReleaseEvent(local_event); acl_idle_update(command_queue->context); // Clean up early } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -322,42 +320,42 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMemFillIntelFPGA( const cl_event *event_wait_list, cl_event *event) { cl_event local_event = 0; // used for blocking cl_int status; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (svm_ptr == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer argument cannot be NULL"); } if (((uintptr_t)svm_ptr) % (pattern_size * 8) != 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer not aligned with pattern size"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer not aligned with pattern size"); } if (pattern == 0) { - 
UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pattern argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pattern argument cannot be NULL"); } if (pattern_size == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pattern size argument cannot be 0"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pattern size argument cannot be 0"); } if (pattern_size != 1 && pattern_size != 2 && pattern_size != 4 && pattern_size != 8 && pattern_size != 16 && pattern_size != 32 && pattern_size != 64 && pattern_size != 128) { - UNLOCK_ERR_RET( + ERR_RET( CL_INVALID_VALUE, command_queue->context, "Pattern size argument must be one of {1, 2, 4, 8, 16, 32, 64, 128}"); } if (size == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer size cannot be 0"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer size cannot be 0"); } if (size % pattern_size != 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer size must be multiple of pattern size"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer size must be multiple of pattern size"); } // Create an event/command to actually move the data at the appropriate @@ -366,7 +364,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMemFillIntelFPGA( acl_create_event(command_queue, num_events_in_wait_list, event_wait_list, CL_COMMAND_SVM_MEMFILL, &local_event); if (status != CL_SUCCESS) - UNLOCK_RETURN(status); // already signalled callback + return status; // already signalled callback local_event->cmd.info.svm_xfer.src_ptr = pattern; local_event->cmd.info.svm_xfer.dst_ptr = svm_ptr; local_event->cmd.info.svm_xfer.src_size = pattern_size; @@ -383,7 +381,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMemFillIntelFPGA( clReleaseEvent(local_event); acl_idle_update(command_queue->context); // Clean up early } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMemFill( @@ 
-401,22 +399,22 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMapIntelFPGA( const cl_event *event_wait_list, cl_event *event) { cl_event local_event = 0; // used for blocking cl_int status; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (svm_ptr == NULL) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer argument cannot be NULL"); } if (size == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer size cannot be 0"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer size cannot be 0"); } if (flags & ~(CL_MAP_READ | CL_MAP_WRITE)) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Invalid or unsupported flags"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Invalid or unsupported flags"); } // Create an event/command to actually move the data at the appropriate @@ -424,7 +422,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMapIntelFPGA( status = acl_create_event(command_queue, num_events_in_wait_list, event_wait_list, CL_COMMAND_SVM_MAP, &local_event); if (status != CL_SUCCESS) - UNLOCK_RETURN(status); // already signalled callback + return status; // already signalled callback // We don't use this right now, but if we ever have to sync up caches we will // need this. 
local_event->cmd.info.svm_xfer.dst_ptr = svm_ptr; @@ -445,7 +443,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMapIntelFPGA( clReleaseEvent(local_event); acl_idle_update(command_queue->context); // Clean up early } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMap( @@ -463,14 +461,14 @@ clEnqueueSVMUnmapIntelFPGA(cl_command_queue command_queue, void *svm_ptr, const cl_event *event_wait_list, cl_event *event) { cl_event local_event = 0; // used for blocking cl_int status; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (svm_ptr == NULL) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer argument cannot be NULL"); } // Create an event/command to actually move the data at the appropriate @@ -479,7 +477,7 @@ clEnqueueSVMUnmapIntelFPGA(cl_command_queue command_queue, void *svm_ptr, acl_create_event(command_queue, num_events_in_wait_list, event_wait_list, CL_COMMAND_SVM_UNMAP, &local_event); if (status != CL_SUCCESS) - UNLOCK_RETURN(status); // already signalled callback + return status; // already signalled callback // We don't use this right now, but if we ever have to sync up caches we will // need this. 
local_event->cmd.info.svm_xfer.dst_ptr = svm_ptr; @@ -495,7 +493,7 @@ clEnqueueSVMUnmapIntelFPGA(cl_command_queue command_queue, void *svm_ptr, clReleaseEvent(local_event); acl_idle_update(command_queue->context); // Clean up early } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL @@ -517,18 +515,18 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMFreeIntelFPGA( const cl_event *event_wait_list, cl_event *event) { cl_event local_event = 0; // used for blocking cl_int status; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (svm_pointers == NULL) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "List of SVM pointers argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "List of SVM pointers argument cannot be NULL"); } if (num_svm_pointers == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Number of SVM pointers cannot be 0"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Number of SVM pointers cannot be 0"); } // Create an event/command to actually move the data at the appropriate @@ -536,7 +534,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMFreeIntelFPGA( status = acl_create_event(command_queue, num_events_in_wait_list, event_wait_list, CL_COMMAND_SVM_FREE, &local_event); if (status != CL_SUCCESS) - UNLOCK_RETURN(status); // already signalled callback + return status; // already signalled callback // We don't use this right now, but if we ever have to sync up caches we will // need this. 
local_event->cmd.info.svm_free.pfn_free_func = pfn_free_func; @@ -555,7 +553,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMFreeIntelFPGA( clReleaseEvent(local_event); acl_idle_update(command_queue->context); // Clean up early } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMFree( diff --git a/src/acl_thread.cpp b/src/acl_thread.cpp index d7b2e70c..b0511505 100644 --- a/src/acl_thread.cpp +++ b/src/acl_thread.cpp @@ -1,9 +1,6 @@ // Copyright (C) 2015-2021 Intel Corporation // SPDX-License-Identifier: BSD-3-Clause -// System headers. -#include - // External library headers. #include @@ -16,13 +13,14 @@ ACL_TLS int acl_global_lock_count = 0; ACL_TLS int acl_inside_sig_flag = 0; ACL_TLS int acl_inside_sig_old_lock_count = 0; +acl_mutex_wrapper_t acl_mutex_wrapper; static struct acl_condvar_s l_acl_global_condvar; // l_init_once() is defined in an OS-specific section below static void l_init_once(); -void acl_lock() { +void acl_mutex_wrapper_t::lock() { l_init_once(); if (acl_global_lock_count == 0) { acl_acquire_condvar(&l_acl_global_condvar); @@ -30,7 +28,7 @@ void acl_lock() { acl_global_lock_count++; } -void acl_unlock() { +void acl_mutex_wrapper_t::unlock() { acl_assert_locked(); acl_global_lock_count--; if (acl_global_lock_count == 0) { @@ -40,7 +38,7 @@ void acl_unlock() { int acl_is_locked_callback(void) { return (acl_global_lock_count > 0); } -int acl_suspend_lock() { +int acl_mutex_wrapper_t::suspend_lock() { int old_lock_count = acl_global_lock_count; acl_global_lock_count = 0; if (old_lock_count > 0) @@ -48,7 +46,7 @@ int acl_suspend_lock() { return old_lock_count; } -void acl_resume_lock(int lock_count) { +void acl_mutex_wrapper_t::resume_lock(int lock_count) { acl_assert_unlocked(); if (lock_count > 0) acl_acquire_condvar(&l_acl_global_condvar); @@ -100,6 +98,7 @@ static void l_init_once() { __attribute__((constructor)) static void l_global_lock_init() { acl_init_condvar(&l_acl_global_condvar); + 
acl_mutex_wrapper = acl_mutex_wrapper_t(); } __attribute__((destructor)) static void l_global_lock_uninit() { @@ -124,6 +123,7 @@ static BOOL CALLBACK l_init_once_callback(PINIT_ONCE InitOnce, PVOID Parameter, (void)(Context); acl_init_condvar(&l_acl_global_condvar); + acl_mutex_wrapper = acl_mutex_wrapper_t(); return TRUE; } @@ -140,8 +140,7 @@ static void l_init_once() { // e.g. polling BSPs (using yield) to prevent one thread from hogging the mutex // while waiting for something like clFinish. void acl_yield_lock_and_thread() { - int lock_count; - lock_count = acl_suspend_lock(); + acl_suspend_lock_guard lock(acl_mutex_wrapper); #ifdef __arm__ // arm-linux-gnueabihf-g++ version used is 4.7.1. // std::this_thread::yield can be enabled for it by defining @@ -152,5 +151,4 @@ void acl_yield_lock_and_thread() { #else std::this_thread::yield(); #endif - acl_resume_lock(lock_count); } diff --git a/src/acl_usm.cpp b/src/acl_usm.cpp index 695117b1..f2fcbc82 100644 --- a/src/acl_usm.cpp +++ b/src/acl_usm.cpp @@ -50,32 +50,32 @@ ACL_EXPORT CL_API_ENTRY void *CL_API_CALL clHostMemAllocINTEL( cl_context context, const cl_mem_properties_intel *properties, size_t size, cl_uint alignment, cl_int *errcode_ret) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (errcode_ret) { *errcode_ret = CL_SUCCESS; } if (!acl_context_is_valid(context)) { - UNLOCK_BAIL(CL_INVALID_CONTEXT); + BAIL(CL_INVALID_CONTEXT); } if (size == 0) { - UNLOCK_BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, - "Memory buffer cannot be of size zero"); + BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, + "Memory buffer cannot be of size zero"); } // Spec only allows for power of 2 allignment. 
// Alignment of '0' means use the default if (alignment & (alignment - 1)) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "alignment must be power of 2"); + BAIL_INFO(CL_INVALID_VALUE, context, "alignment must be power of 2"); } // Spec specifies that alignment is no bigger than the largest supported data // type if (alignment > sizeof(cl_long16)) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "Requested alignment greater than largest data type " - "supported by device (long16)"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Requested alignment greater than largest data type " + "supported by device (long16)"); } std::vector devices = std::vector( @@ -113,7 +113,7 @@ CL_API_ENTRY void *CL_API_CALL clHostMemAllocINTEL( mem_id = (cl_uint) * (properties + 1); } break; default: { - UNLOCK_BAIL_INFO(CL_INVALID_PROPERTY, context, "Invalid properties"); + BAIL_INFO(CL_INVALID_PROPERTY, context, "Invalid properties"); } } properties += 2; @@ -122,7 +122,7 @@ CL_API_ENTRY void *CL_API_CALL clHostMemAllocINTEL( for (const auto dev : devices) { if (!acl_usm_has_access_capability(dev, CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL)) { - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_INVALID_OPERATION, context, "Device does not support host Unified Shared Memory allocations: " + dev->def.autodiscovery_def.name); @@ -132,16 +132,14 @@ CL_API_ENTRY void *CL_API_CALL clHostMemAllocINTEL( cl_int ret = clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_alloc), &max_alloc, 0); if (ret) { - UNLOCK_BAIL_INFO( - ret, context, - "Failed to query CL_DEVICE_MAX_MEM_ALLOC_SIZE for device: " + - dev->def.autodiscovery_def.name); + BAIL_INFO(ret, context, + "Failed to query CL_DEVICE_MAX_MEM_ALLOC_SIZE for device: " + + dev->def.autodiscovery_def.name); } if (size > max_alloc) { - UNLOCK_BAIL_INFO( - CL_INVALID_BUFFER_SIZE, context, - "Size larger than allocation size supported by device: " + - dev->def.autodiscovery_def.name); + BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, + "Size larger than allocation 
size supported by device: " + + dev->def.autodiscovery_def.name); } } @@ -164,7 +162,7 @@ CL_API_ENTRY void *CL_API_CALL clHostMemAllocINTEL( (acl_usm_allocation_t *)acl_malloc(sizeof(acl_usm_allocation_t)); if (!usm_alloc) { - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, "Out of host memory"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, "Out of host memory"); } int error = 0; @@ -175,21 +173,21 @@ CL_API_ENTRY void *CL_API_CALL clHostMemAllocINTEL( acl_free(usm_alloc); switch (error) { case CL_OUT_OF_HOST_MEMORY: - UNLOCK_BAIL_INFO(error, context, - "Error: Unable to allocate " + std::to_string(size) + - " bytes"); + BAIL_INFO(error, context, + "Error: Unable to allocate " + std::to_string(size) + + " bytes"); break; case CL_INVALID_VALUE: - UNLOCK_BAIL_INFO(error, context, - "Error: Unsupported alignment of " + - std::to_string(alignment)); + BAIL_INFO(error, context, + "Error: Unsupported alignment of " + + std::to_string(alignment)); break; case CL_INVALID_PROPERTY: - UNLOCK_BAIL_INFO(error, context, "Error: Unsuported properties"); + BAIL_INFO(error, context, "Error: Unsuported properties"); break; default: - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Error: Unable to allocate memory"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Error: Unable to allocate memory"); break; } } @@ -203,12 +201,11 @@ CL_API_ENTRY void *CL_API_CALL clHostMemAllocINTEL( usm_alloc->alignment = alignment; l_add_usm_alloc_to_context(context, usm_alloc); - UNLOCK_RETURN(mem); + return mem; } - UNLOCK_BAIL_INFO( - CL_INVALID_VALUE, context, - "Host allocation is not supported for devices in this context"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Host allocation is not supported for devices in this context"); } ACL_EXPORT @@ -216,22 +213,22 @@ CL_API_ENTRY void *CL_API_CALL clDeviceMemAllocINTEL(cl_context context, cl_device_id device, const cl_mem_properties_intel *properties, size_t size, cl_uint alignment, cl_int *errcode_ret) { - acl_lock(); + std::scoped_lock 
lock{acl_mutex_wrapper}; // Valid argument check if (!acl_context_is_valid(context)) { - UNLOCK_BAIL(CL_INVALID_CONTEXT); + BAIL(CL_INVALID_CONTEXT); } if (!acl_device_is_valid(device)) { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid device"); + BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid device"); } if (!acl_context_uses_device(context, device)) { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, - "Device is not associated with the context"); + BAIL_INFO(CL_INVALID_DEVICE, context, + "Device is not associated with the context"); } if (size == 0) { - UNLOCK_BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, - "Memory buffer cannot be of size zero"); + BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, + "Memory buffer cannot be of size zero"); } cl_ulong max_alloc = 0; @@ -239,9 +236,8 @@ clDeviceMemAllocINTEL(cl_context context, cl_device_id device, &max_alloc, 0); if (size > max_alloc) { - UNLOCK_BAIL_INFO( - CL_INVALID_BUFFER_SIZE, context, - "Memory buffer size is larger than max size supported by device"); + BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, + "Memory buffer size is larger than max size supported by device"); } // Spec allows for power of 2 allignment. 
@@ -252,11 +248,10 @@ clDeviceMemAllocINTEL(cl_context context, cl_device_id device, alignment = ACL_MEM_ALIGN; } if (alignment & (alignment - 1)) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "alignment must be power of 2"); + BAIL_INFO(CL_INVALID_VALUE, context, "alignment must be power of 2"); } if (alignment > ACL_MEM_ALIGN) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "Alignment value is not supported"); + BAIL_INFO(CL_INVALID_VALUE, context, "Alignment value is not supported"); } alignment = ACL_MEM_ALIGN; @@ -273,7 +268,7 @@ clDeviceMemAllocINTEL(cl_context context, cl_device_id device, mem_id = (cl_uint) * (properties + 1); } break; default: { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid properties"); + BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid properties"); } } properties += 2; @@ -287,20 +282,19 @@ clDeviceMemAllocINTEL(cl_context context, cl_device_id device, cl_mem usm_device_buffer = clCreateBufferWithPropertiesINTEL( context, props, CL_MEM_READ_WRITE, size, NULL, &status); if (status != CL_SUCCESS) { - UNLOCK_BAIL_INFO(status, context, "Failed to allocate device memory"); + BAIL_INFO(status, context, "Failed to allocate device memory"); } // Runtime will do device allocation on bind to device if (!acl_bind_buffer_to_device(device, usm_device_buffer)) { clReleaseMemObjectIntelFPGA(usm_device_buffer); - UNLOCK_BAIL_INFO(CL_OUT_OF_RESOURCES, context, - "Failed to allocate device memory"); + BAIL_INFO(CL_OUT_OF_RESOURCES, context, "Failed to allocate device memory"); } acl_usm_allocation_t *usm_alloc = (acl_usm_allocation_t *)acl_malloc(sizeof(acl_usm_allocation_t)); if (!usm_alloc) { clReleaseMemObjectIntelFPGA(usm_device_buffer); - UNLOCK_BAIL_INFO(CL_OUT_OF_RESOURCES, context, "Out of host memory"); + BAIL_INFO(CL_OUT_OF_RESOURCES, context, "Out of host memory"); } void *ptr = acl_get_physical_address(usm_device_buffer, device); @@ -320,7 +314,7 @@ clDeviceMemAllocINTEL(cl_context context, cl_device_id device, *errcode_ret = 
CL_SUCCESS; } - UNLOCK_RETURN(ptr); + return ptr; } ACL_EXPORT @@ -328,28 +322,28 @@ CL_API_ENTRY void *CL_API_CALL clSharedMemAllocINTEL(cl_context context, cl_device_id device, const cl_mem_properties_intel *properties, size_t size, cl_uint alignment, cl_int *errcode_ret) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (errcode_ret) { *errcode_ret = CL_SUCCESS; } if (!acl_context_is_valid(context)) { - UNLOCK_BAIL(CL_INVALID_CONTEXT); + BAIL(CL_INVALID_CONTEXT); } if (device != nullptr && !acl_device_is_valid(device)) { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid device"); + BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid device"); } if (device != nullptr && !acl_context_uses_device(context, device)) { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, - "Device is not associated with the context"); + BAIL_INFO(CL_INVALID_DEVICE, context, + "Device is not associated with the context"); } if (size == 0) { - UNLOCK_BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, - "Allocation cannot be of size zero"); + BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, + "Allocation cannot be of size zero"); } // USM spec allows only power-of-2 alignment, or 0 (default alignment) if (alignment & (alignment - 1)) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "alignment must be power of 2"); + BAIL_INFO(CL_INVALID_VALUE, context, "alignment must be power of 2"); } // Ensure the specified device, or at least one of the devices in the context @@ -364,7 +358,7 @@ clSharedMemAllocINTEL(cl_context context, cl_device_id device, for (const auto dev : devices) { if (!acl_usm_has_access_capability( dev, CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL)) { - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_INVALID_OPERATION, context, "Device does not support shared Unified Shared Memory allocations"); } @@ -373,9 +367,9 @@ clSharedMemAllocINTEL(cl_context context, cl_device_id device, // Spec specifies that alignment is no bigger than the largest supported data // type if (alignment > 
sizeof(cl_long16)) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "Requested alignment greater than largest data type " - "supported by device (long16)"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Requested alignment greater than largest data type " + "supported by device (long16)"); } // Ensure requested size is valid and supported by the specified device, or at @@ -385,17 +379,16 @@ clSharedMemAllocINTEL(cl_context context, cl_device_id device, cl_int ret = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(dev_alloc), &dev_alloc, 0); if (ret) { - UNLOCK_BAIL_INFO( - ret, context, - "Failed to query CL_DEVICE_MAX_MEM_ALLOC_SIZE for device"); + BAIL_INFO(ret, context, + "Failed to query CL_DEVICE_MAX_MEM_ALLOC_SIZE for device"); } if (size > dev_alloc) { - UNLOCK_BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, - "Size larger than allocation size supported by device"); + BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, + "Size larger than allocation size supported by device"); } } if (device == nullptr && (size > context->max_mem_alloc_size)) { - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_INVALID_BUFFER_SIZE, context, "Size larger than allocation size supported by any device in context"); } @@ -409,28 +402,27 @@ clSharedMemAllocINTEL(cl_context context, cl_device_id device, switch (*properties) { case CL_MEM_ALLOC_FLAGS_INTEL: { if (seen_flags.insert(CL_MEM_ALLOC_FLAGS_INTEL).second == false) { - UNLOCK_BAIL_INFO(CL_INVALID_PROPERTY, context, - "Property specified multiple times"); + BAIL_INFO(CL_INVALID_PROPERTY, context, + "Property specified multiple times"); } switch (*(properties + 1)) { case CL_MEM_ALLOC_WRITE_COMBINED_INTEL: break; default: - UNLOCK_BAIL_INFO(CL_INVALID_PROPERTY, context, - "Invalid value for property"); + BAIL_INFO(CL_INVALID_PROPERTY, context, "Invalid value for property"); } alloc_flags = *(properties + 1); } break; case CL_MEM_ALLOC_BUFFER_LOCATION_INTEL: { if (seen_flags.insert(CL_MEM_ALLOC_BUFFER_LOCATION_INTEL).second == false) { - 
UNLOCK_BAIL_INFO(CL_INVALID_PROPERTY, context, - "Property specified multiple times"); + BAIL_INFO(CL_INVALID_PROPERTY, context, + "Property specified multiple times"); } mem_id = (cl_uint) * (properties + 1); } break; default: { - UNLOCK_BAIL_INFO(CL_INVALID_PROPERTY, context, "Invalid properties"); + BAIL_INFO(CL_INVALID_PROPERTY, context, "Invalid properties"); } } properties += 2; @@ -455,7 +447,7 @@ clSharedMemAllocINTEL(cl_context context, cl_device_id device, (acl_usm_allocation_t *)acl_malloc(sizeof(acl_usm_allocation_t)); if (!usm_alloc) { - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, "Out of host memory"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, "Out of host memory"); } int error; @@ -466,21 +458,21 @@ clSharedMemAllocINTEL(cl_context context, cl_device_id device, acl_free(usm_alloc); switch (error) { case CL_OUT_OF_HOST_MEMORY: - UNLOCK_BAIL_INFO(error, context, - "Error: Unable to allocate " + std::to_string(size) + - " bytes"); + BAIL_INFO(error, context, + "Error: Unable to allocate " + std::to_string(size) + + " bytes"); break; case CL_INVALID_VALUE: - UNLOCK_BAIL_INFO(error, context, - "Error: Unsupported alignment of " + - std::to_string(alignment)); + BAIL_INFO(error, context, + "Error: Unsupported alignment of " + + std::to_string(alignment)); break; case CL_INVALID_PROPERTY: - UNLOCK_BAIL_INFO(error, context, "Error: Unsuported properties"); + BAIL_INFO(error, context, "Error: Unsuported properties"); break; default: - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Error: Unable to allocate memory"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Error: Unable to allocate memory"); break; } } @@ -494,45 +486,43 @@ clSharedMemAllocINTEL(cl_context context, cl_device_id device, usm_alloc->alignment = alignment; l_add_usm_alloc_to_context(context, usm_alloc); - UNLOCK_RETURN(mem); + return mem; } // After all the error check, still error out // Shared allocation is not supported yet - UNLOCK_BAIL_INFO( - CL_INVALID_VALUE, context, - 
"Shared allocation is not supported for devices in this context"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Shared allocation is not supported for devices in this context"); } ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clMemFreeINTEL(cl_context context, void *ptr) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_context_is_valid(context)) { - UNLOCK_RETURN(CL_INVALID_CONTEXT); + return CL_INVALID_CONTEXT; } // NULL is valid input where nothing happens if (ptr == NULL) { - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } acl_usm_allocation_t *usm_alloc = acl_get_usm_alloc_from_ptr(context, ptr); if (!usm_alloc) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Memory must be USM allocation in context"); + ERR_RET(CL_INVALID_VALUE, context, + "Memory must be USM allocation in context"); } if (usm_alloc->range.begin != ptr) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Pointer must be exact value returned by allocation"); + ERR_RET(CL_INVALID_VALUE, context, + "Pointer must be exact value returned by allocation"); } switch (usm_alloc->type) { case CL_MEM_TYPE_HOST_INTEL: { if (acl_get_hal()->free) { if (acl_get_hal()->free(context, const_cast(ptr))) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Failed to free host allocation"); + ERR_RET(CL_INVALID_VALUE, context, "Failed to free host allocation"); } } break; @@ -540,22 +530,20 @@ CL_API_ENTRY cl_int CL_API_CALL clMemFreeINTEL(cl_context context, void *ptr) { case CL_MEM_TYPE_DEVICE_INTEL: { cl_int status = clReleaseMemObjectIntelFPGA(usm_alloc->mem); if (status != CL_SUCCESS) { - UNLOCK_RETURN(status); + return status; } break; } case CL_MEM_TYPE_SHARED_INTEL: { if (acl_get_hal()->free) { if (acl_get_hal()->free(context, const_cast(ptr))) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Failed to free shared allocation"); + ERR_RET(CL_INVALID_VALUE, context, "Failed to free shared allocation"); } } break; } default: { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Pointer must be from USM 
allocation"); + ERR_RET(CL_INVALID_VALUE, context, "Pointer must be from USM allocation"); break; } } @@ -563,31 +551,31 @@ CL_API_ENTRY cl_int CL_API_CALL clMemFreeINTEL(cl_context context, void *ptr) { l_remove_usm_alloc_from_context(context, usm_alloc); acl_free(usm_alloc); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clMemBlockingFreeINTEL(cl_context context, void *ptr) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_context_is_valid(context)) { - UNLOCK_RETURN(CL_INVALID_CONTEXT); + return CL_INVALID_CONTEXT; } // NULL is valid input where nothing happens if (ptr == NULL) { - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } acl_usm_allocation_t *usm_alloc = acl_get_usm_alloc_from_ptr(context, ptr); if (!usm_alloc) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Memory must be USM allocation in context"); + ERR_RET(CL_INVALID_VALUE, context, + "Memory must be USM allocation in context"); } if (usm_alloc->range.begin != ptr) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Pointer must be exact value returned by allocation"); + ERR_RET(CL_INVALID_VALUE, context, + "Pointer must be exact value returned by allocation"); } // wait for enqueued commands that uses ptr to finish before free @@ -597,8 +585,7 @@ CL_API_ENTRY cl_int CL_API_CALL clMemBlockingFreeINTEL(cl_context context, case CL_MEM_TYPE_HOST_INTEL: { if (acl_get_hal()->free) { if (acl_get_hal()->free(context, const_cast(ptr))) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Failed to free host allocation"); + ERR_RET(CL_INVALID_VALUE, context, "Failed to free host allocation"); } } break; @@ -606,22 +593,20 @@ CL_API_ENTRY cl_int CL_API_CALL clMemBlockingFreeINTEL(cl_context context, case CL_MEM_TYPE_DEVICE_INTEL: { cl_int status = clReleaseMemObjectIntelFPGA(usm_alloc->mem); if (status != CL_SUCCESS) { - UNLOCK_RETURN(status); + return status; } break; } case CL_MEM_TYPE_SHARED_INTEL: { if (acl_get_hal()->free) { if 
(acl_get_hal()->free(context, const_cast(ptr))) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Failed to free shared allocation"); + ERR_RET(CL_INVALID_VALUE, context, "Failed to free shared allocation"); } } break; } default: { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Pointer must be from USM allocation"); + ERR_RET(CL_INVALID_VALUE, context, "Pointer must be from USM allocation"); break; } } @@ -629,20 +614,20 @@ CL_API_ENTRY cl_int CL_API_CALL clMemBlockingFreeINTEL(cl_context context, l_remove_usm_alloc_from_context(context, usm_alloc); acl_free(usm_alloc); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clGetMemAllocInfoINTEL( cl_context context, const void *ptr, cl_mem_info_intel param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_context_is_valid(context)) { - UNLOCK_RETURN(CL_INVALID_CONTEXT); + return CL_INVALID_CONTEXT; } - UNLOCK_VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, - param_value_size_ret, context); + VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, param_value_size_ret, + context); // Get USM allocation associated with ptr acl_usm_allocation_t *usm_alloc = acl_get_usm_alloc_from_ptr(context, ptr); @@ -701,8 +686,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetMemAllocInfoINTEL( } break; default: { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Param name is not a valid query"); + ERR_RET(CL_INVALID_VALUE, context, "Param name is not a valid query"); } break; } @@ -710,8 +694,8 @@ CL_API_ENTRY cl_int CL_API_CALL clGetMemAllocInfoINTEL( // Try to return the param value. if (param_value_size < result.size) { // Buffer is too small to hold the return value. 
- UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Param value size is smaller than query return type"); + ERR_RET(CL_INVALID_VALUE, context, + "Param value size is smaller than query return type"); } RESULT_COPY(param_value, param_value_size); } @@ -719,7 +703,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetMemAllocInfoINTEL( if (param_value_size_ret) { *param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } // clEnqueueMemsetINTEL has been removed in the latest OpenCL spec, but SYCl @@ -741,60 +725,59 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemFillINTEL( size_t pattern_size, size_t size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) { char *ptr; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (dst_ptr == NULL) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer argument cannot be NULL"); } if (((uintptr_t)dst_ptr) % (pattern_size) != 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer not aligned with pattern size"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer not aligned with pattern size"); } if (pattern == NULL) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pattern argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pattern argument cannot be NULL"); } if (pattern_size == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pattern size argument cannot be 0"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pattern size argument cannot be 0"); } // Pattern size must be less than largest supported int/float vec type if (pattern_size > sizeof(double) * 16) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Patern size must be 
less than double16"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Patern size must be less than double16"); } // Pattern size can only be power of 2 if (pattern_size & (pattern_size - 1)) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Patern size must be power of 2"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Patern size must be power of 2"); } if (size == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Size cannot be 0"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, "Size cannot be 0"); } if (size % pattern_size != 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Size must be multiple of pattern size"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Size must be multiple of pattern size"); } // This array is passed to clSetEventCallback for releasing the // allocated memory and releasing the event, if *event is null. void **callback_data = (void **)acl_malloc(sizeof(void *) * 2); if (!callback_data) { - UNLOCK_ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, - "Out of host memory"); + ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, + "Out of host memory"); } acl_aligned_ptr_t *aligned_ptr = (acl_aligned_ptr_t *)acl_malloc(sizeof(acl_aligned_ptr_t)); if (!aligned_ptr) { acl_free(callback_data); - UNLOCK_ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, - "Out of host memory"); + ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, + "Out of host memory"); } // Replicating the value, size times. 
@@ -803,8 +786,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemFillINTEL( if (!ptr) { acl_free(aligned_ptr); acl_free(callback_data); - UNLOCK_ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, - "Out of host memory"); + ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, + "Out of host memory"); } for (cl_uint i = 0; i < size / pattern_size; i++) { @@ -822,8 +805,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemFillINTEL( acl_mem_aligned_free(command_queue->context, aligned_ptr); acl_free(aligned_ptr); acl_free(callback_data); - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Size accesses outside of USM allocation dst_ptr range"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Size accesses outside of USM allocation dst_ptr range"); } if (usm_alloc->type == CL_MEM_TYPE_DEVICE_INTEL) { dst_device = usm_alloc->device; @@ -838,8 +821,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemFillINTEL( acl_mem_aligned_free(command_queue->context, aligned_ptr); acl_free(aligned_ptr); acl_free(callback_data); - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Memory allocation needs to be on command queue device"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Memory allocation needs to be on command queue device"); } cl_event tmp_event = NULL; @@ -853,7 +836,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemFillINTEL( acl_mem_aligned_free(command_queue->context, aligned_ptr); acl_free(aligned_ptr); acl_free(callback_data); - UNLOCK_RETURN(status); // already signalled callback + return status; // already signalled callback } tmp_event->cmd.info.usm_xfer.src_ptr = ptr; tmp_event->cmd.info.usm_xfer.dst_ptr = dst_ptr; @@ -878,7 +861,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemFillINTEL( acl_free_allocation_after_event_completion, (void *)callback_data); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -886,30 +869,30 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemcpyINTEL( cl_command_queue command_queue, cl_bool 
blocking, void *dst_ptr, const void *src_ptr, size_t size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (dst_ptr == NULL) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer argument cannot be NULL"); } if (src_ptr == NULL) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer argument cannot be NULL"); } if (size == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer size cannot be 0"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer size cannot be 0"); } if (((char *)src_ptr < (char *)dst_ptr && (char *)src_ptr + size > (char *)dst_ptr) || ((char *)dst_ptr < (char *)src_ptr && (char *)dst_ptr + size > (char *)src_ptr)) { - UNLOCK_ERR_RET(CL_MEM_COPY_OVERLAP, command_queue->context, - "Source and destination memory overlaps"); + ERR_RET(CL_MEM_COPY_OVERLAP, command_queue->context, + "Source and destination memory overlaps"); } acl_usm_allocation_t *dst_usm_alloc = @@ -919,8 +902,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemcpyINTEL( cl_device_id dst_device = NULL; if (dst_usm_alloc) { if (l_ptr_in_usm_alloc_range(dst_usm_alloc, dst_ptr, size) != CL_TRUE) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Size accesses outside of USM allocation dst_ptr range"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Size accesses outside of USM allocation dst_ptr range"); } if (dst_usm_alloc->type == CL_MEM_TYPE_DEVICE_INTEL) { dst_device = dst_usm_alloc->device; @@ -936,8 +919,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemcpyINTEL( // Even if src_ptr is not USM 
pointer, continue assuming it's system mem if (src_usm_alloc) { if (l_ptr_in_usm_alloc_range(src_usm_alloc, src_ptr, size) != CL_TRUE) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Size accesses outside of USM allocation src_ptr range"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Size accesses outside of USM allocation src_ptr range"); } if (src_usm_alloc->type == CL_MEM_TYPE_DEVICE_INTEL) { src_device = src_usm_alloc->device; @@ -947,8 +930,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemcpyINTEL( if ((dst_device && dst_device->id != command_queue->device->id) || (src_device && src_device->id != command_queue->device->id)) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Memory allocation needs to be on command queue's device"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Memory allocation needs to be on command queue's device"); } cl_event tmp_event = NULL; @@ -958,7 +941,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemcpyINTEL( acl_create_event(command_queue, num_events_in_wait_list, event_wait_list, CL_COMMAND_MEMCPY_INTEL, &tmp_event); if (status != CL_SUCCESS) { - UNLOCK_RETURN(status); // already signalled callback + return status; // already signalled callback } tmp_event->cmd.info.usm_xfer.src_ptr = src_ptr; tmp_event->cmd.info.usm_xfer.dst_ptr = dst_ptr; @@ -982,10 +965,10 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemcpyINTEL( } if (blocking && status == CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST) { - UNLOCK_RETURN(status); + return status; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } // Unused argument names are commented out to avoid Windows compile warning: @@ -996,14 +979,14 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemINTEL( cl_command_queue command_queue, const void *ptr, size_t /* size */, cl_mem_migration_flags /* flags */, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if 
(!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (ptr == NULL) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer argument can not be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer argument can not be NULL"); } // Migrate currently doesn't do anything @@ -1015,7 +998,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemINTEL( CL_COMMAND_MIGRATEMEM_INTEL, &local_event); if (result != CL_SUCCESS) { - UNLOCK_RETURN(result); + return result; } if (event) { @@ -1023,7 +1006,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemINTEL( } else { clReleaseEvent(local_event); } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } // Unused argument names are commented out to avoid Windows compile warning: @@ -1034,14 +1017,14 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemAdviseINTEL( cl_command_queue command_queue, const void *ptr, size_t /* size */, cl_mem_advice_intel /* advice */, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (ptr == NULL) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer argument can not be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer argument can not be NULL"); } // MemAdvise currently doesn't do anything @@ -1053,7 +1036,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemAdviseINTEL( CL_COMMAND_MEMADVISE_INTEL, &local_event); if (result != CL_SUCCESS) { - UNLOCK_RETURN(result); + return result; } if (event) { @@ -1061,7 +1044,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemAdviseINTEL( } else { clReleaseEvent(local_event); } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } void acl_usm_memcpy(void *, acl_device_op_t *op) { diff --git 
a/test/acl_device_op_test.cpp b/test/acl_device_op_test.cpp index e6c116fb..2934e2cb 100644 --- a/test/acl_device_op_test.cpp +++ b/test/acl_device_op_test.cpp @@ -133,7 +133,7 @@ static struct _cl_event myevents[EVENT_NUM] = {{0}}; TEST_GROUP(device_op) { virtual void setup() { - acl_lock(); + acl_mutex_wrapper.lock(); acl_test_setup_generic_system(); acl_init_device_op_queue(&m_doq); clear_queue_callbacks(&m_doq); @@ -152,7 +152,7 @@ TEST_GROUP(device_op) { virtual void teardown() { unload(); acl_test_teardown_generic_system(); - acl_unlock(); + acl_mutex_wrapper.unlock(); acl_test_run_standard_teardown_checks(); } diff --git a/test/acl_event_test.cpp b/test/acl_event_test.cpp index 84f1186b..eec5d763 100644 --- a/test/acl_event_test.cpp +++ b/test/acl_event_test.cpp @@ -292,7 +292,7 @@ MT_TEST(acl_event, acl_create_event) { // Bad command queue struct _cl_command_queue fake_cq = {0}; - acl_lock(); + acl_mutex_wrapper.lock(); CHECK_EQUAL(CL_INVALID_COMMAND_QUEUE, acl_create_event(0, 0, 0, CL_COMMAND_MARKER, &user_event)); CHECK_EQUAL(CL_INVALID_COMMAND_QUEUE, @@ -338,7 +338,7 @@ MT_TEST(acl_event, acl_create_event) { CHECK(cq0); acl_update_queue(cq0); - acl_unlock(); + acl_mutex_wrapper.unlock(); // Check the callback functions initialization. 
CHECK_EQUAL(NULL, event[0]->callback_list); @@ -614,7 +614,7 @@ MT_TEST(acl_event, event_liveness) { CHECK_EQUAL(CL_SUCCESS, status); CHECK(cq0); - acl_lock(); + acl_mutex_wrapper.lock(); cl_event event; status = acl_create_event(cq0, 0, 0, CL_COMMAND_MARKER, &event); CHECK_EQUAL(CL_SUCCESS, status); @@ -659,7 +659,7 @@ MT_TEST(acl_event, event_liveness) { acl_set_execution_status(event, CL_QUEUED); CHECK(acl_event_is_live(event)); acl_retain(event); - acl_unlock(); + acl_mutex_wrapper.unlock(); this->kill_event(event); @@ -690,7 +690,7 @@ MT_TEST(acl_event, event_callbacks) { CHECK_EQUAL(CL_SUCCESS, status); CHECK(cq0); - acl_lock(); + acl_mutex_wrapper.lock(); cl_event event, event2, event3; status = acl_create_event(cq0, 0, 0, CL_COMMAND_MARKER, &event); CHECK_EQUAL(CL_SUCCESS, status); @@ -807,7 +807,7 @@ MT_TEST(acl_event, event_callbacks) { CHECK_EQUAL(1, call_flags_event3[i]); } clReleaseEvent(user_event); - acl_unlock(); + acl_mutex_wrapper.unlock(); this->kill_event(event); this->kill_event(event2); diff --git a/test/acl_globals_test.cpp b/test/acl_globals_test.cpp index 7c1a361e..5376d4e6 100644 --- a/test/acl_globals_test.cpp +++ b/test/acl_globals_test.cpp @@ -693,12 +693,12 @@ const acl_system_def_t *acl_test_get_empty_system_def() { return &acltest_empty_system; } -TEST_GROUP(acl_globals_undef){void setup(){acl_lock(); +TEST_GROUP(acl_globals_undef){void setup(){acl_mutex_wrapper.lock(); acl_set_hal(acl_test_get_simple_hal()); } void teardown() { acl_reset_hal(); - acl_unlock(); + acl_mutex_wrapper.unlock(); acl_test_run_standard_teardown_checks(); } diff --git a/test/acl_hal_mmd_test.cpp b/test/acl_hal_mmd_test.cpp index 41dcc390..5b901ee9 100644 --- a/test/acl_hal_mmd_test.cpp +++ b/test/acl_hal_mmd_test.cpp @@ -19,14 +19,14 @@ #include #include -TEST_GROUP(acl_hal_mmd){void setup(){acl_lock(); +TEST_GROUP(acl_hal_mmd){void setup(){acl_mutex_wrapper.lock(); acl_hal_test_setup_generic_system(); this->load(); } void teardown() { this->unload(); 
acl_hal_test_teardown_generic_system(); - acl_unlock(); + acl_mutex_wrapper.unlock(); acl_test_run_standard_teardown_checks(); } diff --git a/test/acl_hal_test.cpp b/test/acl_hal_test.cpp index 266a9cd1..6e27ab2a 100644 --- a/test/acl_hal_test.cpp +++ b/test/acl_hal_test.cpp @@ -162,10 +162,10 @@ bool acltest_hal_emulate_device_mem = false; static void *acltest_hal_device_mem = 0; static size_t acltest_hal_device_mem_size = 0; -TEST_GROUP(acl_hal){void setup(){acl_lock(); +TEST_GROUP(acl_hal){void setup(){acl_mutex_wrapper.lock(); } void teardown() { - acl_unlock(); + acl_mutex_wrapper.unlock(); acl_assert_unlocked(); } } diff --git a/test/acl_support_test.cpp b/test/acl_support_test.cpp index fa33b99a..114c0c1c 100644 --- a/test/acl_support_test.cpp +++ b/test/acl_support_test.cpp @@ -23,12 +23,12 @@ #include #endif -TEST_GROUP(support){void setup(){acl_lock(); +TEST_GROUP(support){void setup(){acl_mutex_wrapper.lock(); acl_test_setup_generic_system(); } void teardown() { acl_test_teardown_generic_system(); - acl_unlock(); + acl_mutex_wrapper.unlock(); acl_test_run_standard_teardown_checks(); } diff --git a/test/acl_test.cpp b/test/acl_test.cpp index 1a194255..890735fc 100644 --- a/test/acl_test.cpp +++ b/test/acl_test.cpp @@ -91,24 +91,24 @@ int main(int argc, const char **argv) { } void acl_test_setup_generic_system() { - acl_lock(); + acl_mutex_wrapper.lock(); acl_set_hal(acl_test_get_simple_hal()); acl_init(acl_test_get_complex_system_def()); - acl_unlock(); + acl_mutex_wrapper.unlock(); } void acl_test_setup_empty_system() { - acl_lock(); + acl_mutex_wrapper.lock(); acl_set_hal(acl_test_get_simple_hal()); acl_init(acl_test_get_empty_system_def()); - acl_unlock(); + acl_mutex_wrapper.unlock(); } void acl_test_setup_sample_default_board_system(void) { - acl_lock(); + acl_mutex_wrapper.lock(); acl_set_hal(acl_test_get_simple_hal()); acl_init(&acl_test_example_binary_sysdef); - acl_unlock(); + acl_mutex_wrapper.unlock(); } void 
acl_test_teardown_sample_default_board_system(void) { @@ -117,11 +117,11 @@ void acl_test_teardown_sample_default_board_system(void) { void acl_test_teardown_generic_system(void) { acl_test_teardown_system(); } void acl_test_teardown_system(void) { - acl_lock(); + acl_mutex_wrapper.lock(); acl_reset(); acl_reset_hal(); acltest_hal_teardown(); - acl_unlock(); + acl_mutex_wrapper.unlock(); } void acl_hal_test_setup_generic_system(void) { return; }; @@ -482,8 +482,8 @@ static void l_run_benchmark() { times = &results["acl_lock/acl_unlock"]; for (int inner_rep = 0; inner_rep < INNER_REPS; ++inner_rep) { start_time = l_get_timestamp(); - acl_lock(); - acl_unlock(); + acl_mutex_wrapper.lock(); + acl_mutex_wrapper.unlock(); end_time = l_get_timestamp(); times->push_back(end_time - start_time); } @@ -491,11 +491,11 @@ static void l_run_benchmark() { std::cout << "Measuring acl_assert_locked..." << std::endl; times = &results["acl_assert_locked"]; for (int inner_rep = 0; inner_rep < INNER_REPS; ++inner_rep) { - acl_lock(); + acl_mutex_wrapper.lock(); start_time = l_get_timestamp(); acl_assert_locked(); end_time = l_get_timestamp(); - acl_unlock(); + acl_mutex_wrapper.unlock(); times->push_back(end_time - start_time); } diff --git a/test/acl_test.h b/test/acl_test.h index a7943d52..97897b01 100644 --- a/test/acl_test.h +++ b/test/acl_test.h @@ -55,9 +55,8 @@ void CL_CALLBACK acl_test_notify_print(const char *errinfo, #define ACL_LOCKED(...) \ do { \ - acl_lock(); \ + std::scoped_lock lock{acl_mutex_wrapper}; \ { __VA_ARGS__; } \ - acl_unlock(); \ } while (0) /* CAUTION. These are only used in self-tests.