diff --git a/include/acl_thread.h b/include/acl_thread.h index 753adf47..64a3f8b3 100644 --- a/include/acl_thread.h +++ b/include/acl_thread.h @@ -12,6 +12,10 @@ #include #include +// System headers. +#include +#include + #if defined(__cplusplus) extern "C" { #endif @@ -23,10 +27,15 @@ extern "C" { #define ACL_TLS __declspec(thread) #endif +// Forward declaration +class acl_mutex_wrapper_t; + extern ACL_TLS int acl_global_lock_count; extern ACL_TLS int acl_inside_sig_flag; extern ACL_TLS int acl_inside_sig_old_lock_count; +extern acl_mutex_wrapper_t acl_mutex_wrapper; + // -- signal handler functions -- // When we enter a signal handler, we save "acl_global_lock_count" to // "acl_inside_sig_old_lock_count" temporarily. This is because the signal @@ -75,10 +84,6 @@ static inline void acl_sig_unblock_signals() { // -- global lock functions -- -void acl_lock(); -void acl_unlock(); -int acl_suspend_lock(); -void acl_resume_lock(int lock_count); void acl_wait_for_device_update(cl_context context); void acl_signal_device_update(); @@ -105,4 +110,31 @@ void acl_yield_lock_and_thread(); } /* extern "C" */ #endif +// -- RAII wrapper classes -- + +// To follow RAII, provide a mutex class acl_mutex_wrapper_t which may be used +// with std::scoped_lock and std::unique_lock. Note that std::scoped_lock may +// only be constructed with a single instance of acl_mutex_wrapper_t since the +// latter only implements BasicLockable but not Lockable, due to a lack of +// try_lock() functionality in acl_threadsupport. 
+class acl_mutex_wrapper_t { +public: + void lock(); + void unlock(); + int suspend_lock(); + void resume_lock(int lock_count); +}; + +class acl_suspend_lock_guard { +public: + explicit acl_suspend_lock_guard(acl_mutex_wrapper_t &mutex) : mutex(mutex) { + lock_count = mutex.suspend_lock(); + }; + ~acl_suspend_lock_guard() { mutex.resume_lock(lock_count); } + +private: + int lock_count; + acl_mutex_wrapper_t &mutex; +}; + #endif // ACL_THREAD_H diff --git a/include/acl_util.h b/include/acl_util.h index 950716e1..d6af3a9c 100644 --- a/include/acl_util.h +++ b/include/acl_util.h @@ -116,17 +116,6 @@ void acl_dump_mem(cl_mem mem); #endif ///////////////////// -#define UNLOCK_RETURN(ret) \ - do { \ - acl_unlock(); \ - return (ret); \ - } while (0) -#define UNLOCK_RETURN_VOID \ - do { \ - acl_unlock(); \ - return; \ - } while (0) - // This macro is used to signal failure from a function via "errcode_ret" // and return 0. #define BAIL(STATUS) \ @@ -136,13 +125,6 @@ void acl_dump_mem(cl_mem mem); } \ return 0; \ } while (0) -#define UNLOCK_BAIL(STATUS) \ - do { \ - if (errcode_ret) { \ - *errcode_ret = (STATUS); \ - } \ - UNLOCK_RETURN(0); \ - } while (0) // This is used to callback for a context error, assuming C is an // initialized context. @@ -151,42 +133,31 @@ void acl_dump_mem(cl_mem mem); acl_context_callback(C, STR); \ BAIL(STATUS); \ } while (0) -#define UNLOCK_BAIL_INFO(STATUS, C, STR) \ - do { \ - acl_context_callback(C, STR); \ - UNLOCK_BAIL(STATUS); \ - } while (0) #define ERR_RET(STATUS, C, STR) \ do { \ acl_context_callback(C, STR); \ return STATUS; \ } while (0) -#define UNLOCK_ERR_RET(STATUS, C, STR) \ - do { \ - acl_context_callback(C, STR); \ - UNLOCK_RETURN(STATUS); \ - } while (0) // Caller only partly specified the buffer? // Caller isn't asking for any info at all? 
-#define UNLOCK_VALIDATE_ARRAY_OUT_ARGS(buf_size, buf, answer_size_out, \ - context) \ +#define VALIDATE_ARRAY_OUT_ARGS(buf_size, buf, answer_size_out, context) \ do { \ if (buf && buf_size <= 0) { \ acl_context_callback(context, \ #buf " is specified but " #buf_size " is zero"); \ - UNLOCK_RETURN(CL_INVALID_VALUE); \ + return CL_INVALID_VALUE; \ } \ if (buf == 0 && buf_size > 0) { \ acl_context_callback(context, #buf " is not specified but " #buf_size \ " is positive"); \ - UNLOCK_RETURN(CL_INVALID_VALUE); \ + return CL_INVALID_VALUE; \ } \ if (answer_size_out == 0 && buf == 0) { \ acl_context_callback(context, \ #buf " and " #answer_size_out " are both zero"); \ - UNLOCK_RETURN(CL_INVALID_VALUE); \ + return CL_INVALID_VALUE; \ } \ } while (0) diff --git a/src/acl_command.cpp b/src/acl_command.cpp index 8abf3575..6e71fc45 100644 --- a/src/acl_command.cpp +++ b/src/acl_command.cpp @@ -38,22 +38,22 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueBarrierIntelFPGA(cl_command_queue command_queue) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } // For in order queue, since every event is executed in sequence, // there is an implicit barrier after each event. // enqueue barrier does not need to do anything if (!(command_queue->properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)) { - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } // OpenCL 1.2 spec: If event_wait_list is NULL, then this particular command // waits until all previous enqueued commands to command_queue have completed. 
cl_int status = clEnqueueBarrierWithWaitList(command_queue, 0, 0, NULL); - UNLOCK_RETURN(status); + return status; } ACL_EXPORT @@ -66,18 +66,18 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueMarkerIntelFPGA(cl_command_queue command_queue, cl_event *event) { cl_int result; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (!event) - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; result = acl_create_event(command_queue, 0, 0, CL_COMMAND_MARKER, event); - UNLOCK_RETURN(result); + return result; } ACL_EXPORT @@ -91,13 +91,13 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWaitForEventsIntelFPGA( cl_command_queue command_queue, cl_uint num_event, const cl_event *events) { cl_int result; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (num_event == 0 || events == 0) { - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } cl_event event = NULL; result = acl_create_event(command_queue, num_event, events, @@ -110,7 +110,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWaitForEventsIntelFPGA( result = CL_INVALID_EVENT; } - UNLOCK_RETURN(result); + return result; } ACL_EXPORT @@ -129,16 +129,16 @@ clWaitForEventsIntelFPGA(cl_uint num_events, const cl_event *event_list) { cl_context context; bool first_yield_to_hal = true; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (num_events == 0 || event_list == 0) { - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } #ifndef REMOVE_VALID_CHECKS result = acl_check_events(num_events, event_list); if (result != CL_SUCCESS) { - UNLOCK_RETURN(CL_INVALID_EVENT); + return CL_INVALID_EVENT; } #endif @@ -193,12 +193,12 @@ clWaitForEventsIntelFPGA(cl_uint num_events, const cl_event *event_list) { cl_uint i = 0; for 
(i = 0; i < num_events; ++i) { if (event_list[i]->execution_status < 0) - UNLOCK_RETURN(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST); + return CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST; } } #endif - UNLOCK_RETURN(result); + return result; } ACL_EXPORT @@ -214,10 +214,10 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMarkerWithWaitListIntelFPGA( cl_int result; cl_event ret_event = NULL; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } // Spec says: @@ -248,7 +248,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMarkerWithWaitListIntelFPGA( if (ret_event) clReleaseEvent(ret_event); // free the ret event if the caller doesn't want // to return it - UNLOCK_RETURN(result); + return result; } ACL_EXPORT @@ -265,12 +265,12 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueBarrierWithWaitListIntelFPGA( const cl_event *event_wait_list, cl_event *event) { cl_int result; cl_event local_event; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; result = clEnqueueMarkerWithWaitList(command_queue, num_events_in_wait_list, event_wait_list, &local_event); if (result != CL_SUCCESS) { - UNLOCK_RETURN(result); + return result; } if (command_queue->properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) { @@ -282,7 +282,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueBarrierWithWaitListIntelFPGA( } else { clReleaseEvent(local_event); } - UNLOCK_RETURN(result); + return result; } ACL_EXPORT diff --git a/src/acl_command_queue.cpp b/src/acl_command_queue.cpp index e51b73f6..ed4e9774 100644 --- a/src/acl_command_queue.cpp +++ b/src/acl_command_queue.cpp @@ -114,17 +114,17 @@ clCreateCommandQueueWithPropertiesIntelFPGA( cl_command_queue result = 0; cl_command_queue_properties cq_properties = 0; cl_uint q_size_properties = 0, idx = 0; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_context_is_valid(context)) { - 
UNLOCK_BAIL(CL_INVALID_CONTEXT); + BAIL(CL_INVALID_CONTEXT); } if (!acl_device_is_valid(device)) { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid device"); + BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid device"); } if (!acl_context_uses_device(context, device)) { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, - "Device is not associated with the context"); + BAIL_INFO(CL_INVALID_DEVICE, context, + "Device is not associated with the context"); } // Get the properties. Only two possible properties: CL_QUEUE_PROPERTIES and @@ -138,9 +138,9 @@ clCreateCommandQueueWithPropertiesIntelFPGA( if (q_size_properties == 0) q_size_properties = (cl_uint)properties[idx + 1]; else // This property was already given. - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "Invalid queue properties"); + BAIL_INFO(CL_INVALID_VALUE, context, "Invalid queue properties"); } else { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "Invalid queue properties"); + BAIL_INFO(CL_INVALID_VALUE, context, "Invalid queue properties"); } idx += 2; } @@ -152,14 +152,14 @@ clCreateCommandQueueWithPropertiesIntelFPGA( CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT; if (cq_properties & ~(valid_properties)) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "Invalid queue properties"); + BAIL_INFO(CL_INVALID_VALUE, context, "Invalid queue properties"); } // Also check the dependency of options: if (((cq_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) == 0 && (cq_properties & CL_QUEUE_ON_DEVICE)) || ((cq_properties & CL_QUEUE_ON_DEVICE) == 0 && (cq_properties & CL_QUEUE_ON_DEVICE_DEFAULT))) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "Invalid queue properties"); + BAIL_INFO(CL_INVALID_VALUE, context, "Invalid queue properties"); } } { @@ -169,13 +169,11 @@ clCreateCommandQueueWithPropertiesIntelFPGA( // queried from current version of clGetDeviceInfo. So manually failing on // those properties for now. 
if (cq_properties & (CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT)) - UNLOCK_BAIL_INFO( - CL_INVALID_QUEUE_PROPERTIES, context, - "Device does not support the specified queue properties"); + BAIL_INFO(CL_INVALID_QUEUE_PROPERTIES, context, + "Device does not support the specified queue properties"); if (q_size_properties != 0) { // not supported yet. - UNLOCK_BAIL_INFO( - CL_INVALID_QUEUE_PROPERTIES, context, - "Device does not support the specified queue properties"); + BAIL_INFO(CL_INVALID_QUEUE_PROPERTIES, context, + "Device does not support the specified queue properties"); } // Internal user may want to turn off support for OOO Queues @@ -183,9 +181,8 @@ clCreateCommandQueueWithPropertiesIntelFPGA( acl_getenv("CL_CONTEXT_DISABLE_OOO_QUEUES_INTELFPGA"); if (disable_oooq && (cq_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)) { - UNLOCK_BAIL_INFO( - CL_INVALID_QUEUE_PROPERTIES, context, - "Device does not support the specified queue properties"); + BAIL_INFO(CL_INVALID_QUEUE_PROPERTIES, context, + "Device does not support the specified queue properties"); } // What does the device support? @@ -195,24 +192,23 @@ clCreateCommandQueueWithPropertiesIntelFPGA( clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, sizeof(device_props), &device_props, 0); if (cq_properties & ~(device_props)) { - UNLOCK_BAIL_INFO( - CL_INVALID_QUEUE_PROPERTIES, context, - "Device does not support the specified queue properties"); + BAIL_INFO(CL_INVALID_QUEUE_PROPERTIES, context, + "Device does not support the specified queue properties"); } } // Now actually allocate the command queue. 
result = acl_alloc_cl_command_queue(); if (result == 0) { - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate a command queue"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate a command queue"); } // Fail to double the capacity of the pointer array if (!l_init_queue(result, cq_properties, context, device)) { acl_free_cl_command_queue(result); - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate a command queue"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate a command queue"); } if (errcode_ret) { @@ -220,7 +216,7 @@ clCreateCommandQueueWithPropertiesIntelFPGA( } acl_track_object(ACL_OBJ_COMMAND_QUEUE, result); - UNLOCK_RETURN(result); + return result; } ACL_EXPORT @@ -255,13 +251,13 @@ CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueue( ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clRetainCommandQueueIntelFPGA(cl_command_queue command_queue) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } acl_retain(command_queue); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -273,10 +269,10 @@ clRetainCommandQueue(cl_command_queue command_queue) { ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clReleaseCommandQueueIntelFPGA(cl_command_queue command_queue) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } acl_release(command_queue); @@ -288,7 +284,7 @@ clReleaseCommandQueueIntelFPGA(cl_command_queue command_queue) { acl_delete_command_queue(command_queue); } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -302,10 +298,10 @@ CL_API_ENTRY cl_int CL_API_CALL clGetCommandQueueInfoIntelFPGA( cl_command_queue command_queue, cl_command_queue_info param_name, size_t param_value_size, void 
*param_value, size_t *param_value_size_ret) { acl_result_t result; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } RESULT_INIT; @@ -328,14 +324,14 @@ CL_API_ENTRY cl_int CL_API_CALL clGetCommandQueueInfoIntelFPGA( } if (result.size == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Invalid or unsupported command queue property"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Invalid or unsupported command queue property"); } if (param_value) { if (param_value_size < result.size) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Parameter return buffer is too small"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Parameter return buffer is too small"); } RESULT_COPY(param_value, param_value_size); } @@ -343,7 +339,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetCommandQueueInfoIntelFPGA( if (param_value_size_ret) { *param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -360,23 +356,23 @@ CL_API_ENTRY cl_int CL_API_CALL clSetCommandQueuePropertyIntelFPGA( cl_command_queue command_queue, cl_command_queue_properties properties, cl_bool enable, cl_command_queue_properties *old_properties) { cl_command_queue_properties bad_properties; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; bad_properties = ~((cl_command_queue_properties)CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | (cl_command_queue_properties)CL_QUEUE_PROFILING_ENABLE); if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } // Internal user may want to turn off support for OOO Queues const char *disable_oooq = acl_getenv("CL_CONTEXT_DISABLE_OOO_QUEUES_INTELFPGA"); if (disable_oooq && (properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)) { - UNLOCK_ERR_RET(CL_INVALID_QUEUE_PROPERTIES, 
command_queue->context, - "Can't set CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE property, " - "unsupported"); + ERR_RET(CL_INVALID_QUEUE_PROPERTIES, command_queue->context, + "Can't set CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE property, " + "unsupported"); } if (old_properties) { @@ -384,8 +380,8 @@ CL_API_ENTRY cl_int CL_API_CALL clSetCommandQueuePropertyIntelFPGA( } if (properties & bad_properties) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Invalid or unsupported command queue property"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Invalid or unsupported command queue property"); } if (enable) { @@ -397,7 +393,7 @@ CL_API_ENTRY cl_int CL_API_CALL clSetCommandQueuePropertyIntelFPGA( // No queue synchronization is required because we don't support // out-of-order execution. - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -415,10 +411,10 @@ CL_API_ENTRY cl_int CL_API_CALL clFlushIntelFPGA(cl_command_queue command_queue) { bool any_queued = false; const acl_hal_t *hal = acl_get_hal(); - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } // Context is valid too. Force a schedule update. 
@@ -428,7 +424,7 @@ clFlushIntelFPGA(cl_command_queue command_queue) { any_queued = 0; acl_idle_update(context); if (command_queue->num_commands == 0) { - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } // Find if at least one event is not SUBMITTED @@ -454,7 +450,7 @@ clFlushIntelFPGA(cl_command_queue command_queue) { } while (any_queued); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -467,10 +463,10 @@ CL_API_ENTRY cl_int CL_API_CALL clFinishIntelFPGA(cl_command_queue command_queue) { cl_event event = 0; cl_int result; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } // Spec says: @@ -483,7 +479,7 @@ clFinishIntelFPGA(cl_command_queue command_queue) { result = clWaitForEvents(1, &event); clReleaseEvent(event); } - UNLOCK_RETURN(result); + return result; } ACL_EXPORT diff --git a/src/acl_context.cpp b/src/acl_context.cpp index 405a8b2f..4e1001d3 100644 --- a/src/acl_context.cpp +++ b/src/acl_context.cpp @@ -83,24 +83,24 @@ CL_API_ENTRY cl_context CL_API_CALL clCreateContextIntelFPGA( cl_int *errcode_ret) { cl_context context; cl_int status; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; context = l_create_context(properties, pfn_notify, user_data, &status); if (context == NULL || status != CL_SUCCESS) { acl_free_cl_context(context); - UNLOCK_BAIL(status); + BAIL(status); } // Now check the devices. if (num_devices == 0) { acl_context_callback(context, "No devices specified"); acl_free_cl_context(context); - UNLOCK_BAIL(CL_INVALID_VALUE); + BAIL(CL_INVALID_VALUE); } if (devices == 0) { acl_context_callback(context, "No device array specified"); acl_free_cl_context(context); - UNLOCK_BAIL(CL_INVALID_VALUE); + BAIL(CL_INVALID_VALUE); } // Make sure all mentioned devices are valid. 
@@ -108,7 +108,7 @@ CL_API_ENTRY cl_context CL_API_CALL clCreateContextIntelFPGA( if (!acl_device_is_valid_ptr(devices[i])) { acl_context_callback(context, "Invalid device specified"); acl_free_cl_context(context); - UNLOCK_BAIL(CL_INVALID_DEVICE); + BAIL(CL_INVALID_DEVICE); } if (devices[i]->opened_count) { @@ -118,7 +118,7 @@ CL_API_ENTRY cl_context CL_API_CALL clCreateContextIntelFPGA( "device in the device list is currently in use in another " "context created with reprogramming disabled."); acl_free_cl_context(context); - UNLOCK_BAIL(CL_INVALID_VALUE); + BAIL(CL_INVALID_VALUE); } else if (!context->uses_dynamic_sysdef && devices[i]->mode_lock == BINARY) { acl_context_callback( @@ -126,7 +126,7 @@ CL_API_ENTRY cl_context CL_API_CALL clCreateContextIntelFPGA( "device in the device list is currently in use in another " "context created with reprogramming enabled."); acl_free_cl_context(context); - UNLOCK_BAIL(CL_INVALID_VALUE); + BAIL(CL_INVALID_VALUE); } } else { // Since this is the first time creating a context for this device, we @@ -142,7 +142,7 @@ CL_API_ENTRY cl_context CL_API_CALL clCreateContextIntelFPGA( status = l_finalize_context(context, num_devices, devices); if (status != CL_SUCCESS) { - UNLOCK_BAIL(status); + BAIL(status); } // Open the profiler output file after the first context creation @@ -153,7 +153,7 @@ CL_API_ENTRY cl_context CL_API_CALL clCreateContextIntelFPGA( } // the context is created successfully, add it to the set acl_platform.contexts_set.insert(context); - UNLOCK_RETURN(context); + return context; } ACL_EXPORT @@ -172,12 +172,12 @@ CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromTypeIntelFPGA( cl_uint num_devices = 0; cl_int status; cl_device_id devices[ACL_MAX_DEVICE]; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; context = l_create_context(properties, pfn_notify, user_data, &status); if (context == NULL || status != CL_SUCCESS) { acl_free_cl_context(context); - UNLOCK_BAIL(status); + BAIL(status); } // 
Determine device IDs. @@ -186,7 +186,7 @@ CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromTypeIntelFPGA( if (status != CL_SUCCESS || num_devices == 0) { acl_context_callback(context, "Device not found"); acl_free_cl_context(context); - UNLOCK_BAIL(CL_DEVICE_NOT_FOUND); + BAIL(CL_DEVICE_NOT_FOUND); } // Filter out devices. @@ -220,21 +220,21 @@ CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromTypeIntelFPGA( "devices of the given device type are currently in use in " "other contexts created with reprogramming disabled."); acl_free_cl_context(context); - UNLOCK_BAIL(CL_DEVICE_NOT_AVAILABLE); + BAIL(CL_DEVICE_NOT_AVAILABLE); } else { acl_context_callback( context, "Could not create context with reprogramming disabled. All " "devices of the given device type are currently in use in " "other contexts created with reprogramming enabled."); acl_free_cl_context(context); - UNLOCK_BAIL(CL_DEVICE_NOT_AVAILABLE); + BAIL(CL_DEVICE_NOT_AVAILABLE); } } status = l_finalize_context(context, num_devices, devices); if (status != CL_SUCCESS) { - UNLOCK_BAIL(status); + BAIL(status); } // Open the profiler output file after the first context creation @@ -243,7 +243,7 @@ CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromTypeIntelFPGA( if (errcode_ret) { *errcode_ret = CL_SUCCESS; } - UNLOCK_RETURN(context); + return context; } ACL_EXPORT @@ -256,7 +256,7 @@ CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType( ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clRetainContextIntelFPGA(cl_context context) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; // Note: Context creation uses acl_retain<> directly, but users must use // clRetainContext. @@ -264,10 +264,10 @@ CL_API_ENTRY cl_int CL_API_CALL clRetainContextIntelFPGA(cl_context context) { // That's why we use acl_context_is_valid() here instead of just // acl_is_valid_ptr(). 
if (!acl_context_is_valid(context)) { - UNLOCK_RETURN(CL_INVALID_CONTEXT); + return CL_INVALID_CONTEXT; } acl_retain(context); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -277,11 +277,11 @@ CL_API_ENTRY cl_int CL_API_CALL clRetainContext(cl_context context) { ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clReleaseContextIntelFPGA(cl_context context) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; // Error out if the reference count is already 0 if (!acl_context_is_valid(context)) { - UNLOCK_RETURN(CL_INVALID_CONTEXT); + return CL_INVALID_CONTEXT; } // Must mirror what is retained in clRetainContext. @@ -303,7 +303,7 @@ CL_API_ENTRY cl_int CL_API_CALL clReleaseContextIntelFPGA(cl_context context) { // recursively trying to delete them again. if (context->is_being_freed) { acl_release(context); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } context->is_being_freed = 1; @@ -358,7 +358,7 @@ CL_API_ENTRY cl_int CL_API_CALL clReleaseContextIntelFPGA(cl_context context) { acl_close_profiler_file(); } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -371,13 +371,13 @@ CL_API_ENTRY cl_int CL_API_CALL clGetContextInfoIntelFPGA( cl_context context, cl_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { acl_result_t result; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_context_is_valid(context)) { - UNLOCK_RETURN(CL_INVALID_CONTEXT); + return CL_INVALID_CONTEXT; } - UNLOCK_VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, - param_value_size_ret, context); + VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, param_value_size_ret, + context); RESULT_INIT; @@ -403,14 +403,14 @@ CL_API_ENTRY cl_int CL_API_CALL clGetContextInfoIntelFPGA( context->num_property_entries * sizeof(cl_context_properties)); break; default: - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Invalid or unsupported context info query"); + ERR_RET(CL_INVALID_VALUE, context, + 
"Invalid or unsupported context info query"); } if (param_value) { if (param_value_size < result.size) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Parameter return buffer is too small"); + ERR_RET(CL_INVALID_VALUE, context, + "Parameter return buffer is too small"); } RESULT_COPY(param_value, param_value_size); } @@ -418,7 +418,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetContextInfoIntelFPGA( if (param_value_size_ret) { *param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -464,10 +464,10 @@ static cl_context l_create_context(const cl_context_properties *properties, cl_context context = 0; cl_int status; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (user_data && !pfn_notify) { - UNLOCK_BAIL(CL_INVALID_VALUE); + BAIL(CL_INVALID_VALUE); } { @@ -478,12 +478,13 @@ static cl_context l_create_context(const cl_context_properties *properties, if (!allow_mp && platform_owner_pid != 0 && platform_owner_pid != acl_get_pid()) { if (pfn_notify) { - int lock_count = acl_suspend_lock(); - (pfn_notify)("Cannot create contexts in more than one process", 0, 0, - user_data); - acl_resume_lock(lock_count); + { + acl_suspend_lock_guard l(acl_mutex_wrapper); + (pfn_notify)("Cannot create contexts in more than one process", 0, 0, + user_data); + } } - UNLOCK_BAIL(CL_OUT_OF_RESOURCES); + BAIL(CL_OUT_OF_RESOURCES); } } @@ -491,11 +492,12 @@ static cl_context l_create_context(const cl_context_properties *properties, context = acl_alloc_cl_context(); if (context == 0) { if (pfn_notify) { - int lock_count = acl_suspend_lock(); - (pfn_notify)("Could not allocate a context object", 0, 0, user_data); - acl_resume_lock(lock_count); + { + acl_suspend_lock_guard l(acl_mutex_wrapper); + (pfn_notify)("Could not allocate a context object", 0, 0, user_data); + } } - UNLOCK_BAIL(CL_OUT_OF_HOST_MEMORY); + BAIL(CL_OUT_OF_HOST_MEMORY); } context->notify_fn = pfn_notify; @@ -505,26 +507,26 @@ static cl_context l_create_context(const 
cl_context_properties *properties, status = l_load_properties(context, properties); if (status != CL_SUCCESS) { acl_free_cl_context(context); - UNLOCK_BAIL(status); + BAIL(status); } // already called context error callback if (errcode_ret) { *errcode_ret = CL_SUCCESS; } - UNLOCK_RETURN(context); + return context; } static cl_int l_finalize_context(cl_context context, cl_uint num_devices, const cl_device_id *devices) { cl_int status; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; status = acl_get_hal()->try_devices(num_devices, devices, &acl_platform); if (status) { acl_context_callback(context, "Could not open devices"); acl_free_cl_context(context); - UNLOCK_RETURN(status); + return status; } acl_retain(context); @@ -533,12 +535,12 @@ static cl_int l_finalize_context(cl_context context, cl_uint num_devices, if (status != CL_SUCCESS) { l_forcibly_release_allocations(context); acl_free_cl_context(context); - UNLOCK_RETURN(status); // already signaled callback + return status; // already signaled callback } acl_track_object(ACL_OBJ_CONTEXT, context); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } // Analyze and load the context properties. 
@@ -1119,10 +1121,11 @@ void acl_update_context(cl_context context) { ++i) { CL_EXCEPTION_TYPE_INTEL exception_type = 1ULL << i; if (device->device_exception_status & exception_type) { - int lock_count = acl_suspend_lock(); - notify_fn(exception_type, device->exception_private_info[i], - device->exception_cb[i], notify_user_data); - acl_resume_lock(lock_count); + { + acl_suspend_lock_guard l(acl_mutex_wrapper); + notify_fn(exception_type, device->exception_private_info[i], + device->exception_cb[i], notify_user_data); + } } } @@ -1309,10 +1312,10 @@ void acl_context_callback(cl_context context, const std::string errinfo) { if (context && context->notify_fn) { acl_notify_fn_t notify_fn = context->notify_fn; void *notify_user_data = context->notify_user_data; - - int lock_count = acl_suspend_lock(); - notify_fn(errinfo.c_str(), 0, 0, notify_user_data); - acl_resume_lock(lock_count); + { + acl_suspend_lock_guard l(acl_mutex_wrapper); + notify_fn(errinfo.c_str(), 0, 0, notify_user_data); + } } } diff --git a/src/acl_device.cpp b/src/acl_device.cpp index 83151016..84a5e63b 100644 --- a/src/acl_device.cpp +++ b/src/acl_device.cpp @@ -41,12 +41,12 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsIntelFPGA( cl_device_id *devices, cl_uint *num_devices) { cl_int status = CL_SUCCESS; cl_uint num_matched = 0; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_platform_is_valid(platform)) { - UNLOCK_RETURN(CL_INVALID_PLATFORM); + return CL_INVALID_PLATFORM; } - UNLOCK_VALIDATE_ARRAY_OUT_ARGS(num_entries, devices, num_devices, 0); + VALIDATE_ARRAY_OUT_ARGS(num_entries, devices, num_devices, 0); switch (device_type) { case CL_DEVICE_TYPE_CPU: @@ -70,7 +70,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsIntelFPGA( } break; default: - UNLOCK_RETURN(CL_INVALID_DEVICE_TYPE); + return CL_INVALID_DEVICE_TYPE; break; } @@ -81,7 +81,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsIntelFPGA( *num_devices = num_matched; } - UNLOCK_RETURN(status); + return status; } 
ACL_EXPORT @@ -101,14 +101,14 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfoIntelFPGA( char name_buf[MAX_NAME_SIZE]; acl_result_t result; cl_context context = 0; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; #ifndef REMOVE_VALID_CHECKS if (!acl_device_is_valid_ptr(device)) { - UNLOCK_RETURN(CL_INVALID_DEVICE); + return CL_INVALID_DEVICE; } - UNLOCK_VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, - param_value_size_ret, 0); + VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, param_value_size_ret, + 0); #endif RESULT_INIT; @@ -130,7 +130,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfoIntelFPGA( case CL_DEVICE_VENDOR: context = clCreateContext(0, 1, &device, NULL, NULL, &status); if (status != CL_SUCCESS) { - UNLOCK_RETURN(status); + return status; } break; } @@ -159,7 +159,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfoIntelFPGA( if (param_name == CL_DEVICE_AVAILABLE) { // special case RESULT_BOOL(0); // it must not be available } else { - UNLOCK_RETURN(status); + return status; } } else if (param_name == CL_DEVICE_AVAILABLE) { RESULT_BOOL( @@ -568,14 +568,14 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfoIntelFPGA( if (result.size == 0) { // We didn't implement the enum. Error out semi-gracefully. - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } if (param_value) { // Actually try to return the string. if (param_value_size < result.size) { // Buffer is too small to hold the return value. 
- UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } RESULT_COPY(param_value, param_value_size); } @@ -583,7 +583,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfoIntelFPGA( if (param_value_size_ret) { *param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -613,7 +613,7 @@ CL_API_ENTRY cl_int CL_API_CALL clCreateSubDevicesIntelFPGA( // Since we don't support creating sub devices, we should follow the first // case if in_device is not valid, and the second case if it is. - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; // Suppress compiler warnings. partition_properties = partition_properties; num_entries = num_entries; @@ -621,10 +621,10 @@ CL_API_ENTRY cl_int CL_API_CALL clCreateSubDevicesIntelFPGA( num_devices = num_devices; if (!acl_device_is_valid(in_device)) { - UNLOCK_RETURN(CL_INVALID_DEVICE); + return CL_INVALID_DEVICE; } - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } ACL_EXPORT @@ -638,7 +638,7 @@ CL_API_ENTRY cl_int CL_API_CALL clCreateSubDevices( ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clRetainDeviceIntelFPGA(cl_device_id device) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; // Spec says: // "increments the device reference count if device is a valid sub-device @@ -664,9 +664,9 @@ CL_API_ENTRY cl_int CL_API_CALL clRetainDeviceIntelFPGA(cl_device_id device) { // Since we don't (currently) support sub-devices, valid devices must be // root-level: if (acl_device_is_valid(device)) { - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } else { - UNLOCK_RETURN(CL_INVALID_DEVICE); + return CL_INVALID_DEVICE; } } ACL_EXPORT @@ -676,7 +676,7 @@ CL_API_ENTRY cl_int CL_API_CALL clRetainDevice(cl_device_id device) { ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clReleaseDeviceIntelFPGA(cl_device_id device) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; // Spec says: // "decrements the device reference count if device is a valid sub-device @@ -695,9 
+695,9 @@ clReleaseDeviceIntelFPGA(cl_device_id device) { // Since we don't (currently) support sub-devices, valid devices must be // root-level: if (acl_device_is_valid(device)) { - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } else { - UNLOCK_RETURN(CL_INVALID_DEVICE); + return CL_INVALID_DEVICE; } } @@ -712,22 +712,22 @@ clReconfigurePLLIntelFPGA(cl_device_id device, const char *pll_settings_str) { // comments specified for struct pll_setting_t in include/acl_pll. const acl_hal_t *hal; cl_int configure_status; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_device_is_valid(device)) { - UNLOCK_RETURN(CL_INVALID_DEVICE); + return CL_INVALID_DEVICE; } if (!pll_settings_str) { - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } hal = acl_get_hal(); configure_status = hal->pll_reconfigure(device->def.physical_device_id, pll_settings_str); if (configure_status == 0) - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; else - UNLOCK_RETURN(CL_INVALID_OPERATION); + return CL_INVALID_OPERATION; } ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clSetDeviceExceptionCallback( @@ -745,16 +745,16 @@ clSetDeviceExceptionCallbackIntelFPGA( acl_exception_notify_fn_t pfn_exception_notify, void *user_data) { unsigned i; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!pfn_exception_notify) - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; if (!listen_mask) - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; if (!devices && num_devices > 0) - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; if (devices && num_devices == 0) - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; for (i = 0; i < num_devices; ++i) { devices[i]->exception_notify_fn = pfn_exception_notify; @@ -762,7 +762,7 @@ clSetDeviceExceptionCallbackIntelFPGA( devices[i]->listen_mask = listen_mask; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ////////////////////////////// diff --git a/src/acl_event.cpp b/src/acl_event.cpp index 
ffccdaf9..b94ed81a 100644 --- a/src/acl_event.cpp +++ b/src/acl_event.cpp @@ -81,12 +81,12 @@ static void l_record_milestone(cl_event event, cl_profiling_info milestone); ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clRetainEventIntelFPGA(cl_event event) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_event_is_valid(event)) { - UNLOCK_RETURN(CL_INVALID_EVENT); + return CL_INVALID_EVENT; } acl_retain(event); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -96,14 +96,14 @@ CL_API_ENTRY cl_int CL_API_CALL clRetainEvent(cl_event event) { ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clReleaseEventIntelFPGA(cl_event event) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_event_is_valid(event)) { - UNLOCK_RETURN(CL_INVALID_EVENT); + return CL_INVALID_EVENT; } if (!acl_is_retained(event)) { - UNLOCK_ERR_RET(CL_INVALID_EVENT, event->context, - "Trying to release an event that is not retained"); + ERR_RET(CL_INVALID_EVENT, event->context, + "Trying to release an event that is not retained"); } acl_release(event); @@ -134,7 +134,7 @@ CL_API_ENTRY cl_int CL_API_CALL clReleaseEventIntelFPGA(cl_event event) { } } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -147,10 +147,10 @@ CL_API_ENTRY cl_int CL_API_CALL clGetEventInfoIntelFPGA( cl_event event, cl_event_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { acl_result_t result; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_event_is_valid(event)) { - UNLOCK_RETURN(CL_INVALID_EVENT); + return CL_INVALID_EVENT; } // Give the scheduler a nudge. 
@@ -159,8 +159,8 @@ CL_API_ENTRY cl_int CL_API_CALL clGetEventInfoIntelFPGA( acl_idle_update(event->context); } - UNLOCK_VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, - param_value_size_ret, event->context); + VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, param_value_size_ret, + event->context); RESULT_INIT; @@ -193,14 +193,14 @@ CL_API_ENTRY cl_int CL_API_CALL clGetEventInfoIntelFPGA( } if (result.size == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, event->context, - "Invalid or unsupported event query"); + ERR_RET(CL_INVALID_VALUE, event->context, + "Invalid or unsupported event query"); } if (param_value) { if (param_value_size < result.size) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, event->context, - "Parameter return buffer is too small"); + ERR_RET(CL_INVALID_VALUE, event->context, + "Parameter return buffer is too small"); } RESULT_COPY(param_value, param_value_size); } @@ -208,7 +208,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetEventInfoIntelFPGA( if (param_value_size_ret) { *param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -226,21 +226,21 @@ CL_API_ENTRY cl_int CL_API_CALL clGetEventProfilingInfoIntelFPGA( cl_event event, cl_profiling_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { acl_result_t result; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_event_is_valid(event)) { - UNLOCK_RETURN(CL_INVALID_EVENT); + return CL_INVALID_EVENT; } - UNLOCK_VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, - param_value_size_ret, event->context); + VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, param_value_size_ret, + event->context); // check if the event supports the profiling and error out accordingly if (event->cmd.type == CL_COMMAND_USER) { - UNLOCK_ERR_RET(CL_PROFILING_INFO_NOT_AVAILABLE, event->context, - "Profiling information is not available for user events"); + ERR_RET(CL_PROFILING_INFO_NOT_AVAILABLE, event->context, + 
"Profiling information is not available for user events"); } else if (!event->support_profiling) { // since user event will not have command_queue set, no need to check again - UNLOCK_ERR_RET( + ERR_RET( CL_PROFILING_INFO_NOT_AVAILABLE, event->context, "Profiling information is not available because " "CL_QUEUE_PROFILING_ENABLE was not set on the event's command queue"); @@ -266,14 +266,13 @@ CL_API_ENTRY cl_int CL_API_CALL clGetEventProfilingInfoIntelFPGA( } if (result.size == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, event->context, - "Invalid event profiling query"); + ERR_RET(CL_INVALID_VALUE, event->context, "Invalid event profiling query"); } if (param_value) { if (param_value_size < result.size) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, event->context, - "Parameter return buffer is too small"); + ERR_RET(CL_INVALID_VALUE, event->context, + "Parameter return buffer is too small"); } RESULT_COPY(param_value, param_value_size); } @@ -281,7 +280,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetEventProfilingInfoIntelFPGA( if (param_value_size_ret) { *param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -297,10 +296,10 @@ CL_API_ENTRY cl_event CL_API_CALL clCreateUserEventIntelFPGA(cl_context context, cl_int *errcode_ret) { cl_event result = 0; cl_int status; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_context_is_valid(context)) - UNLOCK_BAIL(CL_INVALID_CONTEXT); + BAIL(CL_INVALID_CONTEXT); // Create the user event on the user_event_queue. // In our model, every event is attached to some command queue. @@ -310,7 +309,7 @@ clCreateUserEventIntelFPGA(cl_context context, cl_int *errcode_ret) { 0, // depends on nothing else. CL_COMMAND_USER, &result); if (status != CL_SUCCESS) - UNLOCK_BAIL(status); // already signaled error + BAIL(status); // already signaled error // As per spec. 
acl_set_execution_status(result, CL_SUBMITTED); @@ -318,7 +317,7 @@ clCreateUserEventIntelFPGA(cl_context context, cl_int *errcode_ret) { if (errcode_ret) *errcode_ret = CL_SUCCESS; - UNLOCK_RETURN(result); + return result; } ACL_EXPORT @@ -330,16 +329,16 @@ CL_API_ENTRY cl_event CL_API_CALL clCreateUserEvent(cl_context context, ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clSetUserEventStatusIntelFPGA(cl_event event, cl_int execution_status) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_event_is_valid(event)) { - UNLOCK_RETURN(CL_INVALID_EVENT); + return CL_INVALID_EVENT; } // Either negative, or CL_COMPLETE (which itself is 0) if (execution_status <= CL_COMPLETE) { if (event->execution_status <= CL_COMPLETE) { - UNLOCK_ERR_RET( + ERR_RET( CL_INVALID_OPERATION, event->context, "User event has already been completed or terminated with an error"); } @@ -349,10 +348,9 @@ clSetUserEventStatusIntelFPGA(cl_event event, cl_int execution_status) { // Nudge the scheduler. acl_idle_update(event->context); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } else { - UNLOCK_ERR_RET(CL_INVALID_VALUE, event->context, - "Invalid execution status"); + ERR_RET(CL_INVALID_VALUE, event->context, "Invalid execution status"); } } @@ -371,24 +369,24 @@ CL_API_ENTRY cl_int clSetEventCallbackIntelFPGA( void *user_data), void *user_data) { acl_event_user_callback *cb; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_event_is_valid(event)) { - UNLOCK_RETURN(CL_INVALID_EVENT); + return CL_INVALID_EVENT; } if (pfn_event_notify == NULL) { - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } if (command_exec_callback_type != CL_SUBMITTED && command_exec_callback_type != CL_RUNNING && command_exec_callback_type != CL_COMPLETE) { - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } cb = (acl_event_user_callback *)acl_malloc(sizeof(acl_event_user_callback)); if (!cb) - UNLOCK_RETURN(CL_OUT_OF_HOST_MEMORY); + return 
CL_OUT_OF_HOST_MEMORY; cb->notify_user_data = user_data; cb->event_notify_fn = pfn_event_notify; @@ -403,7 +401,7 @@ CL_API_ENTRY cl_int clSetEventCallbackIntelFPGA( // status is already passed. acl_event_callback(event, event->execution_status); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } // registers a user callback function for a specific command execution status. @@ -454,10 +452,10 @@ void acl_event_callback(cl_event event, cl_int event_command_exec_status) { temp = cb_head; cb_head = cb_head->next; acl_free(temp); - - lock_count = acl_suspend_lock(); - event_notify_fn(event, event_command_exec_status, notify_user_data); - acl_resume_lock(lock_count); + { + acl_suspend_lock_guard l(acl_mutex_wrapper); + event_notify_fn(event, event_command_exec_status, notify_user_data); + } release++; } else { pre = cb_head; @@ -669,8 +667,9 @@ void acl_set_execution_status(cl_event event, int new_status) { // signal handler, which can't lock mutexes, so we don't lock in that case. // All functions called from this one therefore have to use // acl_assert_locked_or_sig() instead of just acl_assert_locked(). + std::unique_lock lock{acl_mutex_wrapper, std::defer_lock}; if (!acl_is_inside_sig()) { - acl_lock(); + lock.lock(); } if (event) { // just being defensive @@ -742,10 +741,6 @@ void acl_set_execution_status(cl_event event, int new_status) { // Signal all waiters. acl_signal_device_update(); } - - if (!acl_is_inside_sig()) { - acl_unlock(); - } } static void l_record_milestone(cl_event event, cl_profiling_info milestone) { diff --git a/src/acl_hal.cpp b/src/acl_hal.cpp index 3b57892d..97071ad3 100644 --- a/src/acl_hal.cpp +++ b/src/acl_hal.cpp @@ -132,7 +132,7 @@ int acl_print_debug_msg(const char *msg, ...) 
{ extern CL_API_ENTRY void CL_API_CALL clSetBoardLibraryIntelFPGA(char *library_name) { acl_mmd_library_names_t *next_library = NULL; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; acl_print_debug_msg("Adding library '%s' to list of libraries to open\n", library_name); @@ -152,7 +152,7 @@ clSetBoardLibraryIntelFPGA(char *library_name) { insertion_point->next = next_library; } - UNLOCK_RETURN_VOID; + return; } #ifdef __GNUC__ diff --git a/src/acl_hostch.cpp b/src/acl_hostch.cpp index f9c9a19f..b51f2030 100644 --- a/src/acl_hostch.cpp +++ b/src/acl_hostch.cpp @@ -243,9 +243,10 @@ CL_API_ENTRY cl_int CL_API_CALL clReadPipeIntelFPGA(cl_mem pipe, void *ptr) { size_t buffer_size; cl_int status = 0; - acl_lock(); - acl_idle_update(pipe->context); - acl_unlock(); + { + std::scoped_lock lock{acl_mutex_wrapper}; + acl_idle_update(pipe->context); + } acl_mutex_lock(&(pipe->host_pipe_info->m_lock)); @@ -343,9 +344,10 @@ CL_API_ENTRY cl_int CL_API_CALL clWritePipeIntelFPGA(cl_mem pipe, void *ptr) { cl_int status = 0; cl_int ret; - acl_lock(); - acl_idle_update(pipe->context); - acl_unlock(); + { + std::scoped_lock lock{acl_mutex_wrapper}; + acl_idle_update(pipe->context); + } acl_mutex_lock(&(pipe->host_pipe_info->m_lock)); @@ -458,9 +460,10 @@ CL_API_ENTRY void *CL_API_CALL clMapHostPipeIntelFPGA(cl_mem pipe, void *buffer = 0; int status = 0; - acl_lock(); - acl_idle_update(pipe->context); - acl_unlock(); + { + std::scoped_lock lock{acl_mutex_wrapper}; + acl_idle_update(pipe->context); + } acl_mutex_lock(&(pipe->host_pipe_info->m_lock)); @@ -587,9 +590,10 @@ clUnmapHostPipeIntelFPGA(cl_mem pipe, void *mapped_ptr, size_t size_to_unmap, int status = 0; int first = 1; - acl_lock(); - acl_idle_update(pipe->context); - acl_unlock(); + { + std::scoped_lock lock{acl_mutex_wrapper}; + acl_idle_update(pipe->context); + } acl_mutex_lock(&(pipe->host_pipe_info->m_lock)); diff --git a/src/acl_icd_dispatch.cpp b/src/acl_icd_dispatch.cpp index b1700bc2..04d37f71 100644 --- 
a/src/acl_icd_dispatch.cpp +++ b/src/acl_icd_dispatch.cpp @@ -78,11 +78,11 @@ clGetExtensionFunctionAddressIntelFPGA(const char *func_name) { ACL_EXPORT CL_API_ENTRY void *CL_API_CALL clGetBoardExtensionFunctionAddressIntelFPGA( const char *func_name, cl_device_id device) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; { void *ret = acl_get_hal()->get_board_extension_function_address( func_name, device->def.physical_device_id); - UNLOCK_RETURN(ret); + return ret; } } @@ -97,11 +97,11 @@ CL_API_ENTRY void *CL_API_CALL clGetExtensionFunctionAddressForPlatformIntelFPGA(cl_platform_id platform, const char *func_name) { // We currently only have one platform - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_platform_is_valid(platform)) { - UNLOCK_RETURN(NULL); + return NULL; } - UNLOCK_RETURN(clGetExtensionFunctionAddressIntelFPGA(func_name)); + return clGetExtensionFunctionAddressIntelFPGA(func_name); } ACL_EXPORT diff --git a/src/acl_kernel.cpp b/src/acl_kernel.cpp index 2f6e5b85..7f2ee78e 100644 --- a/src/acl_kernel.cpp +++ b/src/acl_kernel.cpp @@ -125,12 +125,12 @@ ACL_DEFINE_CL_OBJECT_ALLOC_FUNCTIONS(cl_kernel); ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clRetainKernelIntelFPGA(cl_kernel kernel) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_kernel_is_valid(kernel)) { - UNLOCK_RETURN(CL_INVALID_KERNEL); + return CL_INVALID_KERNEL; } acl_retain(kernel); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -140,10 +140,10 @@ CL_API_ENTRY cl_int CL_API_CALL clRetainKernel(cl_kernel kernel) { ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clReleaseKernelIntelFPGA(cl_kernel kernel) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_kernel_is_valid(kernel)) { - UNLOCK_RETURN(CL_INVALID_KERNEL); + return CL_INVALID_KERNEL; } acl_print_debug_msg("Release kernel %p\n", kernel); @@ -179,7 +179,7 @@ CL_API_ENTRY cl_int CL_API_CALL clReleaseKernelIntelFPGA(cl_kernel kernel) { } else { 
acl_release(kernel); } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -193,14 +193,14 @@ CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernelIntelFPGA( cl_int status; cl_kernel kernel = 0; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; // Can't call the callback, because we have no valid context. if (!acl_program_is_valid(program)) - UNLOCK_BAIL(CL_INVALID_PROGRAM); + BAIL(CL_INVALID_PROGRAM); if (!kernel_name) - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, program->context, "kernel_name is NULL"); + BAIL_INFO(CL_INVALID_VALUE, program->context, "kernel_name is NULL"); // What device program is associated with this kernel? // Right now we only support one device per kernel. @@ -209,12 +209,12 @@ CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernelIntelFPGA( &status, program->context, 0); if (status != CL_SUCCESS) - UNLOCK_BAIL(status); // already signaled callback + BAIL(status); // already signaled callback kernel = acl_program_alloc_kernel(program); if (kernel == 0) { - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, program->context, - "Could not allocate a program object"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, program->context, + "Could not allocate a program object"); } l_init_kernel(kernel, program, accel_def, dev_bin, errcode_ret); @@ -223,7 +223,7 @@ CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernelIntelFPGA( *errcode_ret = CL_SUCCESS; } - UNLOCK_RETURN(kernel); + return kernel; } ACL_EXPORT @@ -237,10 +237,10 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clCreateKernelsInProgramIntelFPGA( cl_program program, cl_uint num_kernels, cl_kernel *kernels, cl_uint *num_kernels_ret) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_program_is_valid(program)) { - UNLOCK_RETURN(CL_INVALID_PROGRAM); + return CL_INVALID_PROGRAM; } auto context = program->context; @@ -251,23 +251,21 @@ CL_API_ENTRY cl_int CL_API_CALL clCreateKernelsInProgramIntelFPGA( l_load_consistently_built_kernels_in_program(program, accel_ret); if (status != CL_SUCCESS) { - 
UNLOCK_RETURN(status); // already signaled + return status; // already signaled } if (accel_ret.size() == 0) { - UNLOCK_ERR_RET( - CL_INVALID_PROGRAM_EXECUTABLE, context, - "No kernels were built across all devices with the same interface"); + ERR_RET(CL_INVALID_PROGRAM_EXECUTABLE, context, + "No kernels were built across all devices with the same interface"); } // Check return buffer spec if (num_kernels == 0 && kernels) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "num_kernels is zero but kernels array is specified"); + ERR_RET(CL_INVALID_VALUE, context, + "num_kernels is zero but kernels array is specified"); } if (num_kernels > 0 && kernels == 0) { - UNLOCK_ERR_RET( - CL_INVALID_VALUE, context, - "num_kernels is non-zero but kernels array is not specified"); + ERR_RET(CL_INVALID_VALUE, context, + "num_kernels is non-zero but kernels array is not specified"); } if (kernels) { @@ -275,7 +273,7 @@ CL_API_ENTRY cl_int CL_API_CALL clCreateKernelsInProgramIntelFPGA( // Result buffer isn't big enough. if (num_kernels < accel_ret.size()) { - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } // The definitions are in accel_ret. Create the kernels. 
@@ -300,7 +298,7 @@ CL_API_ENTRY cl_int CL_API_CALL clCreateKernelsInProgramIntelFPGA( if (num_kernels_ret) *num_kernels_ret = static_cast(accel_ret.size()); - UNLOCK_RETURN(status); + return status; } ACL_EXPORT @@ -320,17 +318,16 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgIntelFPGA(cl_kernel kernel, cl_context context; cl_bool is_pipe = CL_FALSE; cl_bool is_sampler = CL_FALSE; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_kernel_is_valid(kernel)) { - UNLOCK_RETURN(CL_INVALID_KERNEL); + return CL_INVALID_KERNEL; } context = kernel->program->context; if (arg_index >= kernel->accel_def->iface.args.size()) { - UNLOCK_ERR_RET(CL_INVALID_ARG_INDEX, context, - "Argument index is too large"); + ERR_RET(CL_INVALID_ARG_INDEX, context, "Argument index is too large"); } arg_info = &(kernel->accel_def->iface.args[arg_index]); @@ -341,14 +338,14 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgIntelFPGA(cl_kernel kernel, // representing buffers. if (arg_value && (*(cl_mem *)arg_value) && !acl_mem_is_valid(*(cl_mem *)arg_value)) - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, context, - "Non-memory object passed in as memory object argument"); + ERR_RET(CL_INVALID_MEM_OBJECT, context, + "Non-memory object passed in as memory object argument"); } else if (arg_info->category == ACL_ARG_SAMPLER) { if (arg_value && (arg_size != sizeof(cl_sampler) || !acl_sampler_is_valid(*(cl_sampler *)arg_value))) { - UNLOCK_ERR_RET(CL_INVALID_SAMPLER, context, - "Non-sampler object passed in as sampler object argument"); + ERR_RET(CL_INVALID_SAMPLER, context, + "Non-sampler object passed in as sampler object argument"); } is_sampler = CL_TRUE; } else if (arg_size != arg_info->size && arg_value && @@ -361,13 +358,12 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgIntelFPGA(cl_kernel kernel, switch (arg_info->addr_space) { case ACL_ARG_ADDR_LOCAL: /* Size is number of local bytes to allocate */ if (arg_size == 0) { - UNLOCK_ERR_RET(CL_INVALID_ARG_SIZE, context, - 
"Pointer-to-local argument specified zero size"); + ERR_RET(CL_INVALID_ARG_SIZE, context, + "Pointer-to-local argument specified zero size"); } if (arg_value != 0) { - UNLOCK_ERR_RET( - CL_INVALID_ARG_VALUE, context, - "Pointer-to-local argument specified with a non-null value"); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Pointer-to-local argument specified with a non-null value"); } /* We instantiated a specific mem capacity to handle this pointer. @@ -376,10 +372,9 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgIntelFPGA(cl_kernel kernel, { unsigned lmem_size_instantiated = arg_info->lmem_size_bytes; if (arg_size > lmem_size_instantiated) { - UNLOCK_ERR_RET( - CL_INVALID_ARG_SIZE, context, - "Pointer-to-local argument requested size is larger than " - "maximum specified at compile time"); + ERR_RET(CL_INVALID_ARG_SIZE, context, + "Pointer-to-local argument requested size is larger than " + "maximum specified at compile time"); } } break; @@ -387,19 +382,17 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgIntelFPGA(cl_kernel kernel, case ACL_ARG_ADDR_GLOBAL: case ACL_ARG_ADDR_CONSTANT: if (arg_size != sizeof(cl_mem)) { - UNLOCK_ERR_RET( - CL_INVALID_ARG_SIZE, context, - "Pointer-to-global or Pointer-to-constant argument size is " - "not the size of cl_mem"); + ERR_RET(CL_INVALID_ARG_SIZE, context, + "Pointer-to-global or Pointer-to-constant argument size is " + "not the size of cl_mem"); } // Can pass NULL or pointer to NULL in arg_value, or it must be a valid // memory object. 
if (arg_value && (*(cl_mem *)arg_value) && !acl_mem_is_valid(*(cl_mem *)arg_value)) { - UNLOCK_ERR_RET( - CL_INVALID_ARG_VALUE, context, - "Pointer-to-global or Pointer-to-constant argument value is " - "not a valid memory object"); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Pointer-to-global or Pointer-to-constant argument value is " + "not a valid memory object"); } if (arg_value && (*(cl_mem *)arg_value) && @@ -410,8 +403,8 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgIntelFPGA(cl_kernel kernel, // If this buffer is an SVM buffer, assume that the user wants the memory to // be in sync. Treat this the same as an SVM kernel arg and return. if (arg_value && (*(cl_mem *)arg_value) && (*(cl_mem *)arg_value)->is_svm) { - UNLOCK_RETURN(clSetKernelArgSVMPointerIntelFPGA( - kernel, arg_index, (*(cl_mem *)arg_value)->host_mem.aligned_ptr)); + return clSetKernelArgSVMPointerIntelFPGA( + kernel, arg_index, (*(cl_mem *)arg_value)->host_mem.aligned_ptr); } break; @@ -419,22 +412,21 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgIntelFPGA(cl_kernel kernel, if (is_sampler && arg_value != 0 && acl_sampler_is_valid_ptr(*((cl_sampler *)arg_value))) { if (arg_size != sizeof(cl_sampler)) { - UNLOCK_ERR_RET(CL_INVALID_ARG_SIZE, context, - "Sampler argument size is not the size of cl_sampler"); + ERR_RET(CL_INVALID_ARG_SIZE, context, + "Sampler argument size is not the size of cl_sampler"); } if (arg_info->size != sizeof(int)) { - UNLOCK_ERR_RET(CL_INVALID_ARG_SIZE, context, - "Argument size is the wrong size"); + ERR_RET(CL_INVALID_ARG_SIZE, context, + "Argument size is the wrong size"); } } else if (arg_size == sizeof(cl_mem) && acl_pipe_is_valid_pointer(*((cl_mem *)arg_value), kernel)) { is_pipe = CL_TRUE; } else if (arg_size != arg_info->size) { - UNLOCK_ERR_RET(CL_INVALID_ARG_SIZE, context, - "Argument size is the wrong size"); + ERR_RET(CL_INVALID_ARG_SIZE, context, "Argument size is the wrong size"); } if (arg_value == 0) { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, 
"Argument value is NULL"); + ERR_RET(CL_INVALID_ARG_VALUE, context, "Argument value is NULL"); } break; } @@ -452,9 +444,9 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgIntelFPGA(cl_kernel kernel, /* If this is a host pipe, create a host channel and bind them together */ if (arg_info->host_accessible && pipe_ptr->host_pipe_info != NULL) { if (pipe_ptr->host_pipe_info->m_binded_kernel != NULL) { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "This pipe has already been bound to a kernel. Cannot " - "rebind to a new kernel"); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "This pipe has already been bound to a kernel. Cannot " + "rebind to a new kernel"); } // Check to see if the kernel argument's width matches up with our cl_pipe @@ -475,23 +467,23 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgIntelFPGA(cl_kernel kernel, hostpipe_info.is_host_to_dev) { // Direction match } else { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "Host accessible pipe direction is not the same " - "of cl_pipe"); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Host accessible pipe direction is not the same " + "of cl_pipe"); } // Check width if (pipe_ptr->fields.pipe_objs.pipe_packet_size != hostpipe_info.data_width) { - UNLOCK_ERR_RET(CL_INVALID_ARG_SIZE, context, - "Host accessible pipe size is not the same size " - "of cl_pipe"); + ERR_RET(CL_INVALID_ARG_SIZE, context, + "Host accessible pipe size is not the same size " + "of cl_pipe"); } // Check max buffer size if (pipe_ptr->fields.pipe_objs.pipe_max_packets > hostpipe_info.max_buffer_depth) { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "Host accessible pipe max packets size is " - "smaller than cl_pipe requested size"); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Host accessible pipe max packets size is " + "smaller than cl_pipe requested size"); } found = true; } @@ -513,23 +505,23 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgIntelFPGA(cl_kernel kernel, hostpipe_info.is_host_to_dev) { // Direction match } else { - 
UNLOCK_ERR_RET( + ERR_RET( CL_INVALID_ARG_VALUE, context, "Host accessible pipe direction is not the same of cl_pipe"); } // Check width if (pipe_ptr->fields.pipe_objs.pipe_packet_size != hostpipe_info.data_width) { - UNLOCK_ERR_RET( + ERR_RET( CL_INVALID_ARG_SIZE, context, "Host accessible pipe size is not the same size of cl_pipe"); } // Check max buffer size if (pipe_ptr->fields.pipe_objs.pipe_max_packets > hostpipe_info.max_buffer_depth) { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "Host accessible pipe max packets size is smaller " - "than cl_pipe requested size"); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Host accessible pipe max packets size is smaller " + "than cl_pipe requested size"); } found = true; } @@ -547,7 +539,7 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgIntelFPGA(cl_kernel kernel, // figure out which device at enqueue time pipe_ptr->host_pipe_info->binded = false; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } // Now try saving the value. @@ -564,9 +556,8 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgIntelFPGA(cl_kernel kernel, // creation time, or at system initialization... 
#ifndef REMOVE_VALID_CHECKS if ((start_idx + iface_arg_size) > kernel->arg_value_size) { - UNLOCK_ERR_RET( - CL_INVALID_KERNEL, context, - "Argument overflows the space allocated for kernel arguments"); + ERR_RET(CL_INVALID_KERNEL, context, + "Argument overflows the space allocated for kernel arguments"); } #endif @@ -594,9 +585,9 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgIntelFPGA(cl_kernel kernel, .autodiscovery_def.num_global_mem_systems > 1 && !l_check_mem_type_support_on_kernel_arg( kernel, arg_index, ACL_GLOBAL_MEM_DEVICE_PRIVATE)) { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "cl_mem object was set on kernel argument that doesn't " - "have attribute to access device private memory"); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "cl_mem object was set on kernel argument that doesn't " + "have attribute to access device private memory"); } } @@ -724,7 +715,7 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgIntelFPGA(cl_kernel kernel, kernel->arg_defined[arg_index] = 1; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -739,22 +730,21 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgSVMPointerIntelFPGA( cl_kernel kernel, cl_uint arg_index, const void *arg_value) { cl_context context; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; #ifndef REMOVE_VALID_CHECKS if (!acl_kernel_is_valid(kernel)) { - UNLOCK_RETURN(CL_INVALID_KERNEL); + return CL_INVALID_KERNEL; } context = kernel->program->context; if (arg_index >= kernel->accel_def->iface.args.size()) { - UNLOCK_ERR_RET(CL_INVALID_ARG_INDEX, context, - "Argument index is too large"); + ERR_RET(CL_INVALID_ARG_INDEX, context, "Argument index is too large"); } if (arg_value == NULL) { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, "SVM argument is NULL"); + ERR_RET(CL_INVALID_ARG_VALUE, context, "SVM argument is NULL"); } unsigned expected_alignment = @@ -763,13 +753,12 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgSVMPointerIntelFPGA( expected_alignment ? 
expected_alignment : ACL_MEM_ALIGN; // For tests if ((uintptr_t)arg_value % expected_alignment != 0) { if (expected_alignment == ACL_MEM_ALIGN) { - UNLOCK_ERR_RET( - CL_INVALID_ARG_VALUE, context, - "SVM argument is not aligned correctly for type. Ensure the " - "kernel argument is targeting the correct buffer location."); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "SVM argument is not aligned correctly for type. Ensure the " + "kernel argument is targeting the correct buffer location."); } else { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "SVM argument is not aligned correctly for type."); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "SVM argument is not aligned correctly for type."); } } #endif @@ -786,9 +775,8 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgSVMPointerIntelFPGA( // creation time, or at system initialization... #ifndef REMOVE_VALID_CHECKS if ((start_idx + iface_arg_size) > kernel->arg_value_size) { - UNLOCK_ERR_RET( - CL_INVALID_KERNEL, context, - "Argument overflows the space allocated for kernel arguments"); + ERR_RET(CL_INVALID_KERNEL, context, + "Argument overflows the space allocated for kernel arguments"); } // If the board has both SVM and DGM, make sure kernel argument is SVM cl_bool context_has_device_with_physical_mem = CL_FALSE; @@ -812,9 +800,9 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgSVMPointerIntelFPGA( 1 && !l_check_mem_type_support_on_kernel_arg( kernel, arg_index, ACL_GLOBAL_MEM_SHARED_VIRTUAL)) { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "SVM pointer was set on kernel argument that doesn't have " - "attribute to access SVM"); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "SVM pointer was set on kernel argument that doesn't have " + "attribute to access SVM"); } #endif @@ -831,7 +819,7 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgSVMPointerIntelFPGA( kernel->arg_defined[arg_index] = 1; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -843,17 +831,16 @@ CL_API_ENTRY cl_int CL_API_CALL 
clSetKernelArgSVMPointer( ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgMemPointerINTEL( cl_kernel kernel, cl_uint arg_index, const void *arg_value) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_kernel_is_valid(kernel)) { - UNLOCK_RETURN(CL_INVALID_KERNEL); + return CL_INVALID_KERNEL; } cl_context context = kernel->program->context; if (arg_index >= kernel->accel_def->iface.args.size()) { - UNLOCK_ERR_RET(CL_INVALID_ARG_INDEX, context, - "Argument index is too large"); + ERR_RET(CL_INVALID_ARG_INDEX, context, "Argument index is too large"); } // Determine where to write the value. @@ -866,9 +853,8 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgMemPointerINTEL( // creation time, or at system initialization... #ifndef REMOVE_VALID_CHECKS if ((start_idx + iface_arg_size) > kernel->arg_value_size) { - UNLOCK_ERR_RET( - CL_INVALID_KERNEL, context, - "Argument overflows the space allocated for kernel arguments"); + ERR_RET(CL_INVALID_KERNEL, context, + "Argument overflows the space allocated for kernel arguments"); } unsigned expected_alignment = @@ -877,20 +863,20 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgMemPointerINTEL( expected_alignment ? expected_alignment : ACL_MEM_ALIGN; // For tests if ((uintptr_t)arg_value % expected_alignment != 0) { if (expected_alignment == ACL_MEM_ALIGN) { - UNLOCK_ERR_RET( + ERR_RET( CL_INVALID_ARG_VALUE, context, "Pointer argument is not aligned correctly for type. 
If you are " "using unified shared memory compile the kernel with the -usm flag."); } else { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "Pointer argument is not aligned correctly for type."); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Pointer argument is not aligned correctly for type."); } } if (!acl_usm_ptr_belongs_to_context(context, arg_value)) { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "Pointer argument is not allocated using USM or not " - "allocated in correct context."); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Pointer argument is not allocated using USM or not " + "allocated in correct context."); } // Ensure the USM allocation (arg_value) is compatible with what the kernel @@ -914,9 +900,9 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgMemPointerINTEL( acl_usm_allocation_t *usm_alloc = acl_get_usm_alloc_from_ptr(context, arg_value); if (usm_alloc == nullptr) { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "Pointer argument is not allocated using USM or not " - "allocated in correct context."); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Pointer argument is not allocated using USM or not " + "allocated in correct context."); } // Try to find the memory interface that corresponds to this allocation. @@ -962,29 +948,29 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgMemPointerINTEL( ACL_GLOBAL_MEM_DEVICE_ALLOCATION)) { if (kernel_arg_mem->allocation_type & ACL_GLOBAL_MEM_HOST_ALLOCATION) { // Host not compatible with device memory. - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "Argument expects host allocation but pointer is to " - "USM device memory"); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Argument expects host allocation but pointer is to " + "USM device memory"); } else if (kernel_arg_mem->allocation_type & ACL_GLOBAL_MEM_SHARED_ALLOCATION) { // Shared not compatible with device memory. 
- UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "Argument expects shared allocation but pointer is to " - "USM device memory"); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Argument expects shared allocation but pointer is to " + "USM device memory"); } } else if (allocation_mem != kernel_arg_mem) { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "Possibly incompatible interface used for device memory " - "allocation."); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Possibly incompatible interface used for device memory " + "allocation."); } } else if (usm_alloc->type == CL_MEM_TYPE_SHARED_INTEL) { if (!(kernel_arg_mem->allocation_type & ACL_GLOBAL_MEM_SHARED_ALLOCATION)) { if (kernel_arg_mem->allocation_type & ACL_GLOBAL_MEM_DEVICE_ALLOCATION) { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "Argument expects device allocation but pointer is to " - "USM shared memory"); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Argument expects device allocation but pointer is to " + "USM shared memory"); } else if (kernel_arg_mem->allocation_type & ACL_GLOBAL_MEM_HOST_ALLOCATION) { bool compatible = false; @@ -1000,23 +986,23 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgMemPointerINTEL( } } if (!compatible) { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "Argument expects host allocation but pointer is to " - "USM shared memory"); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Argument expects host allocation but pointer is to " + "USM shared memory"); } } } else if (allocation_mem != kernel_arg_mem) { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "Possibly incompatible interface used for shared memory " - "allocation."); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Possibly incompatible interface used for shared memory " + "allocation."); } } else if (usm_alloc->type == CL_MEM_TYPE_HOST_INTEL) { if (!(kernel_arg_mem->allocation_type & ACL_GLOBAL_MEM_HOST_ALLOCATION)) { if (kernel_arg_mem->allocation_type & ACL_GLOBAL_MEM_DEVICE_ALLOCATION) { - 
UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "Argument expects device allocation but pointer is to " - "USM host memory"); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Argument expects device allocation but pointer is to " + "USM host memory"); } else if (kernel_arg_mem->allocation_type & ACL_GLOBAL_MEM_SHARED_ALLOCATION) { bool compatible = false; @@ -1032,13 +1018,13 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgMemPointerINTEL( } } if (!compatible) { - UNLOCK_ERR_RET(CL_INVALID_ARG_VALUE, context, - "Argument expects shared allocation but pointer is " - "to USM host memory"); + ERR_RET(CL_INVALID_ARG_VALUE, context, + "Argument expects shared allocation but pointer is " + "to USM host memory"); } } } else if (allocation_mem != kernel_arg_mem) { - UNLOCK_ERR_RET( + ERR_RET( CL_INVALID_ARG_VALUE, context, "Possibly incompatible interface used for host memory allocation."); } @@ -1073,7 +1059,7 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgMemPointerINTEL( } kernel->ptr_arg_vector[arg_index] = usm_alloc->range.begin; - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -1083,77 +1069,72 @@ clSetKernelExecInfoIntelFPGA(cl_kernel kernel, cl_kernel_exec_info param_name, cl_context context; cl_int status = CL_SUCCESS; size_t iparam; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_kernel_is_valid(kernel)) { - UNLOCK_RETURN(CL_INVALID_KERNEL); + return CL_INVALID_KERNEL; } context = kernel->program->context; if (param_value == NULL) - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, "param_value cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, context, "param_value cannot be NULL"); switch (param_name) { case CL_KERNEL_EXEC_INFO_SVM_PTRS: { iparam = 0; // param_value_size must be a coefficient of sizeof(void*) if (param_value_size % sizeof(void *) != 0) - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "param_value_size is not valid"); + ERR_RET(CL_INVALID_VALUE, context, "param_value_size is not valid"); // The pointers must be valid svm 
pointers or svm pointers + offset into the // SVM region. for (iparam = 0; iparam < param_value_size / (sizeof(void *)); iparam++) { if (!acl_ptr_is_contained_in_context_svm( context, ((void **)param_value)[iparam])) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "param_value contains a pointer that is not contained " - "in the SVM region"); + ERR_RET(CL_INVALID_VALUE, context, + "param_value contains a pointer that is not contained " + "in the SVM region"); } } break; } case CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM: if (param_value_size != sizeof(cl_bool)) - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "param_value_size is not valid"); + ERR_RET(CL_INVALID_VALUE, context, "param_value_size is not valid"); // We currently don't support any fine-grain system SVM: if (*(cl_bool *)param_value == CL_TRUE) - UNLOCK_ERR_RET(CL_INVALID_OPERATION, context, - "No devices in context associated with " - "kernel support fine-grain system SVM allocations"); + ERR_RET(CL_INVALID_OPERATION, context, + "No devices in context associated with " + "kernel support fine-grain system SVM allocations"); break; case CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL: if (param_value_size != sizeof(cl_bool)) - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "param_value_size is not valid"); + ERR_RET(CL_INVALID_VALUE, context, "param_value_size is not valid"); if (*((cl_bool *)param_value) != CL_TRUE && *((cl_bool *)param_value) != CL_FALSE) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "param_value is not valid cl_bool value"); + ERR_RET(CL_INVALID_VALUE, context, + "param_value is not valid cl_bool value"); } break; case CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL: if (param_value_size != sizeof(cl_bool)) - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "param_value_size is not valid"); + ERR_RET(CL_INVALID_VALUE, context, "param_value_size is not valid"); if (*((cl_bool *)param_value) != CL_TRUE && *((cl_bool *)param_value) != CL_FALSE) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - 
"param_value is not valid cl_bool value"); + ERR_RET(CL_INVALID_VALUE, context, + "param_value is not valid cl_bool value"); } break; case CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL: if (param_value_size != sizeof(cl_bool)) - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "param_value_size is not valid"); + ERR_RET(CL_INVALID_VALUE, context, "param_value_size is not valid"); if (*((cl_bool *)param_value) != CL_TRUE && *((cl_bool *)param_value) != CL_FALSE) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "param_value is not valid cl_bool value"); + ERR_RET(CL_INVALID_VALUE, context, + "param_value is not valid cl_bool value"); } break; case CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL: @@ -1161,26 +1142,24 @@ clSetKernelExecInfoIntelFPGA(cl_kernel kernel, cl_kernel_exec_info param_name, kernel->ptr_hashtable.clear(); // param_value_size must be a coefficient of sizeof(void*) if (param_value_size % sizeof(void *) != 0) - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "param_value_size is not valid"); + ERR_RET(CL_INVALID_VALUE, context, "param_value_size is not valid"); // The pointers must be valid device pointer for (iparam = 0; iparam < param_value_size / (sizeof(void *)); iparam++) { acl_usm_allocation_t *usm_alloc = acl_get_usm_alloc_from_ptr(context, ((void **)param_value)[iparam]); if (!usm_alloc) { - UNLOCK_ERR_RET( - CL_INVALID_VALUE, context, - "param_value contains a pointer that is not part of context"); + ERR_RET(CL_INVALID_VALUE, context, + "param_value contains a pointer that is not part of context"); } kernel->ptr_hashtable.insert(usm_alloc->range.begin); } break; default: - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, "Invalid param_name"); + ERR_RET(CL_INVALID_VALUE, context, "Invalid param_name"); } - UNLOCK_RETURN(status); + return status; } ACL_EXPORT @@ -1202,19 +1181,19 @@ CL_API_ENTRY cl_int CL_API_CALL clGetKernelArgInfoIntelFPGA( cl_context context; cl_program program; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if 
(!acl_kernel_is_valid(kernel)) { - UNLOCK_RETURN(CL_INVALID_KERNEL); + return CL_INVALID_KERNEL; } program = kernel->program; context = program->context; - UNLOCK_VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, - param_value_size_ret, context); + VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, param_value_size_ret, + context); if (arg_indx >= kernel->accel_def->iface.args.size()) - UNLOCK_ERR_RET(CL_INVALID_ARG_INDEX, context, "Invalid kernel arg index."); + ERR_RET(CL_INVALID_ARG_INDEX, context, "Invalid kernel arg index."); // addr_space and type_qualifier is always available via autodiscovery, the // other three parameters are optionally loaded in the autodiscovery string, // therefore any one of the three parameter being empty infers information not @@ -1222,15 +1201,15 @@ CL_API_ENTRY cl_int CL_API_CALL clGetKernelArgInfoIntelFPGA( if ((kernel->accel_def->iface.args[arg_indx].name.empty()) && !(param_name == CL_KERNEL_ARG_ADDRESS_QUALIFIER || param_name == CL_KERNEL_ARG_TYPE_QUALIFIER)) - UNLOCK_ERR_RET(CL_KERNEL_ARG_INFO_NOT_AVAILABLE, context, - "Kernel arg info not available."); + ERR_RET(CL_KERNEL_ARG_INFO_NOT_AVAILABLE, context, + "Kernel arg info not available."); // filtering the arguments that are added by the compiler to handle printfs. // In such cases, the arguments won't have any type, hence the type_name is // empty. 
if (!kernel->accel_def->iface.args[arg_indx].name.empty() && kernel->accel_def->iface.args[arg_indx].type_name.empty()) - UNLOCK_ERR_RET(CL_INVALID_ARG_INDEX, context, "Invalid kernel arg index."); + ERR_RET(CL_INVALID_ARG_INDEX, context, "Invalid kernel arg index."); RESULT_INIT; @@ -1270,17 +1249,17 @@ CL_API_ENTRY cl_int CL_API_CALL clGetKernelArgInfoIntelFPGA( break; default: - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, "Invalid kernel arg info query"); + ERR_RET(CL_INVALID_VALUE, context, "Invalid kernel arg info query"); } if (result.size == 0) { - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } // should have already signaled if (param_value) { if (param_value_size < result.size) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Parameter return buffer is too small"); + ERR_RET(CL_INVALID_VALUE, context, + "Parameter return buffer is too small"); } RESULT_COPY(param_value, param_value_size); } @@ -1288,7 +1267,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetKernelArgInfoIntelFPGA( if (param_value_size_ret) { *param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -1306,15 +1285,15 @@ CL_API_ENTRY cl_int CL_API_CALL clGetKernelInfoIntelFPGA( void *param_value, size_t *param_value_size_ret) { acl_result_t result; cl_context context; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_kernel_is_valid(kernel)) { - UNLOCK_RETURN(CL_INVALID_KERNEL); + return CL_INVALID_KERNEL; } context = kernel->program->context; - UNLOCK_VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, - param_value_size_ret, context); + VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, param_value_size_ret, + context); RESULT_INIT; @@ -1339,17 +1318,17 @@ CL_API_ENTRY cl_int CL_API_CALL clGetKernelInfoIntelFPGA( break; default: - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, "Invalid kernel info query"); + ERR_RET(CL_INVALID_VALUE, context, "Invalid kernel info query"); } if (result.size == 0) { - 
UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } // should have already signaled if (param_value) { if (param_value_size < result.size) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Parameter return buffer is too small"); + ERR_RET(CL_INVALID_VALUE, context, + "Parameter return buffer is too small"); } RESULT_COPY(param_value, param_value_size); } @@ -1357,7 +1336,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetKernelInfoIntelFPGA( if (param_value_size_ret) { *param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -1376,10 +1355,10 @@ CL_API_ENTRY cl_int CL_API_CALL clGetKernelWorkGroupInfoIntelFPGA( size_t param_value_size, void *param_value, size_t *param_value_size_ret) { acl_result_t result; cl_context context; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_kernel_is_valid(kernel)) { - UNLOCK_RETURN(CL_INVALID_KERNEL); + return CL_INVALID_KERNEL; } context = kernel->program->context; @@ -1396,22 +1375,21 @@ CL_API_ENTRY cl_int CL_API_CALL clGetKernelWorkGroupInfoIntelFPGA( } } if (!matched) { - UNLOCK_ERR_RET(CL_INVALID_DEVICE, context, - "Kernel program is not built for the specified device"); + ERR_RET(CL_INVALID_DEVICE, context, + "Kernel program is not built for the specified device"); } } else { // Must only be one device for this kernel. 
if (kernel->program->num_devices != 1) { - UNLOCK_ERR_RET( - CL_INVALID_DEVICE, context, - "Device is not specified, but kernel is not built for a unique " - "device"); + ERR_RET(CL_INVALID_DEVICE, context, + "Device is not specified, but kernel is not built for a unique " + "device"); } device = kernel->program->device[0]; } - UNLOCK_VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, - param_value_size_ret, context); + VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, param_value_size_ret, + context); RESULT_INIT; @@ -1425,7 +1403,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetKernelWorkGroupInfoIntelFPGA( acl_platform.max_work_item_sizes); break; } else { - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } case CL_KERNEL_WORK_GROUP_SIZE: RESULT_SIZE_T(kernel->accel_def->max_work_group_size); @@ -1445,17 +1423,17 @@ CL_API_ENTRY cl_int CL_API_CALL clGetKernelWorkGroupInfoIntelFPGA( RESULT_ULONG(0); break; default: - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, "Invalid kernel info query"); + ERR_RET(CL_INVALID_VALUE, context, "Invalid kernel info query"); } if (result.size == 0) { - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } // already signalled. 
if (param_value) { if (param_value_size < result.size) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Parameter return buffer is too small"); + ERR_RET(CL_INVALID_VALUE, context, + "Parameter return buffer is too small"); } RESULT_COPY(param_value, param_value_size); } @@ -1463,7 +1441,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetKernelWorkGroupInfoIntelFPGA( if (param_value_size_ret) { *param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -1481,10 +1459,10 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueNativeKernelIntelFPGA( size_t cb_args, cl_uint num_mem_objects, const cl_mem *mem_list, const void **args_mem_loc, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } // Avoid warnings @@ -1500,8 +1478,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueNativeKernelIntelFPGA( event = event; // We don't support native kernels. 
- UNLOCK_ERR_RET(CL_INVALID_OPERATION, command_queue->context, - "Native kernels are not supported."); + ERR_RET(CL_INVALID_OPERATION, command_queue->context, + "Native kernels are not supported."); } ACL_EXPORT @@ -1523,7 +1501,7 @@ clEnqueueTaskIntelFPGA(cl_command_queue command_queue, cl_kernel kernel, size_t task_global_work_size = 1; size_t task_local_work_size = 1; cl_int ret; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; ret = l_enqueue_kernel_with_type( command_queue, kernel, @@ -1531,7 +1509,7 @@ clEnqueueTaskIntelFPGA(cl_command_queue command_queue, cl_kernel kernel, 0, // global work offset &task_global_work_size, &task_local_work_size, num_events_in_wait_list, event_wait_list, event, CL_COMMAND_TASK); - UNLOCK_RETURN(ret); + return ret; } ACL_EXPORT @@ -1551,14 +1529,14 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernelIntelFPGA( const size_t *local_work_size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) { cl_int ret; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; ret = l_enqueue_kernel_with_type( command_queue, kernel, work_dim, global_work_offset, global_work_size, local_work_size, num_events_in_wait_list, event_wait_list, event, CL_COMMAND_NDRANGE_KERNEL); - UNLOCK_RETURN(ret); + return ret; } ACL_EXPORT @@ -1577,13 +1555,13 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernel( ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clResetKernelsIntelFPGA( cl_context context, cl_uint num_devices, const cl_device_id *device_list) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_context_is_valid(context)) - UNLOCK_RETURN(CL_INVALID_CONTEXT); + return CL_INVALID_CONTEXT; if (num_devices == 0 && device_list != NULL) - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "num_devices is 0 while device list is not NULL"); + ERR_RET(CL_INVALID_VALUE, context, + "num_devices is 0 while device list is not NULL"); if (device_list) { // The supplied devices must be associated with the 
context. cl_uint idev, ictxdev; @@ -1596,8 +1574,8 @@ CL_API_ENTRY cl_int CL_API_CALL clResetKernelsIntelFPGA( saw_it = (context->device[ictxdev] == device_list[idev]); } if (!saw_it) { - UNLOCK_ERR_RET(CL_INVALID_DEVICE, context, - "A specified device is not associated with the context"); + ERR_RET(CL_INVALID_DEVICE, context, + "A specified device is not associated with the context"); } } // Ok, each device is associated with the context. @@ -1618,7 +1596,7 @@ CL_API_ENTRY cl_int CL_API_CALL clResetKernelsIntelFPGA( } acl_idle_update(context); // nudge the scheduler to take care of the rest. - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ////////////////////////////// @@ -3195,12 +3173,12 @@ void acl_receive_kernel_update(int activation_id, cl_int status) { // signal handler, which can't lock mutexes, so we don't lock in that case. // All functions called from this one therefore have to use // acl_assert_locked_or_sig() instead of just acl_assert_locked(). + std::unique_lock lock{acl_mutex_wrapper, std::defer_lock}; if (!acl_is_inside_sig()) { - acl_lock(); + lock.lock(); } if (activation_id >= 0 && activation_id < doq->max_ops) { - // This address is stable, given a fixed activation_id. // So we don't run into race conditions. acl_device_op_t *op = doq->op + activation_id; @@ -3219,10 +3197,6 @@ void acl_receive_kernel_update(int activation_id, cl_int status) { // Signal all waiters. acl_signal_device_update(); } - - if (!acl_is_inside_sig()) { - acl_unlock(); - } } // The kernel invocation has completed. diff --git a/src/acl_mem.cpp b/src/acl_mem.cpp index c9287536..00e0f4af 100644 --- a/src/acl_mem.cpp +++ b/src/acl_mem.cpp @@ -118,7 +118,7 @@ void CL_CALLBACK acl_free_allocation_after_event_completion( event_command_exec_status = event_command_exec_status; // Avoiding Windows warning. 
event = event; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (callback_ptrs[0]) { acl_mem_aligned_free(event->context, (acl_aligned_ptr_t *)callback_ptrs[0]); acl_free(callback_ptrs[0]); @@ -126,7 +126,6 @@ void CL_CALLBACK acl_free_allocation_after_event_completion( if (callback_ptrs[1]) clReleaseEvent(((cl_event)callback_ptrs[1])); acl_free(callback_data); - acl_unlock(); } ACL_DEFINE_CL_OBJECT_ALLOC_FUNCTIONS(cl_mem); @@ -136,17 +135,17 @@ ACL_DEFINE_CL_OBJECT_ALLOC_FUNCTIONS(cl_mem); ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clRetainMemObjectIntelFPGA(cl_mem mem) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_mem_is_valid(mem)) { - UNLOCK_RETURN(CL_INVALID_MEM_OBJECT); + return CL_INVALID_MEM_OBJECT; } acl_retain(mem); acl_print_debug_msg("Retain mem[%p] now %u\n", mem, acl_ref_count(mem)); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -156,13 +155,13 @@ CL_API_ENTRY cl_int CL_API_CALL clRetainMemObject(cl_mem mem) { ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObjectIntelFPGA(cl_mem mem) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; // In the double-free case, we'll error out here, for two reasons: // 1) the reference count will be 0. 
// 1) mem->region == 0 if (!acl_mem_is_valid(mem)) { - UNLOCK_RETURN(CL_INVALID_MEM_OBJECT); + return CL_INVALID_MEM_OBJECT; } acl_release(mem); @@ -249,7 +248,7 @@ CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObjectIntelFPGA(cl_mem mem) { clReleaseContext(context); } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -325,19 +324,19 @@ CL_API_ENTRY cl_int clSetMemObjectDestructorCallbackIntelFPGA( void(CL_CALLBACK *pfn_notify)(cl_mem memobj, void *user_data), void *user_data) { acl_mem_destructor_user_callback *cb; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_mem_is_valid(memobj)) { - UNLOCK_RETURN(CL_INVALID_MEM_OBJECT); + return CL_INVALID_MEM_OBJECT; } if (pfn_notify == NULL) { - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } cb = (acl_mem_destructor_user_callback *)acl_malloc( sizeof(acl_mem_destructor_user_callback)); if (!cb) - UNLOCK_RETURN(CL_OUT_OF_HOST_MEMORY); + return CL_OUT_OF_HOST_MEMORY; // Push to the front of the list. 
cb->notify_user_data = user_data; @@ -345,7 +344,7 @@ CL_API_ENTRY cl_int clSetMemObjectDestructorCallbackIntelFPGA( cb->next = memobj->destructor_callback_list; memobj->destructor_callback_list = cb; - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -421,7 +420,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( unsigned int idevice; cl_uint bank_id = 0; cl_uint tmp_mem_id = 0; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; #ifdef MEM_DEBUG_MSG printf("CreateBuffer\n"); @@ -431,8 +430,8 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( switch (*properties) { case CL_MEM_CHANNEL_INTEL: { if (flags & CL_CHANNEL_7_INTELFPGA) { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, - "Both channel flag and channel property are set"); + BAIL_INFO(CL_INVALID_DEVICE, context, + "Both channel flag and channel property are set"); } bank_id = (cl_uint) * (properties + 1); } break; @@ -440,7 +439,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( tmp_mem_id = (cl_uint) * (properties + 1); } break; default: { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid properties"); + BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid properties"); } } properties += 2; @@ -448,11 +447,10 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( #ifndef REMOVE_VALID_CHECKS if (!acl_context_is_valid(context)) - UNLOCK_BAIL(CL_INVALID_CONTEXT); + BAIL(CL_INVALID_CONTEXT); if (bank_id > 7) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "Invalid channel property value"); + BAIL_INFO(CL_INVALID_VALUE, context, "Invalid channel property value"); } // Check flags @@ -463,8 +461,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS | CL_CHANNEL_7_INTELFPGA | CL_MEM_HETEROGENEOUS_INTELFPGA)) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "Invalid or unsupported flags"); + BAIL_INFO(CL_INVALID_VALUE, context, "Invalid or unsupported flags"); } { @@ 
-478,8 +475,8 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( num_rw_specs++; // Default to CL_MEM_READ_WRITE. if (num_rw_specs > 1) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "More than one read/write flag is specified"); + BAIL_INFO(CL_INVALID_VALUE, context, + "More than one read/write flag is specified"); } if (num_rw_specs == 0) flags |= CL_MEM_READ_WRITE; @@ -493,7 +490,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( if (flags & CL_MEM_HOST_NO_ACCESS) num_rw_specs++; if (num_rw_specs > 1) { - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_INVALID_VALUE, context, "More than one host read/write/no_access flags are specified"); } @@ -501,65 +498,60 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( // Check exclusion between use-host-ptr and others if ((flags & CL_MEM_USE_HOST_PTR) && (flags & CL_MEM_ALLOC_HOST_PTR)) { - UNLOCK_BAIL_INFO( - CL_INVALID_VALUE, context, - "Flags CL_MEM_USE_HOST_PTR and CL_MEM_ALLOC_HOST_PTR are both " - "specified but are mutually exclusive"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Flags CL_MEM_USE_HOST_PTR and CL_MEM_ALLOC_HOST_PTR are both " + "specified but are mutually exclusive"); } if ((flags & CL_MEM_USE_HOST_PTR) && (flags & CL_MEM_COPY_HOST_PTR)) { - UNLOCK_BAIL_INFO( - CL_INVALID_VALUE, context, - "Flags CL_MEM_USE_HOST_PTR and CL_MEM_COPY_HOST_PTR are both " - "specified but are mutually exclusive"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Flags CL_MEM_USE_HOST_PTR and CL_MEM_COPY_HOST_PTR are both " + "specified but are mutually exclusive"); } } // Check host_ptr if (host_ptr == 0 && (flags & CL_MEM_USE_HOST_PTR)) { - UNLOCK_BAIL_INFO( - CL_INVALID_HOST_PTR, context, - "Flag CL_MEM_USE_HOST_PTR is specified, but no host pointer is " - "provided"); + BAIL_INFO(CL_INVALID_HOST_PTR, context, + "Flag CL_MEM_USE_HOST_PTR is specified, but no host pointer is " + "provided"); } if (host_ptr == 0 && (flags & CL_MEM_COPY_HOST_PTR)) { - UNLOCK_BAIL_INFO( - CL_INVALID_HOST_PTR, context, - "Flag 
CL_MEM_COPY_HOST_PTR is specified, but no host pointer is " - "provided"); + BAIL_INFO(CL_INVALID_HOST_PTR, context, + "Flag CL_MEM_COPY_HOST_PTR is specified, but no host pointer is " + "provided"); } if (host_ptr != 0 && !(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))) { - UNLOCK_BAIL_INFO( - CL_INVALID_HOST_PTR, context, - "A host pointer is provided without also specifying one of " - "CL_MEM_USE_HOST_PTR or CL_MEM_COPY_HOST_PTR"); + BAIL_INFO(CL_INVALID_HOST_PTR, context, + "A host pointer is provided without also specifying one of " + "CL_MEM_USE_HOST_PTR or CL_MEM_COPY_HOST_PTR"); } // Check size if (size == 0) { - UNLOCK_BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, - "Memory buffer cannot be of size zero"); + BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, + "Memory buffer cannot be of size zero"); } // If using host memory, then just accept any size. if (!(flags & CL_MEM_USE_HOST_PTR) && (size > context->max_mem_alloc_size)) { - UNLOCK_BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, - "Requested memory object size exceeds device limits"); + BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, + "Requested memory object size exceeds device limits"); } #endif auto *new_block = acl_new(); if (!new_block) { - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate a cl_mem object"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate a cl_mem object"); } // Now actually allocate the mem object. 
mem = acl_alloc_cl_mem(); if (!mem) { acl_delete(new_block); - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate a cl_mem object"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate a cl_mem object"); } mem->mem_id = tmp_mem_id; @@ -633,11 +625,10 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( context_has_device_with_physical_mem) { acl_delete(mem->block_allocation); acl_free_cl_mem(mem); - UNLOCK_BAIL_INFO( - CL_MEM_OBJECT_ALLOCATION_FAILURE, context, - "Detected devices with only SVM and on-board memory in the same " - "context. Altera does not currently support this combination and " - "cannot allocate requested memory object."); + BAIL_INFO(CL_MEM_OBJECT_ALLOCATION_FAILURE, context, + "Detected devices with only SVM and on-board memory in the same " + "context. Altera does not currently support this combination and " + "cannot allocate requested memory object."); } else { mem->is_svm = CL_FALSE; } @@ -659,7 +650,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( ptr.size = size; mem->host_mem = ptr; } else { - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_INVALID_HOST_PTR, context, "On a system that only supports SVM and does not support " "fine-grained system SVM, " @@ -743,8 +734,8 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( if (mem->host_mem.raw == 0) { acl_free_cl_mem(mem); - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate a buffer in host memory"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate a buffer in host memory"); } mem->block_allocation->range.begin = mem->host_mem.aligned_ptr; mem->block_allocation->range.next = @@ -784,9 +775,8 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( if (mem->host_mem.raw == 0) { acl_delete(mem->block_allocation); acl_free_cl_mem(mem); - UNLOCK_BAIL_INFO( - CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate backing store for a device buffer"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate 
backing store for a device buffer"); } } @@ -829,10 +819,9 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( : CL_OUT_OF_RESOURCES; acl_delete(mem->block_allocation); acl_free_cl_mem(mem); - UNLOCK_BAIL_INFO( - code, context, - "Could not allocate a buffer of the specified size due to " - "fragmentation or exhaustion"); + BAIL_INFO(code, context, + "Could not allocate a buffer of the specified size due to " + "fragmentation or exhaustion"); } } @@ -937,8 +926,8 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( acl_delete(mem->block_allocation); acl_free_cl_mem(mem); // Need an error status valid to return from this function - UNLOCK_BAIL_INFO(CL_OUT_OF_RESOURCES, context, - "Could not copy data into the allocated buffer"); + BAIL_INFO(CL_OUT_OF_RESOURCES, context, + "Could not copy data into the allocated buffer"); } } } @@ -953,7 +942,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( printf("CreateBuffer Finished: %zx\n", (size_t)result); #endif - UNLOCK_RETURN(result); + return result; } // Create a buffer. 
@@ -1003,18 +992,18 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBufferIntelFPGA( cl_mem mem; int num_rw_specs = 0; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; #ifdef MEM_DEBUG_MSG printf("CreateSubBuffer"); #endif if (!acl_mem_is_valid(buffer)) { - UNLOCK_BAIL(CL_INVALID_MEM_OBJECT); + BAIL(CL_INVALID_MEM_OBJECT); } if (buffer->mem_object_type != CL_MEM_OBJECT_BUFFER || buffer->fields.buffer_objs.is_subbuffer) { - UNLOCK_BAIL(CL_INVALID_MEM_OBJECT); + BAIL(CL_INVALID_MEM_OBJECT); } context = buffer->context; @@ -1025,7 +1014,7 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBufferIntelFPGA( ~(CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS | CL_CHANNEL_7_INTELFPGA | CL_MEM_HETEROGENEOUS_INTELFPGA)) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "Invalid or unsupported flags"); + BAIL_INFO(CL_INVALID_VALUE, context, "Invalid or unsupported flags"); } // Check for exactly one read/write spec @@ -1036,8 +1025,8 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBufferIntelFPGA( if (flags & CL_MEM_WRITE_ONLY) num_rw_specs++; if (num_rw_specs > 1) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "More than one read/write flag is specified"); + BAIL_INFO(CL_INVALID_VALUE, context, + "More than one read/write flag is specified"); } // Check for exactly one host read/write/no_access spec @@ -1049,9 +1038,8 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBufferIntelFPGA( if (flags & CL_MEM_HOST_NO_ACCESS) num_rw_specs++; if (num_rw_specs > 1) { - UNLOCK_BAIL_INFO( - CL_INVALID_VALUE, context, - "More than one host read/write/no_access flags are specified"); + BAIL_INFO(CL_INVALID_VALUE, context, + "More than one host read/write/no_access flags are specified"); } // If the parent buffer is write only then the sub-buffer cannot read. 
@@ -1066,30 +1054,27 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBufferIntelFPGA( (flags & CL_MEM_HOST_WRITE_ONLY)) || ((buffer->flags & CL_MEM_HOST_NO_ACCESS) && ((flags & CL_MEM_HOST_READ_ONLY) || (flags & CL_MEM_HOST_WRITE_ONLY)))) { - UNLOCK_BAIL_INFO( - CL_INVALID_VALUE, context, - "Read/write flags are incompatible with the parent buffer"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Read/write flags are incompatible with the parent buffer"); } if (buffer_create_type != CL_BUFFER_CREATE_TYPE_REGION) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "Invalid buffer_create_type value"); + BAIL_INFO(CL_INVALID_VALUE, context, "Invalid buffer_create_type value"); } if (buffer_create_info == NULL) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "Empty buffer_create_info"); + BAIL_INFO(CL_INVALID_VALUE, context, "Empty buffer_create_info"); } if (((cl_buffer_region *)buffer_create_info)->origin + ((cl_buffer_region *)buffer_create_info)->size > buffer->size) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "Origin plus size is out of bounds of parent buffer"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Origin plus size is out of bounds of parent buffer"); } if (((cl_buffer_region *)buffer_create_info)->size == 0) { - UNLOCK_BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, - "Sub-buffer size is zero"); + BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, "Sub-buffer size is zero"); } for (idevice = 0; idevice < context->num_devices; ++idevice) { @@ -1100,7 +1085,7 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBufferIntelFPGA( context->device[idevice], CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(int), &device_mem_base_addr_align, NULL); if (status_code != CL_SUCCESS) { - UNLOCK_BAIL(CL_OUT_OF_HOST_MEMORY); + BAIL(CL_OUT_OF_HOST_MEMORY); } if (!((((cl_buffer_region *)buffer_create_info)->origin * 8) & @@ -1111,22 +1096,21 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBufferIntelFPGA( } if (!aligns_with_any_device) { - UNLOCK_BAIL_INFO( - CL_MISALIGNED_SUB_BUFFER_OFFSET, context, - 
"Sub-buffer offset does not align with any device in context"); + BAIL_INFO(CL_MISALIGNED_SUB_BUFFER_OFFSET, context, + "Sub-buffer offset does not align with any device in context"); } acl_block_allocation_t *new_block = acl_new(); if (!new_block) { - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate a cl_mem object"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate a cl_mem object"); } // Now actually allocate the mem object. mem = acl_alloc_cl_mem(); if (!mem) { acl_delete(new_block); - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate a cl_mem object"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate a cl_mem object"); } mem->block_allocation = new_block; @@ -1176,7 +1160,7 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBufferIntelFPGA( buffer->host_mem = acl_mem_aligned_malloc(buffer->size); if (!buffer->host_mem.raw) { acl_free_cl_mem(mem); - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_OUT_OF_HOST_MEMORY, context, "Could not allocate backing store for a device buffer with sub " "buffers"); @@ -1275,9 +1259,8 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBufferIntelFPGA( cl_uint sub_bank_id = ((cl_uint)sub_flags & CL_CHANNEL_7_INTELFPGA) / CL_CHANNEL_1_INTELFPGA; if (sub_bank_id != buffer->bank_id) { - UNLOCK_BAIL_INFO( - CL_INVALID_VALUE, context, - "Sub-buffer bank id does not match parent buffer bank id"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Sub-buffer bank id does not match parent buffer bank id"); } } } @@ -1305,10 +1288,9 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBufferIntelFPGA( ? 
CL_OUT_OF_HOST_MEMORY : CL_OUT_OF_RESOURCES; acl_free_cl_mem(mem); - UNLOCK_BAIL_INFO( - code, context, - "Could not allocate a buffer of the specified size due to " - "fragmentation or exhaustion"); + BAIL_INFO(code, context, + "Could not allocate a buffer of the specified size due to " + "fragmentation or exhaustion"); } } @@ -1330,9 +1312,8 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBufferIntelFPGA( cl_uint sub_bank_id = ((cl_uint)sub_flags & CL_CHANNEL_7_INTELFPGA) / CL_CHANNEL_1_INTELFPGA; if (sub_bank_id != buffer->bank_id) { - UNLOCK_BAIL_INFO( - CL_INVALID_VALUE, context, - "Sub-buffer bank id does not match parent buffer bank id"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Sub-buffer bank id does not match parent buffer bank id"); } } } @@ -1352,7 +1333,7 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBufferIntelFPGA( printf(" %zx\n", (size_t)result); #endif - UNLOCK_RETURN(result); + return result; } ACL_EXPORT @@ -1369,12 +1350,12 @@ CL_API_ENTRY cl_int CL_API_CALL clGetMemObjectInfoIntelFPGA( void *param_value, size_t *param_value_size_ret) { acl_result_t result; cl_context context; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; RESULT_INIT; if (!acl_mem_is_valid(mem)) { - UNLOCK_RETURN(CL_INVALID_MEM_OBJECT); + return CL_INVALID_MEM_OBJECT; } context = mem->context; @@ -1454,14 +1435,14 @@ CL_API_ENTRY cl_int CL_API_CALL clGetMemObjectInfoIntelFPGA( } if (result.size == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Invalid or unsupported memory object query"); + ERR_RET(CL_INVALID_VALUE, context, + "Invalid or unsupported memory object query"); } if (param_value) { if (param_value_size < result.size) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Parameter return buffer is too small"); + ERR_RET(CL_INVALID_VALUE, context, + "Parameter return buffer is too small"); } RESULT_COPY(param_value, param_value_size); } @@ -1469,7 +1450,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetMemObjectInfoIntelFPGA( if (param_value_size_ret) { 
*param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -1497,10 +1478,10 @@ ACL_EXPORT CL_API_ENTRY cl_mem CL_API_CALL clCreateImageIntelFPGA( unsigned iformat; cl_bool found_image_format; unsigned int idevice; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_context_is_valid(context)) { - UNLOCK_BAIL(CL_INVALID_CONTEXT); + BAIL(CL_INVALID_CONTEXT); } // Check the maximum image sizes for all available devices in the context @@ -1530,20 +1511,20 @@ ACL_EXPORT CL_API_ENTRY cl_mem CL_API_CALL clCreateImageIntelFPGA( } if (image_format == NULL) { - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, context, - "image_format is NULL"); + BAIL_INFO(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, context, + "image_format is NULL"); } element_size = acl_get_image_element_size(context, image_format, &local_errcode_ret); if (local_errcode_ret != CL_SUCCESS) { - UNLOCK_BAIL(local_errcode_ret); + BAIL(local_errcode_ret); } local_errcode_ret = clGetSupportedImageFormats( context, flags, image_desc->image_type, 0, NULL, &num_image_formats); if (local_errcode_ret != CL_SUCCESS) { - UNLOCK_BAIL(local_errcode_ret); + BAIL(local_errcode_ret); } supported_image_formats = (cl_image_format *)acl_malloc( sizeof(cl_image_format) * num_image_formats); @@ -1565,11 +1546,11 @@ ACL_EXPORT CL_API_ENTRY cl_mem CL_API_CALL clCreateImageIntelFPGA( acl_free(supported_image_formats); if (local_errcode_ret != CL_SUCCESS) { - UNLOCK_BAIL(local_errcode_ret); + BAIL(local_errcode_ret); } if (!found_image_format) { - UNLOCK_BAIL_INFO(CL_IMAGE_FORMAT_NOT_SUPPORTED, context, - "Unsupported image format"); + BAIL_INFO(CL_IMAGE_FORMAT_NOT_SUPPORTED, context, + "Unsupported image format"); } // Allocate the memory for the image. 
This size (and sometimes the method) @@ -1580,17 +1561,17 @@ ACL_EXPORT CL_API_ENTRY cl_mem CL_API_CALL clCreateImageIntelFPGA( element_size * image_desc->image_width + get_offset_for_image_param(context, image_desc->image_type, "data"); if (image_desc->image_width <= 0) { - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, - "image width cannot be zero for a 1D object"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image width cannot be zero for a 1D object"); } if (image_size > context->max_mem_alloc_size) { - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, - "image size exceeds maximum alloc size"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image size exceeds maximum alloc size"); } return_buffer = clCreateBuffer(context, flags, image_size, host_ptr, errcode_ret); if (return_buffer == NULL) - UNLOCK_RETURN(NULL); + return NULL; return_buffer->fields.image_objs.image_format = (cl_image_format *)acl_malloc(sizeof(cl_image_format)); return_buffer->fields.image_objs.image_desc = @@ -1598,8 +1579,8 @@ ACL_EXPORT CL_API_ENTRY cl_mem CL_API_CALL clCreateImageIntelFPGA( break; case CL_MEM_OBJECT_IMAGE1D_BUFFER: // Need to actually allocate/assign the buffer data here - UNLOCK_BAIL_INFO(CL_IMAGE_FORMAT_NOT_SUPPORTED, context, - "Do not support images created from buffers"); + BAIL_INFO(CL_IMAGE_FORMAT_NOT_SUPPORTED, context, + "Do not support images created from buffers"); // Need to actually allocate/assign the buffer data here break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: @@ -1607,17 +1588,17 @@ ACL_EXPORT CL_API_ENTRY cl_mem CL_API_CALL clCreateImageIntelFPGA( element_size * image_desc->image_width * image_desc->image_array_size + get_offset_for_image_param(context, image_desc->image_type, "data"); if (image_desc->image_width <= 0) { - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, - "image width cannot be zero for a 1D object"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image width cannot be zero for a 1D object"); } if (image_size > 
context->max_mem_alloc_size) { - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, - "image size exceeds maximum alloc size"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image size exceeds maximum alloc size"); } return_buffer = clCreateBuffer(context, flags, image_size, host_ptr, errcode_ret); if (return_buffer == NULL) - UNLOCK_RETURN(NULL); + return NULL; return_buffer->fields.image_objs.image_format = (cl_image_format *)acl_malloc(sizeof(cl_image_format)); return_buffer->fields.image_objs.image_desc = @@ -1627,45 +1608,45 @@ ACL_EXPORT CL_API_ENTRY cl_mem CL_API_CALL clCreateImageIntelFPGA( // If we change this, need to actually allocate/assign the buffer data here if (image_desc->mem_object != NULL && image_desc->mem_object->mem_object_type == CL_MEM_OBJECT_BUFFER) { - UNLOCK_BAIL_INFO(CL_IMAGE_FORMAT_NOT_SUPPORTED, context, - "Do not support images created from buffers"); + BAIL_INFO(CL_IMAGE_FORMAT_NOT_SUPPORTED, context, + "Do not support images created from buffers"); // Copy information from the other image object } else if (image_desc->mem_object != NULL && image_desc->mem_object->mem_object_type == CL_MEM_OBJECT_BUFFER) { - UNLOCK_BAIL_INFO(CL_IMAGE_FORMAT_NOT_SUPPORTED, context, - "Do not support images created from other images"); + BAIL_INFO(CL_IMAGE_FORMAT_NOT_SUPPORTED, context, + "Do not support images created from other images"); // Allocate a new image object } else { image_size = element_size * image_desc->image_width * image_desc->image_height + get_offset_for_image_param(context, image_desc->image_type, "data"); if (image_desc->image_width <= 0) { - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, - "image width cannot be zero for a 2D object"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image width cannot be zero for a 2D object"); } if (image_desc->image_width > max_2d_image_width) { - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_INVALID_IMAGE_SIZE, context, "image width exceeds maximum width for all devices in context"); } if 
(image_desc->image_height <= 0) { - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, - "image height cannot be zero for a 2D object"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image height cannot be zero for a 2D object"); } if (image_desc->image_height > max_2d_image_height) { - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_INVALID_IMAGE_SIZE, context, "1 image height exceeds maximum height for all devices in context"); } if (image_size > context->max_mem_alloc_size) { - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, - "image size exceeds maximum alloc size"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image size exceeds maximum alloc size"); } return_buffer = clCreateBuffer(context, flags, image_size, host_ptr, errcode_ret); if (return_buffer == NULL) - UNLOCK_RETURN(NULL); + return NULL; return_buffer->fields.image_objs.image_format = (cl_image_format *)acl_malloc(sizeof(cl_image_format)); return_buffer->fields.image_objs.image_desc = @@ -1678,22 +1659,21 @@ ACL_EXPORT CL_API_ENTRY cl_mem CL_API_CALL clCreateImageIntelFPGA( image_desc->image_array_size + get_offset_for_image_param(context, image_desc->image_type, "data"); if (image_desc->image_width <= 0) - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, - "image width cannot be zero for a 2D object"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image width cannot be zero for a 2D object"); if (image_desc->image_width > max_2d_image_width) - UNLOCK_BAIL_INFO( - CL_INVALID_IMAGE_SIZE, context, - "image width exceeds maximum width for all devices in context"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image width exceeds maximum width for all devices in context"); if (image_desc->image_height <= 0) - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, - "image height cannot be zero for a 2D object"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image height cannot be zero for a 2D object"); if (image_desc->image_height > max_2d_image_height) - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_INVALID_IMAGE_SIZE, context, 
"2 image height exceeds maximum height for all devices in context"); if (image_size > context->max_mem_alloc_size) - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, - "image size exceeds maximum alloc size"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image size exceeds maximum alloc size"); return_buffer = clCreateBuffer(context, flags, image_size, host_ptr, errcode_ret); if (return_buffer == NULL) @@ -1709,41 +1689,38 @@ ACL_EXPORT CL_API_ENTRY cl_mem CL_API_CALL clCreateImageIntelFPGA( image_desc->image_depth + get_offset_for_image_param(context, image_desc->image_type, "data"); if (image_desc->image_width <= 0) - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, - "image width cannot be zero for a 3D object"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image width cannot be zero for a 3D object"); if (image_desc->image_width > max_3d_image_width) - UNLOCK_BAIL_INFO( - CL_INVALID_IMAGE_SIZE, context, - "image width exceeds maximum width for all devices in context"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image width exceeds maximum width for all devices in context"); if (image_desc->image_height <= 0) - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, - "image height cannot be zero for a 3D object"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image height cannot be zero for a 3D object"); if (image_desc->image_height > max_3d_image_height) - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_INVALID_IMAGE_SIZE, context, "image height exceeds maximum height for all devices in context"); if (image_desc->image_depth <= 0) - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, - "image depth cannot be zero for a 3D object"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image depth cannot be zero for a 3D object"); if (image_desc->image_depth > max_3d_image_depth) - UNLOCK_BAIL_INFO( - CL_INVALID_IMAGE_SIZE, context, - "image depth exceeds maximum depth for all devices in context"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image depth exceeds maximum depth for 
all devices in context"); if (image_size > context->max_mem_alloc_size) - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, - "image size exceeds maximum alloc size"); + BAIL_INFO(CL_INVALID_IMAGE_SIZE, context, + "image size exceeds maximum alloc size"); return_buffer = clCreateBuffer(context, flags, image_size, host_ptr, errcode_ret); if (return_buffer == NULL) - UNLOCK_RETURN(NULL); + return NULL; return_buffer->fields.image_objs.image_format = (cl_image_format *)acl_malloc(sizeof(cl_image_format)); return_buffer->fields.image_objs.image_desc = (cl_image_desc *)acl_malloc(sizeof(cl_image_desc)); break; default: - UNLOCK_BAIL_INFO(CL_INVALID_IMAGE_DESCRIPTOR, context, - "invalid image type"); + BAIL_INFO(CL_INVALID_IMAGE_DESCRIPTOR, context, "invalid image type"); break; } @@ -1761,8 +1738,8 @@ ACL_EXPORT CL_API_ENTRY cl_mem CL_API_CALL clCreateImageIntelFPGA( if (!return_buffer->host_mem.aligned_ptr) { return_buffer->host_mem = acl_mem_aligned_malloc(image_size); if (return_buffer->host_mem.raw == 0) { - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate backing store for a device image"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate backing store for a device image"); } } @@ -1773,7 +1750,7 @@ ACL_EXPORT CL_API_ENTRY cl_mem CL_API_CALL clCreateImageIntelFPGA( &(acl_platform.host_user_mem))) { copy_image_metadata(return_buffer); } - UNLOCK_RETURN(return_buffer); + return return_buffer; } ACL_EXPORT CL_API_ENTRY cl_mem CL_API_CALL clCreateImage( @@ -1894,18 +1871,18 @@ CL_API_ENTRY cl_int CL_API_CALL clGetSupportedImageFormatsIntelFPGA( {CL_BGRA, CL_UNORM_INT8}, }; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_context_is_valid(context)) { - UNLOCK_RETURN(CL_INVALID_CONTEXT); + return CL_INVALID_CONTEXT; } if (num_entries == 0 && image_formats) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "num_entries is zero but image formats array is specified"); + ERR_RET(CL_INVALID_VALUE, context, + "num_entries 
is zero but image formats array is specified"); } if (num_entries > 0 && image_formats == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "num_entries is non-zero but image_formats array is NULL"); + ERR_RET(CL_INVALID_VALUE, context, + "num_entries is non-zero but image_formats array is NULL"); } switch (image_type) { case CL_MEM_OBJECT_IMAGE2D: @@ -1916,13 +1893,12 @@ CL_API_ENTRY cl_int CL_API_CALL clGetSupportedImageFormatsIntelFPGA( case CL_MEM_OBJECT_IMAGE1D_BUFFER: break; default: - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Invalid or unsupported image type"); + ERR_RET(CL_INVALID_VALUE, context, "Invalid or unsupported image type"); } if (flags & ~(CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR)) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, "Invalid flags"); + ERR_RET(CL_INVALID_VALUE, context, "Invalid flags"); } if (num_image_formats) { @@ -1938,7 +1914,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetSupportedImageFormatsIntelFPGA( supported_image_formats[i].image_channel_data_type; } } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -1957,19 +1933,18 @@ CL_API_ENTRY cl_int CL_API_CALL clGetImageInfoIntelFPGA( void *param_value, size_t *param_value_size_ret) { acl_result_t result; cl_context context; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; RESULT_INIT; if (!acl_mem_is_valid(image)) { - UNLOCK_RETURN(CL_INVALID_MEM_OBJECT); + return CL_INVALID_MEM_OBJECT; } context = image->context; if (!is_image(image)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, context, - "Memory object is not an image"); + ERR_RET(CL_INVALID_MEM_OBJECT, context, "Memory object is not an image"); } switch (param_name) { @@ -2000,14 +1975,14 @@ CL_API_ENTRY cl_int CL_API_CALL clGetImageInfoIntelFPGA( } if (result.size == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Invalid or unsupported memory object query"); + ERR_RET(CL_INVALID_VALUE, context, + "Invalid or 
unsupported memory object query"); } if (param_value) { if (param_value_size < result.size) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Parameter return buffer is too small"); + ERR_RET(CL_INVALID_VALUE, context, + "Parameter return buffer is too small"); } RESULT_COPY(param_value, param_value_size); } @@ -2015,7 +1990,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetImageInfoIntelFPGA( if (param_value_size_ret) { *param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -2040,22 +2015,22 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadImageIntelFPGA( size_t tmp_row_pitch, tmp_slice_pitch; cl_int errcode_ret; size_t src_element_size; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (ptr == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer argument cannot be NULL"); } if (image != NULL) { src_element_size = acl_get_image_element_size( image->context, image->fields.image_objs.image_format, &errcode_ret); if (errcode_ret != CL_SUCCESS) { - UNLOCK_RETURN(errcode_ret); + return errcode_ret; } } else { src_element_size = 0; @@ -2076,8 +2051,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadImageIntelFPGA( if (row_pitch != 0) { if (row_pitch < image->fields.image_objs.image_desc->image_width * src_element_size) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Invalid row pitch provided"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Invalid row pitch provided"); } tmp_row_pitch = row_pitch; } else { @@ -2096,20 +2071,20 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadImageIntelFPGA( // Allow the user to override the default slice pitch if (slice_pitch != 0) { if (slice_pitch < tmp_slice_pitch) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, 
command_queue->context, - "Invalid row pitch provided"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Invalid row pitch provided"); } tmp_slice_pitch = slice_pitch; } if (!is_image(image)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Memory object is not an image"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Memory object is not an image"); } if (!acl_bind_buffer_to_device(command_queue->device, image)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } { @@ -2119,7 +2094,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadImageIntelFPGA( tmp_dst_offset, // see creation of the unwrapped_host_mem tmp_row_pitch, tmp_slice_pitch, tmp_cb, num_events_in_wait_list, event_wait_list, event, CL_COMMAND_READ_BUFFER, 0); - UNLOCK_RETURN(ret); + return ret; } } @@ -2146,20 +2121,20 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteImageIntelFPGA( size_t tmp_row_pitch, tmp_slice_pitch; cl_int errcode_ret; size_t dst_element_size; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (image != NULL) { dst_element_size = acl_get_image_element_size( image->context, image->fields.image_objs.image_format, &errcode_ret); if (errcode_ret != CL_SUCCESS) { - UNLOCK_RETURN(errcode_ret); + return errcode_ret; } } else { - UNLOCK_RETURN(CL_INVALID_MEM_OBJECT); + return CL_INVALID_MEM_OBJECT; } if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } tmp_src_offset[0] = (size_t)((char *)ptr - (const char *)ACL_MEM_ALIGN); @@ -2177,8 +2152,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteImageIntelFPGA( if (input_row_pitch != 0) { if (input_row_pitch < image->fields.image_objs.image_desc->image_width * dst_element_size) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Invalid row pitch 
provided"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Invalid row pitch provided"); } tmp_row_pitch = input_row_pitch; } else { @@ -2197,20 +2172,20 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteImageIntelFPGA( // Allow the user to override the default slice pitch if (input_slice_pitch != 0) { if (input_slice_pitch < tmp_slice_pitch) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Invalid row pitch provided"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Invalid row pitch provided"); } tmp_slice_pitch = input_slice_pitch; } if (!is_image(image)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Memory object is not an image"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Memory object is not an image"); } if (!acl_bind_buffer_to_device(command_queue->device, image)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } { @@ -2220,7 +2195,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteImageIntelFPGA( tmp_row_pitch, tmp_slice_pitch, image, tmp_dst_offset, 0, 0, tmp_cb, num_events_in_wait_list, event_wait_list, event, CL_COMMAND_WRITE_BUFFER, 0); - UNLOCK_RETURN(ret); + return ret; } } @@ -2250,25 +2225,25 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueFillImageIntelFPGA( char converted_fill_color[16]; // Maximum number of bytes needed to keep a // pixel. 
cl_event tmp_event; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (image != NULL) { dst_element_size = acl_get_image_element_size( image->context, image->fields.image_objs.image_format, &errcode_ret); if (errcode_ret != CL_SUCCESS) { - UNLOCK_RETURN(errcode_ret); + return errcode_ret; } } else { - UNLOCK_RETURN(CL_INVALID_MEM_OBJECT); + return CL_INVALID_MEM_OBJECT; } if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (!is_image(image)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Memory object is not an image"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Memory object is not an image"); } // Replicating the color in the region allocated in host mem. @@ -2276,8 +2251,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueFillImageIntelFPGA( color_format.image_channel_order = CL_RGBA; if (fill_color == NULL) - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "fill_color cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "fill_color cannot be NULL"); size_t host_mem_size = region[0] * region[1] * region[2] * dst_element_size; @@ -2326,24 +2301,24 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueFillImageIntelFPGA( errcode_ret = -1; } if (errcode_ret != CL_SUCCESS) - UNLOCK_ERR_RET(CL_IMAGE_FORMAT_NOT_SUPPORTED, command_queue->context, - "Failed to convert fill_color to the appropriate image " - "channel format and order"); + ERR_RET(CL_IMAGE_FORMAT_NOT_SUPPORTED, command_queue->context, + "Failed to convert fill_color to the appropriate image " + "channel format and order"); // This array is passed to clSetEventCallback for releasing the // allocated memory and releasing the event, if *event is null. 
void **callback_data = (void **)acl_malloc(sizeof(void *) * 2); if (!callback_data) { - UNLOCK_ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, - "Out of host memory"); + ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, + "Out of host memory"); } acl_aligned_ptr_t *aligned_ptr = (acl_aligned_ptr_t *)acl_malloc(sizeof(acl_aligned_ptr_t)); if (!aligned_ptr) { acl_free(callback_data); - UNLOCK_ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, - "Out of host memory"); + ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, + "Out of host memory"); } *aligned_ptr = acl_mem_aligned_malloc(host_mem_size); @@ -2351,8 +2326,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueFillImageIntelFPGA( if (!ptr) { acl_free(aligned_ptr); acl_free(callback_data); - UNLOCK_ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, - "Out of host memory"); + ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, + "Out of host memory"); } for (cl_uint i = 0; i < region[0] * region[1] * region[2]; i++) { @@ -2391,8 +2366,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueFillImageIntelFPGA( acl_free(aligned_ptr); acl_free(callback_data); } - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } { @@ -2407,7 +2382,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueFillImageIntelFPGA( aligned_ptr); // Cleaning up before failing. 
acl_free(aligned_ptr); acl_free(callback_data); - UNLOCK_RETURN(ret); + return ret; } callback_data[0] = (void *)(aligned_ptr); @@ -2422,7 +2397,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueFillImageIntelFPGA( clSetEventCallback(tmp_event, CL_COMPLETE, acl_free_allocation_after_event_completion, (void *)callback_data); - UNLOCK_RETURN(ret); + return ret; } } @@ -2447,27 +2422,27 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImageIntelFPGA( size_t tmp_cb[3]; size_t element_size; cl_int errcode_ret; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (src_image == NULL || !is_image(src_image)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Source memory object is not an image"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Source memory object is not an image"); } if (dst_image == NULL || !is_image(dst_image)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Source memory object is not an image"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Source memory object is not an image"); } if ((src_image->fields.image_objs.image_format->image_channel_order != dst_image->fields.image_objs.image_format->image_channel_order) || (src_image->fields.image_objs.image_format->image_channel_data_type != dst_image->fields.image_objs.image_format->image_channel_data_type)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Source memory object is not an image"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Source memory object is not an image"); } // Doesn't matter if we look at src or dst, already verified that they are the @@ -2476,8 +2451,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImageIntelFPGA( command_queue->context, src_image->fields.image_objs.image_format, &errcode_ret); if (errcode_ret != CL_SUCCESS) { - 
UNLOCK_ERR_RET(errcode_ret, command_queue->context, - "Source memory object is not an image"); + ERR_RET(errcode_ret, command_queue->context, + "Source memory object is not an image"); } tmp_src_offset[0] = src_origin[0]; @@ -2496,17 +2471,16 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImageIntelFPGA( dst_image->fields.image_objs.image_format->image_channel_order) || (src_image->fields.image_objs.image_format->image_channel_data_type != dst_image->fields.image_objs.image_format->image_channel_data_type)) { - UNLOCK_ERR_RET( - CL_IMAGE_FORMAT_MISMATCH, command_queue->context, - "Mismatch in image format between source & destination image"); + ERR_RET(CL_IMAGE_FORMAT_MISMATCH, command_queue->context, + "Mismatch in image format between source & destination image"); } if (!acl_bind_buffer_to_device(command_queue->device, src_image)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } if (!acl_bind_buffer_to_device(command_queue->device, dst_image)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } { @@ -2514,7 +2488,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImageIntelFPGA( command_queue, 0, src_image, tmp_src_offset, 0, 0, dst_image, tmp_dst_offset, 0, 0, tmp_cb, num_events_in_wait_list, event_wait_list, event, CL_COMMAND_COPY_BUFFER, 0); - UNLOCK_RETURN(ret); + return ret; } } @@ -2541,18 +2515,18 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImageToBufferIntelFPGA( size_t tmp_row_pitch, tmp_slice_pitch; cl_int errcode_ret; size_t src_element_size; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if 
(!acl_mem_is_valid(src_image)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Source image is invalid"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Source image is invalid"); } if (!acl_mem_is_valid(dst_buffer)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Destination buffer is invalid"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Destination buffer is invalid"); } if (src_image != NULL) { @@ -2560,7 +2534,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImageToBufferIntelFPGA( src_image->context, src_image->fields.image_objs.image_format, &errcode_ret); if (errcode_ret != CL_SUCCESS) { - UNLOCK_RETURN(errcode_ret); + return errcode_ret; } } else { src_element_size = 0; @@ -2590,17 +2564,17 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImageToBufferIntelFPGA( } if (!is_image(src_image)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Memory object is not an image"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Memory object is not an image"); } if (!acl_bind_buffer_to_device(command_queue->device, src_image)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } if (!acl_bind_buffer_to_device(command_queue->device, dst_buffer)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } { @@ -2609,7 +2583,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImageToBufferIntelFPGA( tmp_dst_offset, // see creation of the unwrapped_host_mem tmp_row_pitch, tmp_slice_pitch, tmp_cb, num_events_in_wait_list, event_wait_list, event, CL_COMMAND_READ_BUFFER, 0); - UNLOCK_RETURN(ret); + return ret; } } @@ -2636,29 +2610,29 @@ CL_API_ENTRY cl_int 
CL_API_CALL clEnqueueCopyBufferToImageIntelFPGA( size_t tmp_row_pitch, tmp_slice_pitch; cl_int errcode_ret; size_t dst_element_size; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (dst_image != NULL) { dst_element_size = acl_get_image_element_size( dst_image->context, dst_image->fields.image_objs.image_format, &errcode_ret); if (errcode_ret != CL_SUCCESS) { - UNLOCK_RETURN(errcode_ret); + return errcode_ret; } } else { dst_element_size = 0; } if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (!acl_mem_is_valid(src_buffer)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Source buffer is invalid"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Source buffer is invalid"); } if (!acl_mem_is_valid(dst_image)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Destination buffer is invalid"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Destination buffer is invalid"); } tmp_src_offset[0] = src_offset; @@ -2685,17 +2659,17 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBufferToImageIntelFPGA( } if (!is_image(dst_image)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Memory object is not an image"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Memory object is not an image"); } if (!acl_bind_buffer_to_device(command_queue->device, src_buffer)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } if (!acl_bind_buffer_to_device(command_queue->device, dst_image)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } { @@ -2704,7 +2678,7 @@ CL_API_ENTRY cl_int 
CL_API_CALL clEnqueueCopyBufferToImageIntelFPGA( tmp_slice_pitch, dst_image, tmp_dst_offset, 0, 0, tmp_cb, num_events_in_wait_list, event_wait_list, event, CL_COMMAND_WRITE_BUFFER, 0); - UNLOCK_RETURN(ret); + return ret; } } @@ -2733,35 +2707,35 @@ CL_API_ENTRY void *CL_API_CALL clEnqueueMapImageIntelFPGA( size_t element_size; size_t tmp_row_pitch; size_t tmp_slice_pitch; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (image != NULL) { element_size = acl_get_image_element_size( image->context, image->fields.image_objs.image_format, errcode_ret); if (*errcode_ret != CL_SUCCESS) { - UNLOCK_RETURN(NULL); + return NULL; } } else { element_size = 0; } if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_BAIL(CL_INVALID_COMMAND_QUEUE); + BAIL(CL_INVALID_COMMAND_QUEUE); } context = command_queue->context; if (!acl_mem_is_valid(image)) { - UNLOCK_BAIL_INFO(CL_INVALID_MEM_OBJECT, context, "Invalid memory object"); + BAIL_INFO(CL_INVALID_MEM_OBJECT, context, "Invalid memory object"); } if (command_queue->context != image->context) { - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_INVALID_CONTEXT, context, "Command queue and memory object are not associated with the same " "context"); } if (!acl_bind_buffer_to_device(command_queue->device, image)) { - UNLOCK_BAIL_INFO(CL_MEM_OBJECT_ALLOCATION_FAILURE, context, - "Deferred Allocation Failed"); + BAIL_INFO(CL_MEM_OBJECT_ALLOCATION_FAILURE, context, + "Deferred Allocation Failed"); } // Check if we can physically map the data into place. @@ -2769,18 +2743,18 @@ CL_API_ENTRY void *CL_API_CALL clEnqueueMapImageIntelFPGA( // have backing store for it. 
if (!image->block_allocation->region->is_host_accessible && !image->host_mem.aligned_ptr) { - UNLOCK_BAIL_INFO(CL_MAP_FAILURE, context, - "Could not map the image into host memory"); + BAIL_INFO(CL_MAP_FAILURE, context, + "Could not map the image into host memory"); } if (!is_image(image)) { - UNLOCK_BAIL_INFO(CL_INVALID_MEM_OBJECT, command_queue->context, - "Memory object is not an image"); + BAIL_INFO(CL_INVALID_MEM_OBJECT, command_queue->context, + "Memory object is not an image"); } if (image_row_pitch == NULL) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, command_queue->context, - "Invalid row pitch provided"); + BAIL_INFO(CL_INVALID_VALUE, command_queue->context, + "Invalid row pitch provided"); } else { tmp_row_pitch = image->fields.image_objs.image_desc->image_width * element_size; @@ -2791,8 +2765,8 @@ CL_API_ENTRY void *CL_API_CALL clEnqueueMapImageIntelFPGA( image->mem_object_type == CL_MEM_OBJECT_IMAGE2D_ARRAY || image->mem_object_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) && image_slice_pitch == NULL) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, command_queue->context, - "Invalid slice pitch provided"); + BAIL_INFO(CL_INVALID_VALUE, command_queue->context, + "Invalid slice pitch provided"); } else { if (image->mem_object_type == CL_MEM_OBJECT_IMAGE2D || image->mem_object_type == CL_MEM_OBJECT_IMAGE1D || @@ -2858,7 +2832,7 @@ CL_API_ENTRY void *CL_API_CALL clEnqueueMapImageIntelFPGA( acl_print_debug_msg(" map: ref count is %u\n", acl_ref_count(image)); if (status != CL_SUCCESS) - UNLOCK_BAIL(status); // already signalled callback + BAIL(status); // already signalled callback // The enqueue of the mem transfer will retain the buffer. 
} else { @@ -2868,7 +2842,7 @@ CL_API_ENTRY void *CL_API_CALL clEnqueueMapImageIntelFPGA( acl_create_event(command_queue, num_events_in_wait_list, event_wait_list, CL_COMMAND_MAP_BUFFER, &local_event); if (status != CL_SUCCESS) - UNLOCK_BAIL(status); // already signalled callback + BAIL(status); // already signalled callback // Mark it as the trivial map buffer case. local_event->cmd.trivial = 1; local_event->cmd.info.trivial_mem_mapping.mem = image; @@ -2915,7 +2889,7 @@ CL_API_ENTRY void *CL_API_CALL clEnqueueMapImageIntelFPGA( } acl_dump_mem_internal(image); - UNLOCK_RETURN(result); + return result; } ACL_EXPORT @@ -2953,38 +2927,38 @@ CL_API_ENTRY void *CL_API_CALL clEnqueueMapBufferIntelFPGA( cl_event local_event = 0; // used for blocking cl_context context; cl_int status; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_BAIL(CL_INVALID_COMMAND_QUEUE); + BAIL(CL_INVALID_COMMAND_QUEUE); } context = command_queue->context; if (!acl_mem_is_valid(buffer)) { - UNLOCK_BAIL_INFO(CL_INVALID_MEM_OBJECT, context, "Invalid memory object"); + BAIL_INFO(CL_INVALID_MEM_OBJECT, context, "Invalid memory object"); } if (command_queue->context != buffer->context) { - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_INVALID_CONTEXT, context, "Command queue and memory object are not associated with the same " "context"); } if (!acl_bind_buffer_to_device(command_queue->device, buffer)) { - UNLOCK_BAIL_INFO(CL_MEM_OBJECT_ALLOCATION_FAILURE, context, - "Deferred Allocation Failed"); + BAIL_INFO(CL_MEM_OBJECT_ALLOCATION_FAILURE, context, + "Deferred Allocation Failed"); } // Check flags if (map_flags & ~(CL_MAP_READ | CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "Invalid or unsupported flags"); + BAIL_INFO(CL_INVALID_VALUE, context, "Invalid or unsupported flags"); } if (((map_flags & CL_MAP_READ) & (map_flags & CL_MAP_WRITE_INVALIDATE_REGION)) || ((map_flags & CL_MAP_WRITE) & 
(map_flags & CL_MAP_WRITE_INVALIDATE_REGION))) { - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_INVALID_VALUE, context, "CL_MAP_READ or CL_MAP_WRITE and CL_MAP_WRITE_INVALIDATE_REGION are " "specified but are mutually exclusive"); @@ -2996,13 +2970,13 @@ CL_API_ENTRY void *CL_API_CALL clEnqueueMapBufferIntelFPGA( cl_mem_flags flags = buffer->flags; if (!buffer->block_allocation->region->is_host_accessible && !buffer->host_mem.aligned_ptr && !(flags & CL_MEM_USE_HOST_PTR)) { - UNLOCK_BAIL_INFO(CL_MAP_FAILURE, context, - "Could not map the buffer into host memory"); + BAIL_INFO(CL_MAP_FAILURE, context, + "Could not map the buffer into host memory"); } if (offset + cb > buffer->size) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "Requested offset and byte count exceeds the buffer size"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Requested offset and byte count exceeds the buffer size"); } if (flags & CL_MEM_USE_HOST_PTR) { @@ -3031,7 +3005,7 @@ CL_API_ENTRY void *CL_API_CALL clEnqueueMapBufferIntelFPGA( acl_print_debug_msg(" map: ref count is %u\n", acl_ref_count(buffer)); if (status != CL_SUCCESS) - UNLOCK_BAIL(status); // already signalled callback + BAIL(status); // already signalled callback } else if (!buffer->block_allocation->region->is_host_accessible) { size_t tmp_src_offset[3]; @@ -3071,7 +3045,7 @@ CL_API_ENTRY void *CL_API_CALL clEnqueueMapBufferIntelFPGA( acl_print_debug_msg(" map: ref count is %u\n", acl_ref_count(buffer)); if (status != CL_SUCCESS) - UNLOCK_BAIL(status); // already signalled callback + BAIL(status); // already signalled callback // The enqueue of the mem transfer will retain the buffer. } else { @@ -3080,7 +3054,7 @@ CL_API_ENTRY void *CL_API_CALL clEnqueueMapBufferIntelFPGA( status = acl_create_event(command_queue, num_events, events, CL_COMMAND_MAP_BUFFER, &local_event); if (status != CL_SUCCESS) - UNLOCK_BAIL(status); // already signalled callback + BAIL(status); // already signalled callback // Mark it as the trivial map buffer case. 
local_event->cmd.trivial = 1; local_event->cmd.info.trivial_mem_mapping.mem = buffer; @@ -3131,7 +3105,7 @@ CL_API_ENTRY void *CL_API_CALL clEnqueueMapBufferIntelFPGA( } } acl_dump_mem_internal(buffer); - UNLOCK_RETURN(result); + return result; } ACL_EXPORT @@ -3152,28 +3126,27 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueUnmapMemObjectIntelFPGA( cl_context context; cl_int status; char *valid_base_ptr; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } context = command_queue->context; if (!acl_mem_is_valid(mem)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, context, "Memory object is invalid"); + ERR_RET(CL_INVALID_MEM_OBJECT, context, "Memory object is invalid"); } if (command_queue->context != mem->context) { - UNLOCK_ERR_RET( - CL_INVALID_CONTEXT, context, - "Command queue and memory object are not associated with the " - "same context"); + ERR_RET(CL_INVALID_CONTEXT, context, + "Command queue and memory object are not associated with the " + "same context"); } cl_mem_flags flags = mem->flags; if ((!mem->block_allocation->region->is_host_accessible && !mem->host_mem.aligned_ptr && !(flags & CL_MEM_USE_HOST_PTR)) || mem->allocation_deferred) { - UNLOCK_ERR_RET(CL_MAP_FAILURE, context, - "Could not have mapped the buffer into host memory"); + ERR_RET(CL_MAP_FAILURE, context, + "Could not have mapped the buffer into host memory"); } // Necessary sanity check on the pointer. 
@@ -3187,12 +3160,12 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueUnmapMemObjectIntelFPGA( : mem->block_allocation->range.begin); } if ((valid_base_ptr - (char *)mapped_ptr) > 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Invalid mapped_ptr argument: it lies outside the buffer"); + ERR_RET(CL_INVALID_VALUE, context, + "Invalid mapped_ptr argument: it lies outside the buffer"); } if (((char *)mapped_ptr - (valid_base_ptr + mem->size)) >= 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Invalid mapped_ptr argument: it lies outside the buffer"); + ERR_RET(CL_INVALID_VALUE, context, + "Invalid mapped_ptr argument: it lies outside the buffer"); } // This is the mirror image of mapping the buffer in the first place. @@ -3214,7 +3187,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueUnmapMemObjectIntelFPGA( size_t image_element_size = acl_get_image_element_size( mem->context, mem->fields.image_objs.image_format, &status); if (status != CL_SUCCESS) { - UNLOCK_RETURN(status); + return status; } tmp_cb[0] = @@ -3249,7 +3222,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueUnmapMemObjectIntelFPGA( // execution time. 0); if (status != CL_SUCCESS) - UNLOCK_RETURN(status); // already signalled callback + return status; // already signalled callback acl_print_debug_msg("mem[%p] enqueue unmap. refcount %u\n", mem, acl_ref_count(mem)); @@ -3271,7 +3244,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueUnmapMemObjectIntelFPGA( size_t image_element_size = acl_get_image_element_size( mem->context, mem->fields.image_objs.image_format, &status); if (status != CL_SUCCESS) { - UNLOCK_RETURN(status); + return status; } tmp_cb[0] = @@ -3306,14 +3279,14 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueUnmapMemObjectIntelFPGA( // execution time. 0); if (status != CL_SUCCESS) - UNLOCK_RETURN(status); // already signalled callback + return status; // already signalled callback acl_print_debug_msg("mem[%p] enqueue unmap. 
refcount %u\n", mem, acl_ref_count(mem)); } else { status = acl_create_event(command_queue, num_events, events, CL_COMMAND_UNMAP_MEM_OBJECT, &local_event); if (status != CL_SUCCESS) - UNLOCK_RETURN(status); // already signalled callback + return status; // already signalled callback local_event->cmd.trivial = 1; local_event->cmd.info.trivial_mem_mapping.mem = mem; // Should retain the memory object so that its metadata will stick around @@ -3334,7 +3307,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueUnmapMemObjectIntelFPGA( clReleaseEvent(local_event); acl_idle_update(command_queue->context); // Clean up early } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -3354,7 +3327,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBufferIntelFPGA( size_t tmp_src_offset[3]; size_t tmp_dst_offset[3]; size_t tmp_cb[3]; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; tmp_src_offset[0] = offset; tmp_src_offset[1] = 0; @@ -3367,15 +3340,15 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBufferIntelFPGA( tmp_cb[2] = 1; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (ptr == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer argument cannot be NULL"); } if (!acl_bind_buffer_to_device(command_queue->device, buffer)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } { @@ -3384,7 +3357,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBufferIntelFPGA( command_queue->context->unwrapped_host_mem, tmp_dst_offset, // see creation of the unwrapped_host_mem 0, 0, tmp_cb, num_events, events, event, CL_COMMAND_READ_BUFFER, 0); - UNLOCK_RETURN(ret); + return ret; } } @@ -3410,7 +3383,7 @@ CL_API_ENTRY cl_int 
CL_API_CALL clEnqueueReadBufferRectIntelFPGA( size_t tmp_src_offset[3]; size_t tmp_dst_offset[3]; size_t tmp_cb[3]; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (buffer_row_pitch == 0) { buffer_row_pitch = region[0]; @@ -3436,19 +3409,18 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBufferRectIntelFPGA( tmp_cb[2] = region[2]; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (!acl_mem_is_valid(buffer)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Buffer is invalid"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, "Buffer is invalid"); } if (ptr == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer argument cannot be NULL"); } if (!acl_bind_buffer_to_device(command_queue->device, buffer)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } { cl_int ret = l_enqueue_mem_transfer( @@ -3457,7 +3429,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBufferRectIntelFPGA( tmp_dst_offset, // see creation of the unwrapped_host_mem host_row_pitch, host_slice_pitch, tmp_cb, num_events_in_wait_list, event_wait_list, event, CL_COMMAND_READ_BUFFER, 0); - UNLOCK_RETURN(ret); + return ret; } } @@ -3483,7 +3455,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBufferIntelFPGA( size_t tmp_src_offset[3]; size_t tmp_dst_offset[3]; size_t tmp_cb[3]; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; tmp_src_offset[0] = (size_t)((char *)ptr - (const char *)ACL_MEM_ALIGN); tmp_src_offset[1] = 0; @@ -3496,11 +3468,11 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBufferIntelFPGA( tmp_cb[2] = 1; if (!acl_command_queue_is_valid(command_queue)) { - 
UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (!acl_bind_buffer_to_device(command_queue->device, buffer)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } { @@ -3509,7 +3481,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBufferIntelFPGA( command_queue->context->unwrapped_host_mem, tmp_src_offset, 0, 0, buffer, tmp_dst_offset, 0, 0, tmp_cb, num_events, events, event, CL_COMMAND_WRITE_BUFFER, 0); - UNLOCK_RETURN(ret); + return ret; } } @@ -3534,7 +3506,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBufferRectIntelFPGA( size_t tmp_src_offset[3]; size_t tmp_dst_offset[3]; size_t tmp_cb[3]; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (buffer_row_pitch == 0) { buffer_row_pitch = region[0]; @@ -3560,19 +3532,18 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBufferRectIntelFPGA( tmp_cb[2] = region[2]; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (!acl_mem_is_valid(buffer)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Buffer is invalid"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, "Buffer is invalid"); } if (ptr == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer argument cannot be NULL"); } if (!acl_bind_buffer_to_device(command_queue->device, buffer)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } { cl_int ret = l_enqueue_mem_transfer( @@ -3582,7 +3553,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBufferRectIntelFPGA( tmp_dst_offset, // see 
creation of the unwrapped_host_mem buffer_row_pitch, buffer_slice_pitch, tmp_cb, num_events_in_wait_list, event_wait_list, event, CL_COMMAND_WRITE_BUFFER, 0); - UNLOCK_RETURN(ret); + return ret; } } @@ -3612,53 +3583,50 @@ CL_API_ENTRY cl_int clEnqueueFillBufferIntelFPGA( char *ptr; cl_event tmp_event; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (!acl_mem_is_valid(buffer)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Buffer is invalid"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, "Buffer is invalid"); } // Pattern size can only be {1,2,4,8,...,1024 sizeof(double16)}. if (pattern_size == 0 || pattern_size > 1024 || (pattern_size & (pattern_size - 1))) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Invalid pattern size"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, "Invalid pattern size"); } if (offset % pattern_size != 0 || size % pattern_size != 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Offset and size must be a multiple of pattern size"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Offset and size must be a multiple of pattern size"); } if (pattern == NULL) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "pattern cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, "pattern cannot be NULL"); } if (!acl_bind_buffer_to_device(command_queue->device, buffer)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } // This array is passed to clSetEventCallback for releasing the // allocated memory and releasing the event, if *event is null. 
void **callback_data = (void **)acl_malloc(sizeof(void *) * 2); if (!callback_data) { - UNLOCK_ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, - "Out of host memory"); + ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, + "Out of host memory"); } acl_aligned_ptr_t *aligned_ptr = (acl_aligned_ptr_t *)acl_malloc(sizeof(acl_aligned_ptr_t)); if (!aligned_ptr) { acl_free(callback_data); - UNLOCK_ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, - "Out of host memory"); + ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, + "Out of host memory"); } // Replicating the pattern, size/pattern_size times. @@ -3667,8 +3635,8 @@ CL_API_ENTRY cl_int clEnqueueFillBufferIntelFPGA( if (!ptr) { acl_free(aligned_ptr); acl_free(callback_data); - UNLOCK_ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, - "Out of host memory"); + ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, + "Out of host memory"); } for (cl_uint i = 0; i < size / pattern_size; i++) { @@ -3697,7 +3665,7 @@ CL_API_ENTRY cl_int clEnqueueFillBufferIntelFPGA( acl_mem_aligned_free(command_queue->context, aligned_ptr); acl_free(aligned_ptr); acl_free(callback_data); - UNLOCK_RETURN(ret); + return ret; } callback_data[0] = (void *)(aligned_ptr); if (event) { @@ -3711,7 +3679,7 @@ CL_API_ENTRY cl_int clEnqueueFillBufferIntelFPGA( clSetEventCallback(tmp_event, CL_COMPLETE, acl_free_allocation_after_event_completion, (void *)callback_data); - UNLOCK_RETURN(ret); + return ret; } } @@ -3736,7 +3704,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBufferIntelFPGA( size_t tmp_src_offset[3]; size_t tmp_dst_offset[3]; size_t tmp_cb[3]; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; tmp_src_offset[0] = src_offset; tmp_src_offset[1] = 0; @@ -3749,23 +3717,23 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBufferIntelFPGA( tmp_cb[2] = 1; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if 
(!acl_mem_is_valid(src_buffer)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Source buffer is invalid"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Source buffer is invalid"); } if (!acl_mem_is_valid(dst_buffer)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Destination buffer is invalid"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Destination buffer is invalid"); } if (!acl_bind_buffer_to_device(command_queue->device, src_buffer)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } if (!acl_bind_buffer_to_device(command_queue->device, dst_buffer)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } { @@ -3773,7 +3741,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBufferIntelFPGA( command_queue, 0, src_buffer, tmp_src_offset, 0, 0, dst_buffer, tmp_dst_offset, 0, 0, tmp_cb, num_events, events, event, CL_COMMAND_COPY_BUFFER, 0); - UNLOCK_RETURN(ret); + return ret; } } @@ -3797,7 +3765,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBufferRectIntelFPGA( size_t tmp_src_offset[3]; size_t tmp_dst_offset[3]; size_t tmp_cb[3]; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (src_row_pitch == 0) { src_row_pitch = region[0]; @@ -3823,44 +3791,41 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBufferRectIntelFPGA( tmp_cb[2] = region[2]; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (!acl_mem_is_valid(src_buffer)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Source buffer is invalid"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Source 
buffer is invalid"); } if (!acl_mem_is_valid(dst_buffer)) { - UNLOCK_ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, - "Destination buffer is invalid"); + ERR_RET(CL_INVALID_MEM_OBJECT, command_queue->context, + "Destination buffer is invalid"); } if (!acl_bind_buffer_to_device(command_queue->device, src_buffer)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } if (!acl_bind_buffer_to_device(command_queue->device, dst_buffer)) { - UNLOCK_ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, - "Deferred Allocation Failed"); + ERR_RET(CL_MEM_OBJECT_ALLOCATION_FAILURE, command_queue->context, + "Deferred Allocation Failed"); } if (src_buffer == dst_buffer) { if (src_row_pitch != dst_row_pitch) { - UNLOCK_ERR_RET( - CL_INVALID_VALUE, command_queue->context, - "Source buffer and destination buffer are the same, but row " - "pitches do not match"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Source buffer and destination buffer are the same, but row " + "pitches do not match"); } if (src_slice_pitch != dst_slice_pitch) { - UNLOCK_ERR_RET( - CL_INVALID_VALUE, command_queue->context, - "Source buffer and destination buffer are the same, but slice " - "pitches do not match"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Source buffer and destination buffer are the same, but slice " + "pitches do not match"); } if (check_copy_overlap(tmp_src_offset, tmp_dst_offset, tmp_cb, src_row_pitch, src_slice_pitch)) { - UNLOCK_ERR_RET( - CL_MEM_COPY_OVERLAP, command_queue->context, - "Source buffer and destination buffer are the same and regions " - "overlaps"); + ERR_RET(CL_MEM_COPY_OVERLAP, command_queue->context, + "Source buffer and destination buffer are the same and regions " + "overlaps"); } } { @@ -3870,7 +3835,7 @@ CL_API_ENTRY cl_int CL_API_CALL 
clEnqueueCopyBufferRectIntelFPGA( tmp_dst_offset, // see creation of the unwrapped_host_mem dst_row_pitch, dst_slice_pitch, tmp_cb, num_events_in_wait_list, event_wait_list, event, CL_COMMAND_COPY_BUFFER, 0); - UNLOCK_RETURN(ret); + return ret; } } @@ -3893,10 +3858,10 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreatePipeIntelFPGA( cl_uint pipe_max_packets, const cl_pipe_properties *properties, cl_int *errcode_ret) { cl_mem mem; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_context_is_valid(context)) { - UNLOCK_BAIL(CL_INVALID_CONTEXT); + BAIL(CL_INVALID_CONTEXT); } // Check flags @@ -3904,8 +3869,7 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreatePipeIntelFPGA( // Check for invalid enum bits if (flags & ~(CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY)) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "Invalid or unsupported flags"); + BAIL_INFO(CL_INVALID_VALUE, context, "Invalid or unsupported flags"); } { @@ -3925,12 +3889,12 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreatePipeIntelFPGA( // Check for exactly one read/write spec if (num_rw_specs > 1) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "More than one read/write flag is specified"); + BAIL_INFO(CL_INVALID_VALUE, context, + "More than one read/write flag is specified"); } if (num_hostrw_specs > 1) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "More than one host read/write flag is specified"); + BAIL_INFO(CL_INVALID_VALUE, context, + "More than one host read/write flag is specified"); } // Default to CL_MEM_READ_WRITE. 
@@ -3941,29 +3905,28 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreatePipeIntelFPGA( if (((flags & CL_MEM_HOST_READ_ONLY) && (flags & CL_MEM_READ_ONLY)) || ((flags & CL_MEM_HOST_WRITE_ONLY) && (flags & CL_MEM_WRITE_ONLY)) || (num_hostrw_specs && (flags & CL_MEM_READ_WRITE))) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "Conflicting read/write flags specified"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Conflicting read/write flags specified"); } } } if (pipe_packet_size == 0) { - UNLOCK_BAIL_INFO(CL_INVALID_PIPE_SIZE, context, "Pipe packet size is zero"); + BAIL_INFO(CL_INVALID_PIPE_SIZE, context, "Pipe packet size is zero"); } if (pipe_packet_size > acl_platform.pipe_max_packet_size) { - UNLOCK_BAIL_INFO(CL_INVALID_PIPE_SIZE, context, - "Pipe packet size exceeds maximum allowed"); + BAIL_INFO(CL_INVALID_PIPE_SIZE, context, + "Pipe packet size exceeds maximum allowed"); } if (properties != NULL) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "Properties must be NULL for pipes"); + BAIL_INFO(CL_INVALID_VALUE, context, "Properties must be NULL for pipes"); } mem = acl_alloc_cl_mem(); if (!mem) { - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate a cl_mem object"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate a cl_mem object"); } acl_reset_ref_count(mem); @@ -3994,8 +3957,8 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreatePipeIntelFPGA( host_pipe_info = acl_new(); if (!host_pipe_info) { acl_free_cl_mem(mem); - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate memory for internal data structure"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate memory for internal data structure"); } host_pipe_info->m_physical_device_id = 0; host_pipe_info->m_channel_handle = -1; @@ -4022,7 +3985,7 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreatePipeIntelFPGA( acl_track_object(ACL_OBJ_MEM_OBJECT, mem); - UNLOCK_RETURN(mem); + return mem; } ACL_EXPORT @@ -4038,17 +4001,17 @@ ACL_EXPORT CL_API_ENTRY cl_int 
CL_API_CALL clGetPipeInfoIntelFPGA( cl_mem pipe, cl_pipe_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { acl_result_t result; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; RESULT_INIT; if (!acl_mem_is_valid(pipe)) { - UNLOCK_RETURN(CL_INVALID_MEM_OBJECT); + return CL_INVALID_MEM_OBJECT; } // Wrong object type if (pipe->mem_object_type != CL_MEM_OBJECT_PIPE) { - UNLOCK_RETURN(CL_INVALID_MEM_OBJECT); + return CL_INVALID_MEM_OBJECT; } switch (param_name) { @@ -4064,14 +4027,14 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clGetPipeInfoIntelFPGA( if (result.size == 0) { // We didn't implement the enum. Error out semi-gracefully. - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } if (param_value) { // Actually try to return the string. if (param_value_size < result.size) { // Buffer is too small to hold the return value. - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } RESULT_COPY(param_value, param_value_size); } @@ -4079,7 +4042,7 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clGetPipeInfoIntelFPGA( if (param_value_size_ret) { *param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL @@ -4101,35 +4064,34 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjectsIntelFPGA( unsigned int mem_id; int *needs_release_on_fail; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (num_mem_objects == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Number of memory objects is zero"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Number of memory objects is zero"); } if (mem_objects == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Array of memory objects is NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + 
"Array of memory objects is NULL"); } for (i = 0; i < num_mem_objects; ++i) { if (!acl_mem_is_valid(mem_objects[i])) { - UNLOCK_RETURN(CL_INVALID_MEM_OBJECT); + return CL_INVALID_MEM_OBJECT; } if (command_queue->context != mem_objects[i]->context) { - UNLOCK_RETURN(CL_INVALID_CONTEXT); + return CL_INVALID_CONTEXT; } } if (flags != 0 && (flags & ~(CL_MIGRATE_MEM_OBJECT_HOST | CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED))) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Invalid flags provided"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, "Invalid flags provided"); } physical_id = command_queue->device->def.physical_device_id; @@ -4138,8 +4100,8 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjectsIntelFPGA( int tmp_mem_id = acl_get_default_device_global_memory(command_queue->device->def); if (tmp_mem_id < 0) { - UNLOCK_ERR_RET(CL_OUT_OF_RESOURCES, command_queue->context, - "Can not find default global memory system"); + ERR_RET(CL_OUT_OF_RESOURCES, command_queue->context, + "Can not find default global memory system"); } mem_id = (unsigned int)tmp_mem_id; @@ -4180,7 +4142,7 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjectsIntelFPGA( mem_objects[i]->reserved_allocations_count[physical_id][mem_id]--; } free(needs_release_on_fail); - UNLOCK_RETURN(status); + return status; } // All space is reserved, create an event/command to actually move the data at @@ -4191,7 +4153,7 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjectsIntelFPGA( if (status != CL_SUCCESS) { free(needs_release_on_fail); - UNLOCK_RETURN(status); // already signalled callback + return status; // already signalled callback } local_event->cmd.info.memory_migration.num_mem_objects = num_mem_objects; @@ -4203,7 +4165,7 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjectsIntelFPGA( num_mem_objects * sizeof(acl_mem_migrate_wrapper_t)); if (!new_src_mem_list) { - UNLOCK_RETURN(CL_OUT_OF_RESOURCES); + return 
CL_OUT_OF_RESOURCES; } local_event->cmd.info.memory_migration.src_mem_list = new_src_mem_list; @@ -4213,7 +4175,7 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjectsIntelFPGA( num_mem_objects * sizeof(acl_mem_migrate_wrapper_t)); if (!local_event->cmd.info.memory_migration.src_mem_list) { - UNLOCK_RETURN(CL_OUT_OF_RESOURCES); + return CL_OUT_OF_RESOURCES; } } @@ -4238,7 +4200,7 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjectsIntelFPGA( free(needs_release_on_fail); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjects( @@ -4592,10 +4554,10 @@ void acl_mem_destructor_callback(cl_mem memobj) { cb_head = cb_head->next; memobj->destructor_callback_list = cb_head; acl_free(temp); - - lock_count = acl_suspend_lock(); - mem_destructor_notify_fn(memobj, notify_user_data); - acl_resume_lock(lock_count); + { + acl_suspend_lock_guard l(acl_mutex_wrapper); + mem_destructor_notify_fn(memobj, notify_user_data); + } } } diff --git a/src/acl_platform.cpp b/src/acl_platform.cpp index 0a6ba6e5..687aa093 100644 --- a/src/acl_platform.cpp +++ b/src/acl_platform.cpp @@ -89,7 +89,7 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clGetPlatformIDsIntelFPGA(cl_uint num_entries, cl_platform_id *platforms, cl_uint *num_platforms_ret) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; // Set this in case of early return due to error in other arguments. if (num_platforms_ret) { @@ -97,10 +97,10 @@ clGetPlatformIDsIntelFPGA(cl_uint num_entries, cl_platform_id *platforms, } if (platforms && num_entries <= 0) { - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } if (num_platforms_ret == 0 && platforms == 0) { - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } // We want to support two kinds of flows: @@ -133,21 +133,21 @@ clGetPlatformIDsIntelFPGA(cl_uint num_entries, cl_platform_id *platforms, // acl_platform.initialized = 1. 
result = acl_init_from_hal_discovery(); if (!result) { - UNLOCK_RETURN(CL_PLATFORM_NOT_FOUND_KHR); + return CL_PLATFORM_NOT_FOUND_KHR; } } if (!acl_get_hal()) { - UNLOCK_RETURN(CL_PLATFORM_NOT_FOUND_KHR); + return CL_PLATFORM_NOT_FOUND_KHR; } if (!acl_platform.initialized) { - UNLOCK_RETURN(CL_PLATFORM_NOT_FOUND_KHR); + return CL_PLATFORM_NOT_FOUND_KHR; } // Return some data if (platforms) { platforms[0] = &acl_platform; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -170,14 +170,14 @@ CL_API_ENTRY cl_int CL_API_CALL clGetPlatformInfoIntelFPGA( size_t param_value_size, void *param_value, size_t *param_value_size_ret) { const char *str = 0; size_t result_len; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_platform_is_valid(platform)) { - UNLOCK_RETURN(CL_INVALID_PLATFORM); + return CL_INVALID_PLATFORM; } - UNLOCK_VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, - param_value_size_ret, 0); + VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, param_value_size_ret, + 0); switch (param_name) { // We don't offer an online compiler. @@ -200,7 +200,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetPlatformInfoIntelFPGA( str = acl_platform.suffix; break; default: - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; break; } assert(str); @@ -210,7 +210,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetPlatformInfoIntelFPGA( // Actually try to return the string. if (param_value_size < result_len) { // Buffer is too small to hold the return value. - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } strncpy((char *)param_value, str, result_len); } @@ -219,7 +219,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetPlatformInfoIntelFPGA( *param_value_size_ret = result_len; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -234,13 +234,13 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clUnloadPlatformCompilerIntelFPGA(cl_platform_id platform) { // Not fully implemented yet. 
- acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_platform_is_valid(platform)) { - UNLOCK_RETURN(CL_INVALID_PLATFORM); + return CL_INVALID_PLATFORM; } // For the sake of MSVC compiler warnings. // We don't have any platform compilers, so unloading is successful! - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -896,8 +896,9 @@ void acl_receive_device_exception(unsigned physical_device_id, // acl_assert_locked_or_sig() instead of just acl_assert_locked(). CL_EXCEPTION_TYPE_INTEL current_exception, listen_mask; + std::unique_lock lock{acl_mutex_wrapper, std::defer_lock}; if (!acl_is_inside_sig()) { - acl_lock(); + lock.lock(); } current_exception = acl_platform.device[physical_device_id].device_exception_status; @@ -950,22 +951,16 @@ void acl_receive_device_exception(unsigned physical_device_id, // callback acl_signal_device_update(); } - - if (!acl_is_inside_sig()) { - acl_unlock(); - } } ACL_EXPORT CL_API_ENTRY void CL_API_CALL clTrackLiveObjectsIntelFPGA(cl_platform_id platform) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (platform == &acl_platform) { acl_platform.track_leaked_objects = 1; } - - acl_unlock(); } ACL_EXPORT @@ -973,7 +968,7 @@ CL_API_ENTRY void CL_API_CALL clReportLiveObjectsIntelFPGA( cl_platform_id platform, void(CL_CALLBACK *report_fn)(void *, void *, const char *, cl_uint), void *user_data) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (platform == &acl_platform) { acl_cl_object_node_t *node = acl_platform.cl_obj_head; @@ -1010,16 +1005,15 @@ CL_API_ENTRY void CL_API_CALL clReportLiveObjectsIntelFPGA( } if (report_fn) { void *object = node->object; - int lock_count = acl_suspend_lock(); - report_fn(user_data, object, name, refcount); - acl_resume_lock(lock_count); + { + acl_suspend_lock_guard l(acl_mutex_wrapper); + report_fn(user_data, object, name, refcount); + } } node = next; } } - - acl_unlock(); } #ifdef __GNUC__ diff --git a/src/acl_printf.cpp b/src/acl_printf.cpp index 
fa66a8b7..60280fcf 100644 --- a/src/acl_printf.cpp +++ b/src/acl_printf.cpp @@ -997,8 +997,9 @@ void acl_schedule_printf_buffer_pickup(int activation_id, int size, // signal handler, which can't lock mutexes, so we don't lock in that case. // All functions called from this one therefore have to use // acl_assert_locked_or_sig() instead of just acl_assert_locked(). + std::unique_lock lock{acl_mutex_wrapper, std::defer_lock}; if (!acl_is_inside_sig()) { - acl_lock(); + lock.lock(); } #ifdef DEBUG @@ -1016,10 +1017,6 @@ void acl_schedule_printf_buffer_pickup(int activation_id, int size, } // Signal all waiters. acl_signal_device_update(); - - if (!acl_is_inside_sig()) { - acl_unlock(); - } } void acl_process_printf_buffer(void *user_data, acl_device_op_t *op) { diff --git a/src/acl_profiler.cpp b/src/acl_profiler.cpp index 22079c84..b5a090ae 100644 --- a/src/acl_profiler.cpp +++ b/src/acl_profiler.cpp @@ -293,8 +293,9 @@ int write_profile_info_to_file(unsigned num_profile_counters, if (!profile_enable) return 0; + std::unique_lock lock{acl_mutex_wrapper, std::defer_lock}; if (!acl_is_inside_sig()) { - acl_lock(); + lock.lock(); } acl_open_profiler_file(); @@ -303,9 +304,6 @@ int write_profile_info_to_file(unsigned num_profile_counters, if (opened_count < 1) { acl_print_debug_msg("Profiler output file is not opened: " STR( ACL_PROFILER_OUTPUT_FILENAME) "\n"); - if (!acl_is_inside_sig()) { - acl_unlock(); - } return 0; } @@ -339,17 +337,8 @@ int write_profile_info_to_file(unsigned num_profile_counters, if (bytes_written < 0 || (unsigned long int)bytes_written != temp_buf.size()) { acl_print_debug_msg("Could not write profile data to file!\n"); - - if (!acl_is_inside_sig()) { - acl_unlock(); - } return 0; } - - if (!acl_is_inside_sig()) { - acl_unlock(); - } - return 1; } @@ -467,9 +456,8 @@ unsigned long is_profile_enabled() { return profile_enable; } unsigned long is_profile_timer_on() { return profile_timer_on; } void acl_set_autorun_start_time() { - acl_lock(); + 
std::scoped_lock lock{acl_mutex_wrapper}; autorun_start_time = acl_get_hal()->get_timestamp(); - acl_unlock(); } CL_API_ENTRY cl_int CL_API_CALL clGetProfileInfoIntelFPGA(cl_event event) { @@ -482,16 +470,16 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProfileInfoIntelFPGA(cl_event event) { int i; _cl_command_queue *command_queue; cl_device_id device_id; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_event_is_valid(event)) { acl_print_debug_msg("clGetProfileInfoIntelFPGA is called for NULL event\n"); - UNLOCK_RETURN(CL_INVALID_EVENT); + return CL_INVALID_EVENT; } if (event->execution_status != CL_RUNNING) { acl_print_debug_msg( "clGetProfileInfoIntelFPGA is called for non-running event\n"); - UNLOCK_RETURN(CL_INVALID_EVENT); + return CL_INVALID_EVENT; } context = event->context; @@ -499,29 +487,27 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProfileInfoIntelFPGA(cl_event event) { if (!acl_context_is_valid(context)) { acl_print_debug_msg( "clGetProfileInfoIntelFPGA is called for NULL context\n"); - UNLOCK_RETURN(CL_INVALID_CONTEXT); + return CL_INVALID_CONTEXT; } command_queue = event->command_queue; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_ERR_RET( - CL_INVALID_COMMAND_QUEUE, context, - "clGetProfileInfoIntelFPGA is called for NULL command_queue"); + ERR_RET(CL_INVALID_COMMAND_QUEUE, context, + "clGetProfileInfoIntelFPGA is called for NULL command_queue"); } device_id = command_queue->device; if (!acl_device_is_valid(device_id)) { - UNLOCK_ERR_RET(CL_INVALID_DEVICE, context, - "clGetProfileInfoIntelFPGA is called for NULL device_id"); + ERR_RET(CL_INVALID_DEVICE, context, + "clGetProfileInfoIntelFPGA is called for NULL device_id"); } profile_data = 0; kernel = event->cmd.info.ndrange_kernel.kernel; if (!acl_kernel_is_valid(kernel)) { - UNLOCK_ERR_RET(CL_INVALID_KERNEL, context, - "Invalid kernel attached to event"); + ERR_RET(CL_INVALID_KERNEL, context, "Invalid kernel attached to event"); } // use autodiscovery info to find out how many 
words will be read from the @@ -531,8 +517,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProfileInfoIntelFPGA( if (num_profile_counters == 0) { // there is not profiler data and we are not printing timers // nothing to print - UNLOCK_ERR_RET(CL_PROFILING_INFO_NOT_AVAILABLE, context, - "No profile information"); + ERR_RET(CL_PROFILING_INFO_NOT_AVAILABLE, context, "No profile information"); } // this kernel has profiling data, get it @@ -569,11 +554,10 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProfileInfoIntelFPGA( profile_data, ACL_DEVICE_OP_KERNEL, (unsigned long long)event->timestamp[CL_RUNNING], (unsigned long long)0, curr_shared_counters)) { - UNLOCK_ERR_RET(CL_OUT_OF_RESOURCES, context, - "Unabled to dump profile data"); + ERR_RET(CL_OUT_OF_RESOURCES, context, "Unable to dump profile data"); } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clGetProfileDataDeviceIntelFPGA( @@ -597,16 +581,16 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProfileDataDeviceIntelFPGA( param_value_size_ret = param_value_size_ret; errcode_ret = errcode_ret; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; // Check if valid device_id if (!acl_device_is_valid(device_id)) { - UNLOCK_RETURN(CL_INVALID_DEVICE); + return CL_INVALID_DEVICE; } // Check if valid program if (!acl_program_is_valid(program)) { - UNLOCK_RETURN(CL_INVALID_PROGRAM); + return CL_INVALID_PROGRAM; } // If program is valid, then context is valid because acl_program_is_valid
Make sure " "the .aocx was compiled with " "autorun kernel profiling enabled"; - UNLOCK_ERR_RET(status, program->context, message); + ERR_RET(status, program->context, message); } // use autodiscovery info to find out how many words will be read from the @@ -642,8 +626,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProfileDataDeviceIntelFPGA( const char *message = "No profile information for kernel " STR( ACL_PROFILE_AUTORUN_KERNEL_NAME) " for reading back autorun profile " "data"; - UNLOCK_ERR_RET(CL_PROFILING_INFO_NOT_AVAILABLE, program->context, - message); + ERR_RET(CL_PROFILING_INFO_NOT_AVAILABLE, program->context, message); } else { uint64_t *readback_profile_data; readback_profile_data = (uint64_t *)acl_malloc( @@ -677,12 +660,12 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProfileDataDeviceIntelFPGA( device_id, context, accel_def->iface.name.c_str(), readback_profile_data, ACL_DEVICE_OP_KERNEL, autorun_start_time, profiled_time, num_profile_counters, curr_shared_counters)) { - UNLOCK_RETURN(CL_OUT_OF_RESOURCES); + return CL_OUT_OF_RESOURCES; } } } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } int acl_process_autorun_profiler_scan_chain(unsigned int physical_device_id, @@ -694,25 +677,20 @@ int acl_process_autorun_profiler_scan_chain(unsigned int physical_device_id, uint64_t *profile_data = nullptr; std::string name = ""; + std::unique_lock lock{acl_mutex_wrapper, std::defer_lock}; if (!acl_is_inside_sig()) { - acl_lock(); + lock.lock(); } const acl_device_binary_t *binary = acl_get_platform()->device[physical_device_id].loaded_bin; if (binary == nullptr) { - if (!acl_is_inside_sig()) { - acl_unlock(); - } return 0; } const acl_accel_def_t *accel_def = binary->get_dev_prog()->get_kernel_accel_def( ACL_PROFILE_AUTORUN_KERNEL_NAME); if (accel_def == nullptr) { - if (!acl_is_inside_sig()) { - acl_unlock(); - } return 0; } @@ -730,9 +708,6 @@ int acl_process_autorun_profiler_scan_chain(unsigned int physical_device_id, num_profile_counters); } else { // There is no 
profiler data - nothing to print - if (!acl_is_inside_sig()) { - acl_unlock(); - } return 0; } @@ -750,9 +725,6 @@ int acl_process_autorun_profiler_scan_chain(unsigned int physical_device_id, acl_free(profile_data); } - if (!acl_is_inside_sig()) { - acl_unlock(); - } return 1; } @@ -770,8 +742,9 @@ int acl_process_profiler_scan_chain(acl_device_op_t *op) { cl_command_type op_type; _cl_command_queue *command_queue; cl_device_id device_id; + std::unique_lock lock{acl_mutex_wrapper, std::defer_lock}; if (!acl_is_inside_sig()) { - acl_lock(); + lock.lock(); } char name[MAX_NAME_SIZE]; @@ -787,25 +760,16 @@ int acl_process_profiler_scan_chain(acl_device_op_t *op) { if (!acl_event_is_valid(event)) { acl_print_debug_msg( "acl_process_profiler_scan_chain is called for an invalid event\n"); - if (!acl_is_inside_sig()) { - acl_unlock(); - } return 0; } if (!acl_command_queue_is_valid(event->command_queue)) { acl_print_debug_msg("acl_process_profiler_scan_chain is called for an " "event with an invalid command_queue\n"); - if (!acl_is_inside_sig()) { - acl_unlock(); - } return 0; } if (acl_event_is_done(event)) { acl_print_debug_msg( "acl_process_profiler_scan_chain is called for a completed event\n"); - if (!acl_is_inside_sig()) { - acl_unlock(); - } return 0; } } @@ -816,18 +780,12 @@ int acl_process_profiler_scan_chain(acl_device_op_t *op) { if (!device_id) { acl_print_debug_msg( "acl_process_profiler_scan_chain is called for NULL device_id\n"); - if (!acl_is_inside_sig()) { - acl_unlock(); - } return 0; } // this is not a kernel event and we are not printing timers // so nothing to print if (op_type != ACL_DEVICE_OP_KERNEL && profile_timer_on != 1) { - if (!acl_is_inside_sig()) { - acl_unlock(); - } return 0; } @@ -842,9 +800,6 @@ int acl_process_profiler_scan_chain(acl_device_op_t *op) { if (num_profile_counters == 0 && profile_timer_on != 1) { // there is not profiler data and we are not printing timers // nothing to print - if (!acl_is_inside_sig()) { - acl_unlock(); - 
} return 0; } @@ -867,9 +822,6 @@ int acl_process_profiler_scan_chain(acl_device_op_t *op) { } else if (profile_timer_on != 1) { // if ACL_PROFILE_TIMER is not set, do not print info about the rest of // the events - if (!acl_is_inside_sig()) { - acl_unlock(); - } return 0; } else if (op_type == ACL_DEVICE_OP_MEM_TRANSFER_COPY) { snprintf(name, MAX_NAME_SIZE, ".mem_transfer_copy"); @@ -887,9 +839,6 @@ int acl_process_profiler_scan_chain(acl_device_op_t *op) { // Ignore unknown op_type (don't attempt to extract any profiling from it or // get timestamps) acl_print_debug_msg("Unknown device op type: '%d'\n", int(op_type)); - if (!acl_is_inside_sig()) { - acl_unlock(); - } return 0; } @@ -916,9 +865,6 @@ int acl_process_profiler_scan_chain(acl_device_op_t *op) { dump_profile_buffer_to_file(); } - if (!acl_is_inside_sig()) { - acl_unlock(); - } return 1; } diff --git a/src/acl_program.cpp b/src/acl_program.cpp index d7a55f7d..00201865 100644 --- a/src/acl_program.cpp +++ b/src/acl_program.cpp @@ -112,12 +112,12 @@ l_device_memory_definition_copy(acl_device_def_autodiscovery_t *dest_dev, ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clRetainProgramIntelFPGA(cl_program program) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_program_is_valid(program)) { - UNLOCK_RETURN(CL_INVALID_PROGRAM); + return CL_INVALID_PROGRAM; } acl_retain(program); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -127,9 +127,9 @@ CL_API_ENTRY cl_int CL_API_CALL clRetainProgram(cl_program program) { ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clReleaseProgramIntelFPGA(cl_program program) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_program_is_valid(program)) { - UNLOCK_RETURN(CL_INVALID_PROGRAM); + return CL_INVALID_PROGRAM; } acl_release(program); if (!acl_ref_count(program)) { @@ -145,7 +145,7 @@ CL_API_ENTRY cl_int CL_API_CALL clReleaseProgramIntelFPGA(cl_program program) { if (program) l_free_program(program); } - 
UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -172,28 +172,28 @@ CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithSourceIntelFPGA( int pass; cl_program program = 0; struct acl_file_handle_t *capture_fp = NULL; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_context_is_valid(context)) - UNLOCK_BAIL(CL_INVALID_CONTEXT); + BAIL(CL_INVALID_CONTEXT); if (count == 0) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "Count parameter is zero"); + BAIL_INFO(CL_INVALID_VALUE, context, "Count parameter is zero"); } if (strings == 0) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "No source strings specified"); + BAIL_INFO(CL_INVALID_VALUE, context, "No source strings specified"); } for (i = 0; i < count; i++) { if (strings[i] == 0) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "A string pointers is NULL"); + BAIL_INFO(CL_INVALID_VALUE, context, "A string pointer is NULL"); } } // Go ahead and allocate it. program = acl_alloc_cl_program(); if (program == 0) { - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate a program object"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate a program object"); } l_init_program(program, context); @@ -233,8 +233,8 @@ CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithSourceIntelFPGA( if (capture_fp) { acl_fclose(capture_fp); } - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate memory to store program source"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate memory to store program source"); } program->source_text = buffer; } @@ -273,7 +273,7 @@ CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithSourceIntelFPGA( acl_track_object(ACL_OBJ_PROGRAM, program); - UNLOCK_RETURN(program); + return program; } ACL_EXPORT @@ -292,38 +292,38 @@ CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinaryIntelFPGA( cl_uint i; cl_uint idev; cl_program program = 0; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if
(!acl_context_is_valid(context)) - UNLOCK_BAIL(CL_INVALID_CONTEXT); + BAIL(CL_INVALID_CONTEXT); if (num_devices == 0 || device_list == 0) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "Invalid device list"); + BAIL_INFO(CL_INVALID_VALUE, context, "Invalid device list"); } for (i = 0; i < num_devices; i++) { if (!acl_device_is_valid(device_list[i])) { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid device"); + BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid device"); } if (!acl_context_uses_device(context, device_list[i])) { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, - "Device is not associated with the context"); + BAIL_INFO(CL_INVALID_DEVICE, context, + "Device is not associated with the context"); } if (lengths[i] == 0 || binaries[i] == 0) { if (binary_status) { binary_status[i] = CL_INVALID_VALUE; } - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - lengths[i] == 0 ? "A binary length is zero" - : "A binary pointer is NULL"); + BAIL_INFO(CL_INVALID_VALUE, context, + lengths[i] == 0 ? "A binary length is zero" + : "A binary pointer is NULL"); } } // Go ahead and allocate it. 
program = acl_alloc_cl_program(); if (program == 0) { - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate a program object"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate a program object"); } l_init_program(program, context); @@ -347,7 +347,7 @@ CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinaryIntelFPGA( if (binary_status) { binary_status[idev] = CL_INVALID_BINARY; } - UNLOCK_BAIL_INFO(CL_INVALID_BINARY, context, "Invalid binary"); + BAIL_INFO(CL_INVALID_BINARY, context, "Invalid binary"); } } else { assert(context->uses_dynamic_sysdef); @@ -384,7 +384,7 @@ CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinaryIntelFPGA( if (binary_status) { binary_status[idev] = CL_INVALID_BINARY; } - UNLOCK_BAIL_INFO(CL_INVALID_BINARY, context, "Invalid binary"); + BAIL_INFO(CL_INVALID_BINARY, context, "Invalid binary"); } // Need to unload the binary and only load it on an as needed @@ -410,8 +410,8 @@ CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinaryIntelFPGA( if (binary_status) { binary_status[idev] = CL_INVALID_VALUE; } - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate memory to store program binaries"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate memory to store program binaries"); } // Wait to set status until after failures may have occurred for this @@ -431,7 +431,7 @@ CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinaryIntelFPGA( l_try_to_eagerly_program_device(program); - UNLOCK_RETURN(program); + return program; } ACL_EXPORT @@ -453,40 +453,40 @@ clCreateProgramWithBinaryAndProgramDeviceIntelFPGA( cl_uint i; cl_uint idev; cl_program program = 0; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_context_is_valid(context)) - UNLOCK_BAIL(CL_INVALID_CONTEXT); + BAIL(CL_INVALID_CONTEXT); // split_kernel mode is not supported in this special extension API which is // not part of the OpenCL standard. 
assert(context->split_kernel == 0); if (num_devices == 0 || device_list == 0) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "Invalid device list"); + BAIL_INFO(CL_INVALID_VALUE, context, "Invalid device list"); } for (i = 0; i < num_devices; i++) { if (!acl_device_is_valid(device_list[i])) { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid device"); + BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid device"); } if (!acl_context_uses_device(context, device_list[i])) { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, - "Device is not associated with the context"); + BAIL_INFO(CL_INVALID_DEVICE, context, + "Device is not associated with the context"); } if (lengths[i] == 0 || binaries[i] == 0) { if (binary_status) { binary_status[i] = CL_INVALID_VALUE; } - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - lengths[i] == 0 ? "A binary length is zero" - : "A binary pointer is NULL"); + BAIL_INFO(CL_INVALID_VALUE, context, + lengths[i] == 0 ? "A binary length is zero" + : "A binary pointer is NULL"); } } program = acl_alloc_cl_program(); if (program == 0) { - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate a program object"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate a program object"); } l_init_program(program, context); @@ -509,7 +509,7 @@ clCreateProgramWithBinaryAndProgramDeviceIntelFPGA( if (binary_status) { binary_status[idev] = CL_INVALID_BINARY; } - UNLOCK_BAIL_INFO(CL_INVALID_BINARY, context, "Invalid binary"); + BAIL_INFO(CL_INVALID_BINARY, context, "Invalid binary"); } } else { // Copy memory definition from initial device def to program in @@ -526,8 +526,8 @@ clCreateProgramWithBinaryAndProgramDeviceIntelFPGA( if (binary_status) { binary_status[idev] = CL_INVALID_VALUE; } - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate memory to store program binaries"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate memory to store program binaries"); } // Wait to set status until 
after failures may have occurred for this @@ -583,16 +583,16 @@ clCreateProgramWithBinaryAndProgramDeviceIntelFPGA( acl_program_device(NULL, &reprogram_op); if (reprogram_op.execution_status != CL_SUCCESS) { - UNLOCK_BAIL_INFO(CL_DEVICE_NOT_AVAILABLE, context, - "Reprogram of device failed"); + BAIL_INFO(CL_DEVICE_NOT_AVAILABLE, context, + "Reprogram of device failed"); } } else { - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, "Invalid binary"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, "Invalid binary"); } } else { - UNLOCK_BAIL_INFO(CL_BUILD_PROGRAM_FAILURE, context, - "Program is not built correctly"); + BAIL_INFO(CL_BUILD_PROGRAM_FAILURE, context, + "Program is not built correctly"); } } @@ -600,24 +600,29 @@ clCreateProgramWithBinaryAndProgramDeviceIntelFPGA( *errcode_ret = CL_SUCCESS; } - UNLOCK_RETURN(program); + return program; } ACL_EXPORT CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBuiltInKernelsIntelFPGA( cl_context context, cl_uint num_devices, const cl_device_id *device_list, const char *kernel_names, cl_int *errcode_ret) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_context_is_valid(context)) - UNLOCK_BAIL(CL_INVALID_CONTEXT); + BAIL(CL_INVALID_CONTEXT); if (num_devices == 0 || device_list == 0) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "Invalid device list"); + BAIL_INFO(CL_INVALID_VALUE, context, "Invalid device list"); } if (kernel_names == NULL) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "kernel_names is NULL"); + BAIL_INFO(CL_INVALID_VALUE, context, "kernel_names is NULL"); + } + + if (num_devices >= ACL_MAX_DEVICE) { + BAIL_INFO(CL_INVALID_VALUE, context, + "num_devices specified is greater than or equal to ACL_MAX_DEVICE"); } // list of semicolon delimited string of kernel names @@ -630,12 +635,12 @@ CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBuiltInKernelsIntelFPGA( for (cl_uint i = 0; i < num_devices; i++) { if (!acl_device_is_valid(device_list[i])) { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, 
context, "Invalid device"); + BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid device"); } if (!acl_context_uses_device(context, device_list[i])) { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, - "Device is not associated with the context"); + BAIL_INFO(CL_INVALID_DEVICE, context, + "Device is not associated with the context"); } // make sure current device contains all the builtin kernels @@ -651,17 +656,17 @@ CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBuiltInKernelsIntelFPGA( break; } if (find_count != 0) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "kernel_names contains a kernel name that is not " - "supported by all of the devices in device_list"); + BAIL_INFO(CL_INVALID_VALUE, context, + "kernel_names contains a kernel name that is not " + "supported by all of the devices in device_list"); } } // Go ahead and allocate it. cl_program program = acl_alloc_cl_program(); if (program == 0) { - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate a program object"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate a program object"); } l_init_program(program, context); @@ -676,8 +681,7 @@ CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBuiltInKernelsIntelFPGA( l_create_dev_prog(program, device_list[idev], 0, NULL); if (program->dev_prog[idev]) { if (context->programs_devices || context->uses_dynamic_sysdef) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "No builtin kernels available\n"); + BAIL_INFO(CL_INVALID_VALUE, context, "No builtin kernels available\n"); } else { // i put this here since dla flow makes call to clGetProgramInfo which @@ -695,8 +699,8 @@ CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBuiltInKernelsIntelFPGA( } else { // Release all the memory we've allocated. 
l_free_program(program); - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Could not allocate memory to store program binaries"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Could not allocate memory to store program binaries"); } } @@ -710,7 +714,7 @@ CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBuiltInKernelsIntelFPGA( acl_track_object(ACL_OBJ_PROGRAM, program); - UNLOCK_RETURN(program); + return program; } ACL_EXPORT @@ -727,9 +731,9 @@ CL_API_ENTRY cl_int CL_API_CALL clCompileProgramIntelFPGA( const char *options, cl_uint num_input_headers, const cl_program *input_headers, const char **header_include_names, acl_program_build_notify_fn_t pfn_notify, void *user_data) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_program_is_valid(program)) - UNLOCK_RETURN(CL_INVALID_PROGRAM); + return CL_INVALID_PROGRAM; // Suppress compiler warnings. num_devices = num_devices; @@ -741,8 +745,8 @@ CL_API_ENTRY cl_int CL_API_CALL clCompileProgramIntelFPGA( pfn_notify = pfn_notify; user_data = user_data; - UNLOCK_ERR_RET(CL_COMPILER_NOT_AVAILABLE, program->context, - "Device compiler is not available"); + ERR_RET(CL_COMPILER_NOT_AVAILABLE, program->context, + "Device compiler is not available"); } ACL_EXPORT @@ -762,9 +766,9 @@ CL_API_ENTRY cl_program CL_API_CALL clLinkProgramIntelFPGA( const char *options, cl_uint num_input_programs, const cl_program *input_programs, acl_program_build_notify_fn_t pfn_notify, void *user_data, cl_int *errcode_ret) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_context_is_valid(context)) - UNLOCK_BAIL(CL_INVALID_CONTEXT); + BAIL(CL_INVALID_CONTEXT); // For the sake of MSVC compiler warnings. 
num_devices = num_devices; device_list = device_list; @@ -774,8 +778,7 @@ CL_API_ENTRY cl_program CL_API_CALL clLinkProgramIntelFPGA( pfn_notify = pfn_notify; user_data = user_data; - UNLOCK_BAIL_INFO(CL_LINKER_NOT_AVAILABLE, context, - "Device linker is not available"); + BAIL_INFO(CL_LINKER_NOT_AVAILABLE, context, "Device linker is not available"); } ACL_EXPORT @@ -795,14 +798,14 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProgramInfoIntelFPGA( void *param_value, size_t *param_value_size_ret) { cl_context context; acl_result_t result; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_program_is_valid(program)) { - UNLOCK_RETURN(CL_INVALID_PROGRAM); + return CL_INVALID_PROGRAM; } context = program->context; - UNLOCK_VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, - param_value_size_ret, context); + VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, param_value_size_ret, + context); RESULT_INIT; @@ -838,8 +841,8 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProgramInfoIntelFPGA( // They actually want the values if (param_value_size < (program->num_devices * sizeof(size_t))) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Parameter return buffer is too small"); + ERR_RET(CL_INVALID_VALUE, context, + "Parameter return buffer is too small"); } for (unsigned i = 0; i < program->num_devices; i++) { // program->dev_prog[] could be NULL if a compile failed. @@ -848,7 +851,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProgramInfoIntelFPGA( dev_prog ? 
dev_prog->device_binary.get_binary_len() : 0; } } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } case CL_PROGRAM_BINARIES: { @@ -863,8 +866,8 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProgramInfoIntelFPGA( // They actually want the values unsigned char **dest = (unsigned char **)param_value; if (param_value_size < (program->num_devices * sizeof(char *))) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Parameter return buffer is too small"); + ERR_RET(CL_INVALID_VALUE, context, + "Parameter return buffer is too small"); } for (unsigned i = 0; i < program->num_devices; ++i) { auto *dev_prog = program->dev_prog[i]; @@ -886,7 +889,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProgramInfoIntelFPGA( } } } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } case CL_PROGRAM_NUM_KERNELS: { @@ -908,10 +911,9 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProgramInfoIntelFPGA( } } if (!exists_built_dev_prog) - UNLOCK_ERR_RET( - CL_INVALID_PROGRAM_EXECUTABLE, context, - "A successfully built program executable was not found for any " - "device in the list of devices associated with program"); + ERR_RET(CL_INVALID_PROGRAM_EXECUTABLE, context, + "A successfully built program executable was not found for any " + "device in the list of devices associated with program"); RESULT_SIZE_T(kernel_cnt); break; @@ -952,10 +954,10 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProgramInfoIntelFPGA( } if (!exists_built_dev_prog) - UNLOCK_ERR_RET(CL_INVALID_PROGRAM_EXECUTABLE, context, - "A successfully built program executable was not " - "found for any device in the list of devices " - "associated with program"); + ERR_RET(CL_INVALID_PROGRAM_EXECUTABLE, context, + "A successfully built program executable was not " + "found for any device in the list of devices " + "associated with program"); // Based on the OpenCL 1.2 CTS api test, total_ret_len must include the // space for the null terminator. 
@@ -966,8 +968,8 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProgramInfoIntelFPGA( if (param_value) { if (total_ret_len > param_value_size) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Parameter return buffer is too small"); + ERR_RET(CL_INVALID_VALUE, context, + "Parameter return buffer is too small"); } std::stringstream ss; @@ -985,17 +987,17 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProgramInfoIntelFPGA( total_ret_len); } } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; default: - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, "Invalid program info query"); + ERR_RET(CL_INVALID_VALUE, context, "Invalid program info query"); } // zero size result is valid! if (param_value) { if (param_value_size < result.size) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Parameter return buffer is too small"); + ERR_RET(CL_INVALID_VALUE, context, + "Parameter return buffer is too small"); } RESULT_COPY(param_value, param_value_size); } @@ -1003,7 +1005,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProgramInfoIntelFPGA( if (param_value_size_ret) { *param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -1024,14 +1026,14 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProgramBuildInfoIntelFPGA( cl_context context; acl_device_program_info_t *dev_prog; acl_result_t result; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_program_is_valid(program)) { - UNLOCK_RETURN(CL_INVALID_PROGRAM); + return CL_INVALID_PROGRAM; } context = program->context; - UNLOCK_VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, - param_value_size_ret, context); + VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, param_value_size_ret, + context); RESULT_INIT; @@ -1041,8 +1043,8 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProgramBuildInfoIntelFPGA( } } if (dev_idx >= program->num_devices) { - UNLOCK_ERR_RET(CL_INVALID_DEVICE, context, - "The specified device is not associated with the program"); + ERR_RET(CL_INVALID_DEVICE, context, + "The specified 
device is not associated with the program"); } dev_prog = program->dev_prog[dev_idx]; @@ -1068,18 +1070,17 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProgramBuildInfoIntelFPGA( } break; default: - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Invalid program build info query"); + ERR_RET(CL_INVALID_VALUE, context, "Invalid program build info query"); } if (result.size == 0) { - UNLOCK_RETURN(CL_INVALID_VALUE); + return CL_INVALID_VALUE; } // should already have signalled if (param_value) { if (param_value_size < result.size) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Parameter return buffer is too small"); + ERR_RET(CL_INVALID_VALUE, context, + "Parameter return buffer is too small"); } RESULT_COPY(param_value, param_value_size); } @@ -1087,7 +1088,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetProgramBuildInfoIntelFPGA( if (param_value_size_ret) { *param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -1106,32 +1107,32 @@ CL_API_ENTRY cl_int CL_API_CALL clBuildProgramIntelFPGA( void *user_data) { cl_context context; cl_int status = CL_SUCCESS; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_program_is_valid(program)) { - UNLOCK_RETURN(CL_INVALID_PROGRAM); + return CL_INVALID_PROGRAM; } context = program->context; acl_print_debug_msg("Building program...\n"); if (program->num_kernels > 0) { - UNLOCK_ERR_RET(CL_INVALID_OPERATION, context, - "At least one kernel is still attached to the program"); + ERR_RET(CL_INVALID_OPERATION, context, + "At least one kernel is still attached to the program"); } if (device_list && num_devices == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Invalid device list: num_devices is zero but device_list " - "is specified"); + ERR_RET(CL_INVALID_VALUE, context, + "Invalid device list: num_devices is zero but device_list " + "is specified"); } if (0 == device_list && num_devices > 0) { - UNLOCK_ERR_RET( + ERR_RET( CL_INVALID_VALUE, context, "Invalid device list: 
num_devices is non-zero but device_list is NULL"); } if (pfn_notify == 0 && user_data != 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "user_data is set but pfn_notify is not"); + ERR_RET(CL_INVALID_VALUE, context, + "user_data is set but pfn_notify is not"); } if (device_list) { @@ -1148,8 +1149,8 @@ CL_API_ENTRY cl_int CL_API_CALL clBuildProgramIntelFPGA( saw_it = (program->device[iprogdev] == device_list[idev]); } if (!saw_it) { - UNLOCK_ERR_RET(CL_INVALID_DEVICE, context, - "A specified device is not associated with the program"); + ERR_RET(CL_INVALID_DEVICE, context, + "A specified device is not associated with the program"); } } // Ok, each device is associated with the program. @@ -1186,7 +1187,6 @@ CL_API_ENTRY cl_int CL_API_CALL clBuildProgramIntelFPGA( if (status == CL_SUCCESS) l_try_to_eagerly_program_device(program); - acl_unlock(); // Call the notification callback. if (pfn_notify) pfn_notify(program, user_data); diff --git a/src/acl_sampler.cpp b/src/acl_sampler.cpp index 5d9d4c74..07a41c11 100644 --- a/src/acl_sampler.cpp +++ b/src/acl_sampler.cpp @@ -40,7 +40,7 @@ CL_API_ENTRY cl_sampler clCreateSamplerWithPropertiesIntelFPGA( cl_sampler sampler; int next_free_sampler_head; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; sampler_id = acl_platform.free_sampler_head; sampler = &(acl_platform.sampler[sampler_id]); @@ -53,7 +53,7 @@ CL_API_ENTRY cl_sampler clCreateSamplerWithPropertiesIntelFPGA( sampler->filter_mode = 0xFFFFFFFF; if (!acl_context_is_valid(context)) { - UNLOCK_BAIL(CL_INVALID_CONTEXT); + BAIL(CL_INVALID_CONTEXT); } sampler->context = context; @@ -73,8 +73,8 @@ CL_API_ENTRY cl_sampler clCreateSamplerWithPropertiesIntelFPGA( } } if (!some_device_supports_images) { - UNLOCK_BAIL_INFO(CL_INVALID_OPERATION, context, - "No devices in context support images"); + BAIL_INFO(CL_INVALID_OPERATION, context, + "No devices in context support images"); } iprop = 0; @@ -82,21 +82,20 @@ CL_API_ENTRY cl_sampler 
clCreateSamplerWithPropertiesIntelFPGA( if (sampler_properties[iprop] == CL_SAMPLER_NORMALIZED_COORDS) { ++iprop; if (sampler->normalized_coords != 0xFFFFFFFF) { - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_INVALID_VALUE, context, "Normalized coords property specified more than once for sampler"); } if (sampler_properties[iprop] != CL_FALSE && sampler_properties[iprop] != CL_TRUE) { - UNLOCK_BAIL_INFO( - CL_INVALID_VALUE, context, - "Invalid value for normalized coords property of sampler"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Invalid value for normalized coords property of sampler"); } sampler->normalized_coords = sampler_properties[iprop]; } else if (sampler_properties[iprop] == CL_SAMPLER_ADDRESSING_MODE) { ++iprop; if (sampler->addressing_mode != 0xFFFFFFFF) { - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_INVALID_VALUE, context, "Addressing mode property specified more than once for sampler"); } @@ -105,29 +104,27 @@ CL_API_ENTRY cl_sampler clCreateSamplerWithPropertiesIntelFPGA( sampler_properties[iprop] != CL_ADDRESS_CLAMP_TO_EDGE && sampler_properties[iprop] != CL_ADDRESS_CLAMP && sampler_properties[iprop] != CL_ADDRESS_NONE) { - UNLOCK_BAIL_INFO( - CL_INVALID_VALUE, context, - "Invalid value for addressing mode property of sampler"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Invalid value for addressing mode property of sampler"); } sampler->addressing_mode = sampler_properties[iprop]; } else if (sampler_properties[iprop] == CL_SAMPLER_FILTER_MODE) { ++iprop; if (sampler->filter_mode != 0xFFFFFFFF) { - UNLOCK_BAIL_INFO( - CL_INVALID_VALUE, context, - "Filter mode property specified more than once for sampler"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Filter mode property specified more than once for sampler"); } if (sampler_properties[iprop] != CL_FILTER_NEAREST && sampler_properties[iprop] != CL_FILTER_LINEAR) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "Invalid value for filter mode property of sampler"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Invalid value 
for filter mode property of sampler"); } sampler->filter_mode = sampler_properties[iprop]; } else { std::stringstream msg; msg << "Invalid sampler property name " << sampler_properties[iprop] << "\n"; - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, msg.str().c_str()); + BAIL_INFO(CL_INVALID_VALUE, context, msg.str().c_str()); } ++iprop; } @@ -155,7 +152,7 @@ CL_API_ENTRY cl_sampler clCreateSamplerWithPropertiesIntelFPGA( acl_track_object(ACL_OBJ_MEM_OBJECT, result); - UNLOCK_RETURN(result); + return result; } ACL_EXPORT @@ -196,9 +193,9 @@ clCreateSampler(cl_context context, cl_bool normalized_coords, ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clRetainSamplerIntelFPGA(cl_sampler sampler) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_sampler_is_valid(sampler)) { - UNLOCK_RETURN(CL_INVALID_SAMPLER); + return CL_INVALID_SAMPLER; } acl_retain(sampler); @@ -207,7 +204,6 @@ CL_API_ENTRY cl_int CL_API_CALL clRetainSamplerIntelFPGA(cl_sampler sampler) { acl_ref_count(sampler)); #endif - acl_unlock(); return CL_SUCCESS; } @@ -218,11 +214,11 @@ CL_API_ENTRY cl_int CL_API_CALL clRetainSampler(cl_sampler sampler) { ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clReleaseSamplerIntelFPGA(cl_sampler sampler) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; // In the double-free case, we'll error out here because the reference count // will be 0. 
if (!acl_sampler_is_valid(sampler)) { - UNLOCK_RETURN(CL_INVALID_SAMPLER); + return CL_INVALID_SAMPLER; } acl_release(sampler); @@ -250,7 +246,7 @@ CL_API_ENTRY cl_int CL_API_CALL clReleaseSamplerIntelFPGA(cl_sampler sampler) { clReleaseContext(context); } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -266,9 +262,9 @@ CL_API_ENTRY cl_int CL_API_CALL clGetSamplerInfoIntelFPGA( cl_context context; RESULT_INIT; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_sampler_is_valid(sampler)) { - UNLOCK_RETURN(CL_INVALID_SAMPLER); + return CL_INVALID_SAMPLER; } context = sampler->context; @@ -295,8 +291,8 @@ CL_API_ENTRY cl_int CL_API_CALL clGetSamplerInfoIntelFPGA( } if (result.size == 0) - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Invalid or unsupported sampler object query"); + ERR_RET(CL_INVALID_VALUE, context, + "Invalid or unsupported sampler object query"); if (param_value) { if (param_value_size < result.size) @@ -308,7 +304,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetSamplerInfoIntelFPGA( if (param_value_size_ret) { *param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT diff --git a/src/acl_svm.cpp b/src/acl_svm.cpp index acb36459..79aac8a1 100644 --- a/src/acl_svm.cpp +++ b/src/acl_svm.cpp @@ -42,18 +42,18 @@ CL_API_ENTRY void *CL_API_CALL clSVMAllocIntelFPGA(cl_context context, // this context supports SVM cl_bool context_has_svm; #endif - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; // Valid context #ifndef REMOVE_VALID_CHECKS if (!acl_context_is_valid(context)) - UNLOCK_RETURN(NULL); + return NULL; // Check for invalid enum bits if (flags & ~(CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS)) { - UNLOCK_RETURN(NULL); + return NULL; } // Check for exactly one read/write spec @@ -65,32 +65,32 @@ CL_API_ENTRY void *CL_API_CALL clSVMAllocIntelFPGA(cl_context context, num_rw_specs++; // Default to 
CL_MEM_READ_WRITE. if (num_rw_specs > 1) - UNLOCK_RETURN(NULL); + return NULL; if (num_rw_specs == 0) flags |= CL_MEM_READ_WRITE; // Cannot specify SVM atomics without fine grain if ((flags & CL_MEM_SVM_ATOMICS) && !(flags & CL_MEM_SVM_FINE_GRAIN_BUFFER)) { - UNLOCK_RETURN(NULL); + return NULL; } // If SVM atomics specified, check if any device in context supports SVM // atomics Right now though, we don't support SVM atomics so just return NULL if (flags & CL_MEM_SVM_ATOMICS) { - UNLOCK_RETURN(NULL); + return NULL; } // If fine grain specified, check if any device in context supports fine grain // Right now though, we don't support SVM fine grain so just return NULL if (flags & CL_MEM_SVM_FINE_GRAIN_BUFFER) { - UNLOCK_RETURN(NULL); + return NULL; } // size is 0 or > CL_DEVICE_MAX_MEM_ALLOC_SIZE value for any device in context if (size == 0) - UNLOCK_RETURN(NULL); + return NULL; if (size > context->max_mem_alloc_size) { - UNLOCK_RETURN(NULL); + return NULL; } // alignment is not a power of two or the OpenCL implementation cannot support @@ -101,7 +101,7 @@ CL_API_ENTRY void *CL_API_CALL clSVMAllocIntelFPGA(cl_context context, if (alignment == 0) alignment = ACL_MEM_ALIGN; if (alignment != ACL_MEM_ALIGN) - UNLOCK_RETURN(NULL); + return NULL; #endif // !REMOVE_VALID_CHECKS @@ -111,7 +111,7 @@ CL_API_ENTRY void *CL_API_CALL clSVMAllocIntelFPGA(cl_context context, #else // LINUX mem_result = posix_memalign(&result, alignment, size); if (mem_result != 0) { - UNLOCK_RETURN(NULL); + return NULL; } #endif #else @@ -135,7 +135,7 @@ CL_API_ENTRY void *CL_API_CALL clSVMAllocIntelFPGA(cl_context context, #else // LINUX mem_result = posix_memalign(&result, alignment, size); if (mem_result != 0) { - UNLOCK_RETURN(NULL); + return NULL; } #endif } else { @@ -161,7 +161,7 @@ CL_API_ENTRY void *CL_API_CALL clSVMAllocIntelFPGA(cl_context context, context->svm_list->ptr = result; context->svm_list->size = size; - UNLOCK_RETURN(result); + return result; } ACL_EXPORT @@ -177,7 +177,7 @@ 
CL_API_ENTRY void clSVMFreeIntelFPGA(cl_context context, void *svm_pointer) { acl_svm_entry_t *next_entry; unsigned int idevice; cl_bool context_has_svm; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; context_has_svm = CL_FALSE; if (acl_get_hal()) { for (idevice = 0; idevice < context->num_devices; ++idevice) { @@ -189,15 +189,15 @@ CL_API_ENTRY void clSVMFreeIntelFPGA(cl_context context, void *svm_pointer) { } #ifndef REMOVE_VALID_CHECKS if (!acl_context_is_valid(context)) - UNLOCK_RETURN_VOID; + return; if (svm_pointer == NULL) - UNLOCK_RETURN_VOID; + return; #endif // !REMOVE_VALID_CHECKS // Only free the SVM pointer if it is from this context if (context->svm_list == NULL) - UNLOCK_RETURN_VOID; + return; last_entry = NULL; next_entry = context->svm_list; @@ -233,8 +233,6 @@ CL_API_ENTRY void clSVMFreeIntelFPGA(cl_context context, void *svm_pointer) { last_entry = next_entry; next_entry = next_entry->next; } - - acl_unlock(); } ACL_EXPORT @@ -249,31 +247,31 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMemcpyIntelFPGA( const cl_event *event_wait_list, cl_event *event) { cl_event local_event = 0; // used for blocking cl_int status; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (src_ptr == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer argument cannot be NULL"); } if (dst_ptr == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer argument cannot be NULL"); } if (size == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer size cannot be 0"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer size cannot be 0"); } if (((char *)src_ptr < (char 
*)dst_ptr && (char *)src_ptr + size > (char *)dst_ptr) || ((char *)dst_ptr < (char *)src_ptr && (char *)dst_ptr + size > (char *)src_ptr)) { - UNLOCK_ERR_RET(CL_MEM_COPY_OVERLAP, command_queue->context, - "Source and destination memory overlaps"); + ERR_RET(CL_MEM_COPY_OVERLAP, command_queue->context, + "Source and destination memory overlaps"); } // Create an event/command to actually move the data at the appropriate @@ -282,7 +280,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMemcpyIntelFPGA( acl_create_event(command_queue, num_events_in_wait_list, event_wait_list, CL_COMMAND_SVM_MEMCPY, &local_event); if (status != CL_SUCCESS) - UNLOCK_RETURN(status); // already signalled callback + return status; // already signalled callback local_event->cmd.info.svm_xfer.src_ptr = src_ptr; local_event->cmd.info.svm_xfer.dst_ptr = dst_ptr; local_event->cmd.info.svm_xfer.src_size = size; @@ -303,7 +301,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMemcpyIntelFPGA( clReleaseEvent(local_event); acl_idle_update(command_queue->context); // Clean up early } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -322,42 +320,42 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMemFillIntelFPGA( const cl_event *event_wait_list, cl_event *event) { cl_event local_event = 0; // used for blocking cl_int status; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (svm_ptr == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer argument cannot be NULL"); } if (((uintptr_t)svm_ptr) % (pattern_size * 8) != 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer not aligned with pattern size"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer not aligned with pattern size"); } if (pattern == 0) { - 
UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pattern argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pattern argument cannot be NULL"); } if (pattern_size == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pattern size argument cannot be 0"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pattern size argument cannot be 0"); } if (pattern_size != 1 && pattern_size != 2 && pattern_size != 4 && pattern_size != 8 && pattern_size != 16 && pattern_size != 32 && pattern_size != 64 && pattern_size != 128) { - UNLOCK_ERR_RET( + ERR_RET( CL_INVALID_VALUE, command_queue->context, "Pattern size argument must be one of {1, 2, 4, 8, 16, 32, 64, 128}"); } if (size == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer size cannot be 0"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer size cannot be 0"); } if (size % pattern_size != 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer size must be multiple of pattern size"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer size must be multiple of pattern size"); } // Create an event/command to actually move the data at the appropriate @@ -366,7 +364,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMemFillIntelFPGA( acl_create_event(command_queue, num_events_in_wait_list, event_wait_list, CL_COMMAND_SVM_MEMFILL, &local_event); if (status != CL_SUCCESS) - UNLOCK_RETURN(status); // already signalled callback + return status; // already signalled callback local_event->cmd.info.svm_xfer.src_ptr = pattern; local_event->cmd.info.svm_xfer.dst_ptr = svm_ptr; local_event->cmd.info.svm_xfer.src_size = pattern_size; @@ -383,7 +381,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMemFillIntelFPGA( clReleaseEvent(local_event); acl_idle_update(command_queue->context); // Clean up early } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMemFill( @@ 
-401,22 +399,22 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMapIntelFPGA( const cl_event *event_wait_list, cl_event *event) { cl_event local_event = 0; // used for blocking cl_int status; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (svm_ptr == NULL) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer argument cannot be NULL"); } if (size == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer size cannot be 0"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer size cannot be 0"); } if (flags & ~(CL_MAP_READ | CL_MAP_WRITE)) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Invalid or unsupported flags"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Invalid or unsupported flags"); } // Create an event/command to actually move the data at the appropriate @@ -424,7 +422,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMapIntelFPGA( status = acl_create_event(command_queue, num_events_in_wait_list, event_wait_list, CL_COMMAND_SVM_MAP, &local_event); if (status != CL_SUCCESS) - UNLOCK_RETURN(status); // already signalled callback + return status; // already signalled callback // We don't use this right now, but if we ever have to sync up caches we will // need this. 
local_event->cmd.info.svm_xfer.dst_ptr = svm_ptr; @@ -445,7 +443,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMapIntelFPGA( clReleaseEvent(local_event); acl_idle_update(command_queue->context); // Clean up early } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMap( @@ -463,14 +461,14 @@ clEnqueueSVMUnmapIntelFPGA(cl_command_queue command_queue, void *svm_ptr, const cl_event *event_wait_list, cl_event *event) { cl_event local_event = 0; // used for blocking cl_int status; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (svm_ptr == NULL) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer argument cannot be NULL"); } // Create an event/command to actually move the data at the appropriate @@ -479,7 +477,7 @@ clEnqueueSVMUnmapIntelFPGA(cl_command_queue command_queue, void *svm_ptr, acl_create_event(command_queue, num_events_in_wait_list, event_wait_list, CL_COMMAND_SVM_UNMAP, &local_event); if (status != CL_SUCCESS) - UNLOCK_RETURN(status); // already signalled callback + return status; // already signalled callback // We don't use this right now, but if we ever have to sync up caches we will // need this. 
local_event->cmd.info.svm_xfer.dst_ptr = svm_ptr; @@ -495,7 +493,7 @@ clEnqueueSVMUnmapIntelFPGA(cl_command_queue command_queue, void *svm_ptr, clReleaseEvent(local_event); acl_idle_update(command_queue->context); // Clean up early } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL @@ -517,18 +515,18 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMFreeIntelFPGA( const cl_event *event_wait_list, cl_event *event) { cl_event local_event = 0; // used for blocking cl_int status; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (svm_pointers == NULL) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "List of SVM pointers argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "List of SVM pointers argument cannot be NULL"); } if (num_svm_pointers == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Number of SVM pointers cannot be 0"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Number of SVM pointers cannot be 0"); } // Create an event/command to actually move the data at the appropriate @@ -536,7 +534,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMFreeIntelFPGA( status = acl_create_event(command_queue, num_events_in_wait_list, event_wait_list, CL_COMMAND_SVM_FREE, &local_event); if (status != CL_SUCCESS) - UNLOCK_RETURN(status); // already signalled callback + return status; // already signalled callback // We don't use this right now, but if we ever have to sync up caches we will // need this. 
local_event->cmd.info.svm_free.pfn_free_func = pfn_free_func; @@ -555,7 +553,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMFreeIntelFPGA( clReleaseEvent(local_event); acl_idle_update(command_queue->context); // Clean up early } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMFree( diff --git a/src/acl_thread.cpp b/src/acl_thread.cpp index d7b2e70c..b0511505 100644 --- a/src/acl_thread.cpp +++ b/src/acl_thread.cpp @@ -1,9 +1,6 @@ // Copyright (C) 2015-2021 Intel Corporation // SPDX-License-Identifier: BSD-3-Clause -// System headers. -#include - // External library headers. #include @@ -16,13 +13,14 @@ ACL_TLS int acl_global_lock_count = 0; ACL_TLS int acl_inside_sig_flag = 0; ACL_TLS int acl_inside_sig_old_lock_count = 0; +acl_mutex_wrapper_t acl_mutex_wrapper; static struct acl_condvar_s l_acl_global_condvar; // l_init_once() is defined in an OS-specific section below static void l_init_once(); -void acl_lock() { +void acl_mutex_wrapper_t::lock() { l_init_once(); if (acl_global_lock_count == 0) { acl_acquire_condvar(&l_acl_global_condvar); @@ -30,7 +28,7 @@ void acl_lock() { acl_global_lock_count++; } -void acl_unlock() { +void acl_mutex_wrapper_t::unlock() { acl_assert_locked(); acl_global_lock_count--; if (acl_global_lock_count == 0) { @@ -40,7 +38,7 @@ void acl_unlock() { int acl_is_locked_callback(void) { return (acl_global_lock_count > 0); } -int acl_suspend_lock() { +int acl_mutex_wrapper_t::suspend_lock() { int old_lock_count = acl_global_lock_count; acl_global_lock_count = 0; if (old_lock_count > 0) @@ -48,7 +46,7 @@ int acl_suspend_lock() { return old_lock_count; } -void acl_resume_lock(int lock_count) { +void acl_mutex_wrapper_t::resume_lock(int lock_count) { acl_assert_unlocked(); if (lock_count > 0) acl_acquire_condvar(&l_acl_global_condvar); @@ -100,6 +98,7 @@ static void l_init_once() { __attribute__((constructor)) static void l_global_lock_init() { acl_init_condvar(&l_acl_global_condvar); + 
acl_mutex_wrapper = acl_mutex_wrapper_t(); } __attribute__((destructor)) static void l_global_lock_uninit() { @@ -124,6 +123,7 @@ static BOOL CALLBACK l_init_once_callback(PINIT_ONCE InitOnce, PVOID Parameter, (void)(Context); acl_init_condvar(&l_acl_global_condvar); + acl_mutex_wrapper = acl_mutex_wrapper_t(); return TRUE; } @@ -140,8 +140,7 @@ static void l_init_once() { // e.g. polling BSPs (using yield) to prevent one thread from hogging the mutex // while waiting for something like clFinish. void acl_yield_lock_and_thread() { - int lock_count; - lock_count = acl_suspend_lock(); + acl_suspend_lock_guard lock(acl_mutex_wrapper); #ifdef __arm__ // arm-linux-gnueabihf-g++ version used is 4.7.1. // std::this_thread::yield can be enabled for it by defining @@ -152,5 +151,4 @@ void acl_yield_lock_and_thread() { #else std::this_thread::yield(); #endif - acl_resume_lock(lock_count); } diff --git a/src/acl_usm.cpp b/src/acl_usm.cpp index 695117b1..f2fcbc82 100644 --- a/src/acl_usm.cpp +++ b/src/acl_usm.cpp @@ -50,32 +50,32 @@ ACL_EXPORT CL_API_ENTRY void *CL_API_CALL clHostMemAllocINTEL( cl_context context, const cl_mem_properties_intel *properties, size_t size, cl_uint alignment, cl_int *errcode_ret) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (errcode_ret) { *errcode_ret = CL_SUCCESS; } if (!acl_context_is_valid(context)) { - UNLOCK_BAIL(CL_INVALID_CONTEXT); + BAIL(CL_INVALID_CONTEXT); } if (size == 0) { - UNLOCK_BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, - "Memory buffer cannot be of size zero"); + BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, + "Memory buffer cannot be of size zero"); } // Spec only allows for power of 2 allignment. 
// Alignment of '0' means use the default if (alignment & (alignment - 1)) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "alignment must be power of 2"); + BAIL_INFO(CL_INVALID_VALUE, context, "alignment must be power of 2"); } // Spec specifies that alignment is no bigger than the largest supported data // type if (alignment > sizeof(cl_long16)) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "Requested alignment greater than largest data type " - "supported by device (long16)"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Requested alignment greater than largest data type " + "supported by device (long16)"); } std::vector devices = std::vector( @@ -113,7 +113,7 @@ CL_API_ENTRY void *CL_API_CALL clHostMemAllocINTEL( mem_id = (cl_uint) * (properties + 1); } break; default: { - UNLOCK_BAIL_INFO(CL_INVALID_PROPERTY, context, "Invalid properties"); + BAIL_INFO(CL_INVALID_PROPERTY, context, "Invalid properties"); } } properties += 2; @@ -122,7 +122,7 @@ CL_API_ENTRY void *CL_API_CALL clHostMemAllocINTEL( for (const auto dev : devices) { if (!acl_usm_has_access_capability(dev, CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL)) { - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_INVALID_OPERATION, context, "Device does not support host Unified Shared Memory allocations: " + dev->def.autodiscovery_def.name); @@ -132,16 +132,14 @@ CL_API_ENTRY void *CL_API_CALL clHostMemAllocINTEL( cl_int ret = clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_alloc), &max_alloc, 0); if (ret) { - UNLOCK_BAIL_INFO( - ret, context, - "Failed to query CL_DEVICE_MAX_MEM_ALLOC_SIZE for device: " + - dev->def.autodiscovery_def.name); + BAIL_INFO(ret, context, + "Failed to query CL_DEVICE_MAX_MEM_ALLOC_SIZE for device: " + + dev->def.autodiscovery_def.name); } if (size > max_alloc) { - UNLOCK_BAIL_INFO( - CL_INVALID_BUFFER_SIZE, context, - "Size larger than allocation size supported by device: " + - dev->def.autodiscovery_def.name); + BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, + "Size larger than allocation 
size supported by device: " + + dev->def.autodiscovery_def.name); } } @@ -164,7 +162,7 @@ CL_API_ENTRY void *CL_API_CALL clHostMemAllocINTEL( (acl_usm_allocation_t *)acl_malloc(sizeof(acl_usm_allocation_t)); if (!usm_alloc) { - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, "Out of host memory"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, "Out of host memory"); } int error = 0; @@ -175,21 +173,21 @@ CL_API_ENTRY void *CL_API_CALL clHostMemAllocINTEL( acl_free(usm_alloc); switch (error) { case CL_OUT_OF_HOST_MEMORY: - UNLOCK_BAIL_INFO(error, context, - "Error: Unable to allocate " + std::to_string(size) + - " bytes"); + BAIL_INFO(error, context, + "Error: Unable to allocate " + std::to_string(size) + + " bytes"); break; case CL_INVALID_VALUE: - UNLOCK_BAIL_INFO(error, context, - "Error: Unsupported alignment of " + - std::to_string(alignment)); + BAIL_INFO(error, context, + "Error: Unsupported alignment of " + + std::to_string(alignment)); break; case CL_INVALID_PROPERTY: - UNLOCK_BAIL_INFO(error, context, "Error: Unsuported properties"); + BAIL_INFO(error, context, "Error: Unsuported properties"); break; default: - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Error: Unable to allocate memory"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Error: Unable to allocate memory"); break; } } @@ -203,12 +201,11 @@ CL_API_ENTRY void *CL_API_CALL clHostMemAllocINTEL( usm_alloc->alignment = alignment; l_add_usm_alloc_to_context(context, usm_alloc); - UNLOCK_RETURN(mem); + return mem; } - UNLOCK_BAIL_INFO( - CL_INVALID_VALUE, context, - "Host allocation is not supported for devices in this context"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Host allocation is not supported for devices in this context"); } ACL_EXPORT @@ -216,22 +213,22 @@ CL_API_ENTRY void *CL_API_CALL clDeviceMemAllocINTEL(cl_context context, cl_device_id device, const cl_mem_properties_intel *properties, size_t size, cl_uint alignment, cl_int *errcode_ret) { - acl_lock(); + std::scoped_lock 
lock{acl_mutex_wrapper}; // Valid argument check if (!acl_context_is_valid(context)) { - UNLOCK_BAIL(CL_INVALID_CONTEXT); + BAIL(CL_INVALID_CONTEXT); } if (!acl_device_is_valid(device)) { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid device"); + BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid device"); } if (!acl_context_uses_device(context, device)) { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, - "Device is not associated with the context"); + BAIL_INFO(CL_INVALID_DEVICE, context, + "Device is not associated with the context"); } if (size == 0) { - UNLOCK_BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, - "Memory buffer cannot be of size zero"); + BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, + "Memory buffer cannot be of size zero"); } cl_ulong max_alloc = 0; @@ -239,9 +236,8 @@ clDeviceMemAllocINTEL(cl_context context, cl_device_id device, &max_alloc, 0); if (size > max_alloc) { - UNLOCK_BAIL_INFO( - CL_INVALID_BUFFER_SIZE, context, - "Memory buffer size is larger than max size supported by device"); + BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, + "Memory buffer size is larger than max size supported by device"); } // Spec allows for power of 2 allignment. 
@@ -252,11 +248,10 @@ clDeviceMemAllocINTEL(cl_context context, cl_device_id device, alignment = ACL_MEM_ALIGN; } if (alignment & (alignment - 1)) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "alignment must be power of 2"); + BAIL_INFO(CL_INVALID_VALUE, context, "alignment must be power of 2"); } if (alignment > ACL_MEM_ALIGN) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "Alignment value is not supported"); + BAIL_INFO(CL_INVALID_VALUE, context, "Alignment value is not supported"); } alignment = ACL_MEM_ALIGN; @@ -273,7 +268,7 @@ clDeviceMemAllocINTEL(cl_context context, cl_device_id device, mem_id = (cl_uint) * (properties + 1); } break; default: { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid properties"); + BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid properties"); } } properties += 2; @@ -287,20 +282,19 @@ clDeviceMemAllocINTEL(cl_context context, cl_device_id device, cl_mem usm_device_buffer = clCreateBufferWithPropertiesINTEL( context, props, CL_MEM_READ_WRITE, size, NULL, &status); if (status != CL_SUCCESS) { - UNLOCK_BAIL_INFO(status, context, "Failed to allocate device memory"); + BAIL_INFO(status, context, "Failed to allocate device memory"); } // Runtime will do device allocation on bind to device if (!acl_bind_buffer_to_device(device, usm_device_buffer)) { clReleaseMemObjectIntelFPGA(usm_device_buffer); - UNLOCK_BAIL_INFO(CL_OUT_OF_RESOURCES, context, - "Failed to allocate device memory"); + BAIL_INFO(CL_OUT_OF_RESOURCES, context, "Failed to allocate device memory"); } acl_usm_allocation_t *usm_alloc = (acl_usm_allocation_t *)acl_malloc(sizeof(acl_usm_allocation_t)); if (!usm_alloc) { clReleaseMemObjectIntelFPGA(usm_device_buffer); - UNLOCK_BAIL_INFO(CL_OUT_OF_RESOURCES, context, "Out of host memory"); + BAIL_INFO(CL_OUT_OF_RESOURCES, context, "Out of host memory"); } void *ptr = acl_get_physical_address(usm_device_buffer, device); @@ -320,7 +314,7 @@ clDeviceMemAllocINTEL(cl_context context, cl_device_id device, *errcode_ret = 
CL_SUCCESS; } - UNLOCK_RETURN(ptr); + return ptr; } ACL_EXPORT @@ -328,28 +322,28 @@ CL_API_ENTRY void *CL_API_CALL clSharedMemAllocINTEL(cl_context context, cl_device_id device, const cl_mem_properties_intel *properties, size_t size, cl_uint alignment, cl_int *errcode_ret) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (errcode_ret) { *errcode_ret = CL_SUCCESS; } if (!acl_context_is_valid(context)) { - UNLOCK_BAIL(CL_INVALID_CONTEXT); + BAIL(CL_INVALID_CONTEXT); } if (device != nullptr && !acl_device_is_valid(device)) { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid device"); + BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid device"); } if (device != nullptr && !acl_context_uses_device(context, device)) { - UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, - "Device is not associated with the context"); + BAIL_INFO(CL_INVALID_DEVICE, context, + "Device is not associated with the context"); } if (size == 0) { - UNLOCK_BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, - "Allocation cannot be of size zero"); + BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, + "Allocation cannot be of size zero"); } // USM spec allows only power-of-2 alignment, or 0 (default alignment) if (alignment & (alignment - 1)) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, "alignment must be power of 2"); + BAIL_INFO(CL_INVALID_VALUE, context, "alignment must be power of 2"); } // Ensure the specified device, or at least one of the devices in the context @@ -364,7 +358,7 @@ clSharedMemAllocINTEL(cl_context context, cl_device_id device, for (const auto dev : devices) { if (!acl_usm_has_access_capability( dev, CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL)) { - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_INVALID_OPERATION, context, "Device does not support shared Unified Shared Memory allocations"); } @@ -373,9 +367,9 @@ clSharedMemAllocINTEL(cl_context context, cl_device_id device, // Spec specifies that alignment is no bigger than the largest supported data // type if (alignment > 
sizeof(cl_long16)) { - UNLOCK_BAIL_INFO(CL_INVALID_VALUE, context, - "Requested alignment greater than largest data type " - "supported by device (long16)"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Requested alignment greater than largest data type " + "supported by device (long16)"); } // Ensure requested size is valid and supported by the specified device, or at @@ -385,17 +379,16 @@ clSharedMemAllocINTEL(cl_context context, cl_device_id device, cl_int ret = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(dev_alloc), &dev_alloc, 0); if (ret) { - UNLOCK_BAIL_INFO( - ret, context, - "Failed to query CL_DEVICE_MAX_MEM_ALLOC_SIZE for device"); + BAIL_INFO(ret, context, + "Failed to query CL_DEVICE_MAX_MEM_ALLOC_SIZE for device"); } if (size > dev_alloc) { - UNLOCK_BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, - "Size larger than allocation size supported by device"); + BAIL_INFO(CL_INVALID_BUFFER_SIZE, context, + "Size larger than allocation size supported by device"); } } if (device == nullptr && (size > context->max_mem_alloc_size)) { - UNLOCK_BAIL_INFO( + BAIL_INFO( CL_INVALID_BUFFER_SIZE, context, "Size larger than allocation size supported by any device in context"); } @@ -409,28 +402,27 @@ clSharedMemAllocINTEL(cl_context context, cl_device_id device, switch (*properties) { case CL_MEM_ALLOC_FLAGS_INTEL: { if (seen_flags.insert(CL_MEM_ALLOC_FLAGS_INTEL).second == false) { - UNLOCK_BAIL_INFO(CL_INVALID_PROPERTY, context, - "Property specified multiple times"); + BAIL_INFO(CL_INVALID_PROPERTY, context, + "Property specified multiple times"); } switch (*(properties + 1)) { case CL_MEM_ALLOC_WRITE_COMBINED_INTEL: break; default: - UNLOCK_BAIL_INFO(CL_INVALID_PROPERTY, context, - "Invalid value for property"); + BAIL_INFO(CL_INVALID_PROPERTY, context, "Invalid value for property"); } alloc_flags = *(properties + 1); } break; case CL_MEM_ALLOC_BUFFER_LOCATION_INTEL: { if (seen_flags.insert(CL_MEM_ALLOC_BUFFER_LOCATION_INTEL).second == false) { - 
UNLOCK_BAIL_INFO(CL_INVALID_PROPERTY, context, - "Property specified multiple times"); + BAIL_INFO(CL_INVALID_PROPERTY, context, + "Property specified multiple times"); } mem_id = (cl_uint) * (properties + 1); } break; default: { - UNLOCK_BAIL_INFO(CL_INVALID_PROPERTY, context, "Invalid properties"); + BAIL_INFO(CL_INVALID_PROPERTY, context, "Invalid properties"); } } properties += 2; @@ -455,7 +447,7 @@ clSharedMemAllocINTEL(cl_context context, cl_device_id device, (acl_usm_allocation_t *)acl_malloc(sizeof(acl_usm_allocation_t)); if (!usm_alloc) { - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, "Out of host memory"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, "Out of host memory"); } int error; @@ -466,21 +458,21 @@ clSharedMemAllocINTEL(cl_context context, cl_device_id device, acl_free(usm_alloc); switch (error) { case CL_OUT_OF_HOST_MEMORY: - UNLOCK_BAIL_INFO(error, context, - "Error: Unable to allocate " + std::to_string(size) + - " bytes"); + BAIL_INFO(error, context, + "Error: Unable to allocate " + std::to_string(size) + + " bytes"); break; case CL_INVALID_VALUE: - UNLOCK_BAIL_INFO(error, context, - "Error: Unsupported alignment of " + - std::to_string(alignment)); + BAIL_INFO(error, context, + "Error: Unsupported alignment of " + + std::to_string(alignment)); break; case CL_INVALID_PROPERTY: - UNLOCK_BAIL_INFO(error, context, "Error: Unsuported properties"); + BAIL_INFO(error, context, "Error: Unsuported properties"); break; default: - UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, - "Error: Unable to allocate memory"); + BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, + "Error: Unable to allocate memory"); break; } } @@ -494,45 +486,43 @@ clSharedMemAllocINTEL(cl_context context, cl_device_id device, usm_alloc->alignment = alignment; l_add_usm_alloc_to_context(context, usm_alloc); - UNLOCK_RETURN(mem); + return mem; } // After all the error check, still error out // Shared allocation is not supported yet - UNLOCK_BAIL_INFO( - CL_INVALID_VALUE, context, - 
"Shared allocation is not supported for devices in this context"); + BAIL_INFO(CL_INVALID_VALUE, context, + "Shared allocation is not supported for devices in this context"); } ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clMemFreeINTEL(cl_context context, void *ptr) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_context_is_valid(context)) { - UNLOCK_RETURN(CL_INVALID_CONTEXT); + return CL_INVALID_CONTEXT; } // NULL is valid input where nothing happens if (ptr == NULL) { - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } acl_usm_allocation_t *usm_alloc = acl_get_usm_alloc_from_ptr(context, ptr); if (!usm_alloc) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Memory must be USM allocation in context"); + ERR_RET(CL_INVALID_VALUE, context, + "Memory must be USM allocation in context"); } if (usm_alloc->range.begin != ptr) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Pointer must be exact value returned by allocation"); + ERR_RET(CL_INVALID_VALUE, context, + "Pointer must be exact value returned by allocation"); } switch (usm_alloc->type) { case CL_MEM_TYPE_HOST_INTEL: { if (acl_get_hal()->free) { if (acl_get_hal()->free(context, const_cast(ptr))) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Failed to free host allocation"); + ERR_RET(CL_INVALID_VALUE, context, "Failed to free host allocation"); } } break; @@ -540,22 +530,20 @@ CL_API_ENTRY cl_int CL_API_CALL clMemFreeINTEL(cl_context context, void *ptr) { case CL_MEM_TYPE_DEVICE_INTEL: { cl_int status = clReleaseMemObjectIntelFPGA(usm_alloc->mem); if (status != CL_SUCCESS) { - UNLOCK_RETURN(status); + return status; } break; } case CL_MEM_TYPE_SHARED_INTEL: { if (acl_get_hal()->free) { if (acl_get_hal()->free(context, const_cast(ptr))) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Failed to free shared allocation"); + ERR_RET(CL_INVALID_VALUE, context, "Failed to free shared allocation"); } } break; } default: { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Pointer must be from USM 
allocation"); + ERR_RET(CL_INVALID_VALUE, context, "Pointer must be from USM allocation"); break; } } @@ -563,31 +551,31 @@ CL_API_ENTRY cl_int CL_API_CALL clMemFreeINTEL(cl_context context, void *ptr) { l_remove_usm_alloc_from_context(context, usm_alloc); acl_free(usm_alloc); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clMemBlockingFreeINTEL(cl_context context, void *ptr) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_context_is_valid(context)) { - UNLOCK_RETURN(CL_INVALID_CONTEXT); + return CL_INVALID_CONTEXT; } // NULL is valid input where nothing happens if (ptr == NULL) { - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } acl_usm_allocation_t *usm_alloc = acl_get_usm_alloc_from_ptr(context, ptr); if (!usm_alloc) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Memory must be USM allocation in context"); + ERR_RET(CL_INVALID_VALUE, context, + "Memory must be USM allocation in context"); } if (usm_alloc->range.begin != ptr) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Pointer must be exact value returned by allocation"); + ERR_RET(CL_INVALID_VALUE, context, + "Pointer must be exact value returned by allocation"); } // wait for enqueued commands that uses ptr to finish before free @@ -597,8 +585,7 @@ CL_API_ENTRY cl_int CL_API_CALL clMemBlockingFreeINTEL(cl_context context, case CL_MEM_TYPE_HOST_INTEL: { if (acl_get_hal()->free) { if (acl_get_hal()->free(context, const_cast(ptr))) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Failed to free host allocation"); + ERR_RET(CL_INVALID_VALUE, context, "Failed to free host allocation"); } } break; @@ -606,22 +593,20 @@ CL_API_ENTRY cl_int CL_API_CALL clMemBlockingFreeINTEL(cl_context context, case CL_MEM_TYPE_DEVICE_INTEL: { cl_int status = clReleaseMemObjectIntelFPGA(usm_alloc->mem); if (status != CL_SUCCESS) { - UNLOCK_RETURN(status); + return status; } break; } case CL_MEM_TYPE_SHARED_INTEL: { if (acl_get_hal()->free) { if 
(acl_get_hal()->free(context, const_cast(ptr))) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Failed to free shared allocation"); + ERR_RET(CL_INVALID_VALUE, context, "Failed to free shared allocation"); } } break; } default: { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Pointer must be from USM allocation"); + ERR_RET(CL_INVALID_VALUE, context, "Pointer must be from USM allocation"); break; } } @@ -629,20 +614,20 @@ CL_API_ENTRY cl_int CL_API_CALL clMemBlockingFreeINTEL(cl_context context, l_remove_usm_alloc_from_context(context, usm_alloc); acl_free(usm_alloc); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clGetMemAllocInfoINTEL( cl_context context, const void *ptr, cl_mem_info_intel param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_context_is_valid(context)) { - UNLOCK_RETURN(CL_INVALID_CONTEXT); + return CL_INVALID_CONTEXT; } - UNLOCK_VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, - param_value_size_ret, context); + VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, param_value_size_ret, + context); // Get USM allocation associated with ptr acl_usm_allocation_t *usm_alloc = acl_get_usm_alloc_from_ptr(context, ptr); @@ -701,8 +686,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetMemAllocInfoINTEL( } break; default: { - UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Param name is not a valid query"); + ERR_RET(CL_INVALID_VALUE, context, "Param name is not a valid query"); } break; } @@ -710,8 +694,8 @@ CL_API_ENTRY cl_int CL_API_CALL clGetMemAllocInfoINTEL( // Try to return the param value. if (param_value_size < result.size) { // Buffer is too small to hold the return value. 
- UNLOCK_ERR_RET(CL_INVALID_VALUE, context, - "Param value size is smaller than query return type"); + ERR_RET(CL_INVALID_VALUE, context, + "Param value size is smaller than query return type"); } RESULT_COPY(param_value, param_value_size); } @@ -719,7 +703,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetMemAllocInfoINTEL( if (param_value_size_ret) { *param_value_size_ret = result.size; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } // clEnqueueMemsetINTEL has been removed in the latest OpenCL spec, but SYCl @@ -741,60 +725,59 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemFillINTEL( size_t pattern_size, size_t size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) { char *ptr; - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (dst_ptr == NULL) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer argument cannot be NULL"); } if (((uintptr_t)dst_ptr) % (pattern_size) != 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer not aligned with pattern size"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer not aligned with pattern size"); } if (pattern == NULL) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pattern argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pattern argument cannot be NULL"); } if (pattern_size == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pattern size argument cannot be 0"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pattern size argument cannot be 0"); } // Pattern size must be less than largest supported int/float vec type if (pattern_size > sizeof(double) * 16) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Patern size must be 
less than double16"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Patern size must be less than double16"); } // Pattern size can only be power of 2 if (pattern_size & (pattern_size - 1)) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Patern size must be power of 2"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Patern size must be power of 2"); } if (size == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Size cannot be 0"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, "Size cannot be 0"); } if (size % pattern_size != 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Size must be multiple of pattern size"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Size must be multiple of pattern size"); } // This array is passed to clSetEventCallback for releasing the // allocated memory and releasing the event, if *event is null. void **callback_data = (void **)acl_malloc(sizeof(void *) * 2); if (!callback_data) { - UNLOCK_ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, - "Out of host memory"); + ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, + "Out of host memory"); } acl_aligned_ptr_t *aligned_ptr = (acl_aligned_ptr_t *)acl_malloc(sizeof(acl_aligned_ptr_t)); if (!aligned_ptr) { acl_free(callback_data); - UNLOCK_ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, - "Out of host memory"); + ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, + "Out of host memory"); } // Replicating the value, size times. 
@@ -803,8 +786,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemFillINTEL( if (!ptr) { acl_free(aligned_ptr); acl_free(callback_data); - UNLOCK_ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, - "Out of host memory"); + ERR_RET(CL_OUT_OF_HOST_MEMORY, command_queue->context, + "Out of host memory"); } for (cl_uint i = 0; i < size / pattern_size; i++) { @@ -822,8 +805,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemFillINTEL( acl_mem_aligned_free(command_queue->context, aligned_ptr); acl_free(aligned_ptr); acl_free(callback_data); - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Size accesses outside of USM allocation dst_ptr range"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Size accesses outside of USM allocation dst_ptr range"); } if (usm_alloc->type == CL_MEM_TYPE_DEVICE_INTEL) { dst_device = usm_alloc->device; @@ -838,8 +821,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemFillINTEL( acl_mem_aligned_free(command_queue->context, aligned_ptr); acl_free(aligned_ptr); acl_free(callback_data); - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Memory allocation needs to be on command queue device"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Memory allocation needs to be on command queue device"); } cl_event tmp_event = NULL; @@ -853,7 +836,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemFillINTEL( acl_mem_aligned_free(command_queue->context, aligned_ptr); acl_free(aligned_ptr); acl_free(callback_data); - UNLOCK_RETURN(status); // already signalled callback + return status; // already signalled callback } tmp_event->cmd.info.usm_xfer.src_ptr = ptr; tmp_event->cmd.info.usm_xfer.dst_ptr = dst_ptr; @@ -878,7 +861,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemFillINTEL( acl_free_allocation_after_event_completion, (void *)callback_data); - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } ACL_EXPORT @@ -886,30 +869,30 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemcpyINTEL( cl_command_queue command_queue, cl_bool 
blocking, void *dst_ptr, const void *src_ptr, size_t size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (dst_ptr == NULL) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer argument cannot be NULL"); } if (src_ptr == NULL) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer argument cannot be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer argument cannot be NULL"); } if (size == 0) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer size cannot be 0"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer size cannot be 0"); } if (((char *)src_ptr < (char *)dst_ptr && (char *)src_ptr + size > (char *)dst_ptr) || ((char *)dst_ptr < (char *)src_ptr && (char *)dst_ptr + size > (char *)src_ptr)) { - UNLOCK_ERR_RET(CL_MEM_COPY_OVERLAP, command_queue->context, - "Source and destination memory overlaps"); + ERR_RET(CL_MEM_COPY_OVERLAP, command_queue->context, + "Source and destination memory overlaps"); } acl_usm_allocation_t *dst_usm_alloc = @@ -919,8 +902,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemcpyINTEL( cl_device_id dst_device = NULL; if (dst_usm_alloc) { if (l_ptr_in_usm_alloc_range(dst_usm_alloc, dst_ptr, size) != CL_TRUE) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Size accesses outside of USM allocation dst_ptr range"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Size accesses outside of USM allocation dst_ptr range"); } if (dst_usm_alloc->type == CL_MEM_TYPE_DEVICE_INTEL) { dst_device = dst_usm_alloc->device; @@ -936,8 +919,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemcpyINTEL( // Even if src_ptr is not USM 
pointer, continue assuming it's system mem if (src_usm_alloc) { if (l_ptr_in_usm_alloc_range(src_usm_alloc, src_ptr, size) != CL_TRUE) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Size accesses outside of USM allocation src_ptr range"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Size accesses outside of USM allocation src_ptr range"); } if (src_usm_alloc->type == CL_MEM_TYPE_DEVICE_INTEL) { src_device = src_usm_alloc->device; @@ -947,8 +930,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemcpyINTEL( if ((dst_device && dst_device->id != command_queue->device->id) || (src_device && src_device->id != command_queue->device->id)) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Memory allocation needs to be on command queue's device"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Memory allocation needs to be on command queue's device"); } cl_event tmp_event = NULL; @@ -958,7 +941,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemcpyINTEL( acl_create_event(command_queue, num_events_in_wait_list, event_wait_list, CL_COMMAND_MEMCPY_INTEL, &tmp_event); if (status != CL_SUCCESS) { - UNLOCK_RETURN(status); // already signalled callback + return status; // already signalled callback } tmp_event->cmd.info.usm_xfer.src_ptr = src_ptr; tmp_event->cmd.info.usm_xfer.dst_ptr = dst_ptr; @@ -982,10 +965,10 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemcpyINTEL( } if (blocking && status == CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST) { - UNLOCK_RETURN(status); + return status; } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } // Unused argument names are commented out to avoid Windows compile warning: @@ -996,14 +979,14 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemINTEL( cl_command_queue command_queue, const void *ptr, size_t /* size */, cl_mem_migration_flags /* flags */, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if 
(!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (ptr == NULL) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer argument can not be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer argument can not be NULL"); } // Migrate currently doesn't do anything @@ -1015,7 +998,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemINTEL( CL_COMMAND_MIGRATEMEM_INTEL, &local_event); if (result != CL_SUCCESS) { - UNLOCK_RETURN(result); + return result; } if (event) { @@ -1023,7 +1006,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemINTEL( } else { clReleaseEvent(local_event); } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } // Unused argument names are commented out to avoid Windows compile warning: @@ -1034,14 +1017,14 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemAdviseINTEL( cl_command_queue command_queue, const void *ptr, size_t /* size */, cl_mem_advice_intel /* advice */, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) { - acl_lock(); + std::scoped_lock lock{acl_mutex_wrapper}; if (!acl_command_queue_is_valid(command_queue)) { - UNLOCK_RETURN(CL_INVALID_COMMAND_QUEUE); + return CL_INVALID_COMMAND_QUEUE; } if (ptr == NULL) { - UNLOCK_ERR_RET(CL_INVALID_VALUE, command_queue->context, - "Pointer argument can not be NULL"); + ERR_RET(CL_INVALID_VALUE, command_queue->context, + "Pointer argument can not be NULL"); } // MemAdvise currently doesn't do anything @@ -1053,7 +1036,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemAdviseINTEL( CL_COMMAND_MEMADVISE_INTEL, &local_event); if (result != CL_SUCCESS) { - UNLOCK_RETURN(result); + return result; } if (event) { @@ -1061,7 +1044,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemAdviseINTEL( } else { clReleaseEvent(local_event); } - UNLOCK_RETURN(CL_SUCCESS); + return CL_SUCCESS; } void acl_usm_memcpy(void *, acl_device_op_t *op) { diff --git 
a/test/acl_device_op_test.cpp b/test/acl_device_op_test.cpp index e6c116fb..2934e2cb 100644 --- a/test/acl_device_op_test.cpp +++ b/test/acl_device_op_test.cpp @@ -133,7 +133,7 @@ static struct _cl_event myevents[EVENT_NUM] = {{0}}; TEST_GROUP(device_op) { virtual void setup() { - acl_lock(); + acl_mutex_wrapper.lock(); acl_test_setup_generic_system(); acl_init_device_op_queue(&m_doq); clear_queue_callbacks(&m_doq); @@ -152,7 +152,7 @@ TEST_GROUP(device_op) { virtual void teardown() { unload(); acl_test_teardown_generic_system(); - acl_unlock(); + acl_mutex_wrapper.unlock(); acl_test_run_standard_teardown_checks(); } diff --git a/test/acl_event_test.cpp b/test/acl_event_test.cpp index 84f1186b..eec5d763 100644 --- a/test/acl_event_test.cpp +++ b/test/acl_event_test.cpp @@ -292,7 +292,7 @@ MT_TEST(acl_event, acl_create_event) { // Bad command queue struct _cl_command_queue fake_cq = {0}; - acl_lock(); + acl_mutex_wrapper.lock(); CHECK_EQUAL(CL_INVALID_COMMAND_QUEUE, acl_create_event(0, 0, 0, CL_COMMAND_MARKER, &user_event)); CHECK_EQUAL(CL_INVALID_COMMAND_QUEUE, @@ -338,7 +338,7 @@ MT_TEST(acl_event, acl_create_event) { CHECK(cq0); acl_update_queue(cq0); - acl_unlock(); + acl_mutex_wrapper.unlock(); // Check the callback functions initialization. 
CHECK_EQUAL(NULL, event[0]->callback_list); @@ -614,7 +614,7 @@ MT_TEST(acl_event, event_liveness) { CHECK_EQUAL(CL_SUCCESS, status); CHECK(cq0); - acl_lock(); + acl_mutex_wrapper.lock(); cl_event event; status = acl_create_event(cq0, 0, 0, CL_COMMAND_MARKER, &event); CHECK_EQUAL(CL_SUCCESS, status); @@ -659,7 +659,7 @@ MT_TEST(acl_event, event_liveness) { acl_set_execution_status(event, CL_QUEUED); CHECK(acl_event_is_live(event)); acl_retain(event); - acl_unlock(); + acl_mutex_wrapper.unlock(); this->kill_event(event); @@ -690,7 +690,7 @@ MT_TEST(acl_event, event_callbacks) { CHECK_EQUAL(CL_SUCCESS, status); CHECK(cq0); - acl_lock(); + acl_mutex_wrapper.lock(); cl_event event, event2, event3; status = acl_create_event(cq0, 0, 0, CL_COMMAND_MARKER, &event); CHECK_EQUAL(CL_SUCCESS, status); @@ -807,7 +807,7 @@ MT_TEST(acl_event, event_callbacks) { CHECK_EQUAL(1, call_flags_event3[i]); } clReleaseEvent(user_event); - acl_unlock(); + acl_mutex_wrapper.unlock(); this->kill_event(event); this->kill_event(event2); diff --git a/test/acl_globals_test.cpp b/test/acl_globals_test.cpp index 7c1a361e..5376d4e6 100644 --- a/test/acl_globals_test.cpp +++ b/test/acl_globals_test.cpp @@ -693,12 +693,12 @@ const acl_system_def_t *acl_test_get_empty_system_def() { return &acltest_empty_system; } -TEST_GROUP(acl_globals_undef){void setup(){acl_lock(); +TEST_GROUP(acl_globals_undef){void setup(){acl_mutex_wrapper.lock(); acl_set_hal(acl_test_get_simple_hal()); } void teardown() { acl_reset_hal(); - acl_unlock(); + acl_mutex_wrapper.unlock(); acl_test_run_standard_teardown_checks(); } diff --git a/test/acl_hal_mmd_test.cpp b/test/acl_hal_mmd_test.cpp index 41dcc390..5b901ee9 100644 --- a/test/acl_hal_mmd_test.cpp +++ b/test/acl_hal_mmd_test.cpp @@ -19,14 +19,14 @@ #include #include -TEST_GROUP(acl_hal_mmd){void setup(){acl_lock(); +TEST_GROUP(acl_hal_mmd){void setup(){acl_mutex_wrapper.lock(); acl_hal_test_setup_generic_system(); this->load(); } void teardown() { this->unload(); 
acl_hal_test_teardown_generic_system(); - acl_unlock(); + acl_mutex_wrapper.unlock(); acl_test_run_standard_teardown_checks(); } diff --git a/test/acl_hal_test.cpp b/test/acl_hal_test.cpp index 266a9cd1..6e27ab2a 100644 --- a/test/acl_hal_test.cpp +++ b/test/acl_hal_test.cpp @@ -162,10 +162,10 @@ bool acltest_hal_emulate_device_mem = false; static void *acltest_hal_device_mem = 0; static size_t acltest_hal_device_mem_size = 0; -TEST_GROUP(acl_hal){void setup(){acl_lock(); +TEST_GROUP(acl_hal){void setup(){acl_mutex_wrapper.lock(); } void teardown() { - acl_unlock(); + acl_mutex_wrapper.unlock(); acl_assert_unlocked(); } } diff --git a/test/acl_support_test.cpp b/test/acl_support_test.cpp index fa33b99a..114c0c1c 100644 --- a/test/acl_support_test.cpp +++ b/test/acl_support_test.cpp @@ -23,12 +23,12 @@ #include #endif -TEST_GROUP(support){void setup(){acl_lock(); +TEST_GROUP(support){void setup(){acl_mutex_wrapper.lock(); acl_test_setup_generic_system(); } void teardown() { acl_test_teardown_generic_system(); - acl_unlock(); + acl_mutex_wrapper.unlock(); acl_test_run_standard_teardown_checks(); } diff --git a/test/acl_test.cpp b/test/acl_test.cpp index 1a194255..890735fc 100644 --- a/test/acl_test.cpp +++ b/test/acl_test.cpp @@ -91,24 +91,24 @@ int main(int argc, const char **argv) { } void acl_test_setup_generic_system() { - acl_lock(); + acl_mutex_wrapper.lock(); acl_set_hal(acl_test_get_simple_hal()); acl_init(acl_test_get_complex_system_def()); - acl_unlock(); + acl_mutex_wrapper.unlock(); } void acl_test_setup_empty_system() { - acl_lock(); + acl_mutex_wrapper.lock(); acl_set_hal(acl_test_get_simple_hal()); acl_init(acl_test_get_empty_system_def()); - acl_unlock(); + acl_mutex_wrapper.unlock(); } void acl_test_setup_sample_default_board_system(void) { - acl_lock(); + acl_mutex_wrapper.lock(); acl_set_hal(acl_test_get_simple_hal()); acl_init(&acl_test_example_binary_sysdef); - acl_unlock(); + acl_mutex_wrapper.unlock(); } void 
acl_test_teardown_sample_default_board_system(void) { @@ -117,11 +117,11 @@ void acl_test_teardown_sample_default_board_system(void) { void acl_test_teardown_generic_system(void) { acl_test_teardown_system(); } void acl_test_teardown_system(void) { - acl_lock(); + acl_mutex_wrapper.lock(); acl_reset(); acl_reset_hal(); acltest_hal_teardown(); - acl_unlock(); + acl_mutex_wrapper.unlock(); } void acl_hal_test_setup_generic_system(void) { return; }; @@ -482,8 +482,8 @@ static void l_run_benchmark() { times = &results["acl_lock/acl_unlock"]; for (int inner_rep = 0; inner_rep < INNER_REPS; ++inner_rep) { start_time = l_get_timestamp(); - acl_lock(); - acl_unlock(); + acl_mutex_wrapper.lock(); + acl_mutex_wrapper.unlock(); end_time = l_get_timestamp(); times->push_back(end_time - start_time); } @@ -491,11 +491,11 @@ static void l_run_benchmark() { std::cout << "Measuring acl_assert_locked..." << std::endl; times = &results["acl_assert_locked"]; for (int inner_rep = 0; inner_rep < INNER_REPS; ++inner_rep) { - acl_lock(); + acl_mutex_wrapper.lock(); start_time = l_get_timestamp(); acl_assert_locked(); end_time = l_get_timestamp(); - acl_unlock(); + acl_mutex_wrapper.unlock(); times->push_back(end_time - start_time); } diff --git a/test/acl_test.h b/test/acl_test.h index a7943d52..97897b01 100644 --- a/test/acl_test.h +++ b/test/acl_test.h @@ -55,9 +55,8 @@ void CL_CALLBACK acl_test_notify_print(const char *errinfo, #define ACL_LOCKED(...) \ do { \ - acl_lock(); \ + std::scoped_lock lock{acl_mutex_wrapper}; \ { __VA_ARGS__; } \ - acl_unlock(); \ } while (0) /* CAUTION. These are only used in self-tests.