Merge pull request #1549 from LLNL/excise-loop-exec
Excise loop exec
rhornung67 authored Oct 2, 2023
2 parents 7fd27f3 + 3458b65 commit 2658c5d
Showing 15 changed files with 61 additions and 200 deletions.
6 changes: 3 additions & 3 deletions docs/sphinx/user_guide/feature/local_array.rst
@@ -57,12 +57,12 @@ its constituent parts::
//

   using POL = RAJA::KernelPolicy<
-    RAJA::statement::For<1, RAJA::loop_exec,
+    RAJA::statement::For<1, RAJA::seq_exec,
       RAJA::statement::InitLocalMem<RAJA::cpu_tile_mem, RAJA::ParamList<0, 1>,
-        RAJA::statement::For<0, RAJA::loop_exec,
+        RAJA::statement::For<0, RAJA::seq_exec,
           RAJA::statement::Lambda<0>
         >,
-        RAJA::statement::For<0, RAJA::loop_exec,
+        RAJA::statement::For<0, RAJA::seq_exec,
           RAJA::statement::Lambda<1>
         >
       >
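For reference, a policy like the one above might drive ``RAJA::kernel_param`` along the following lines. This is a minimal sketch; the names ``Scratch``, ``s0``, ``s1``, the extents, and the lambda bodies are illustrative assumptions, not part of this change::

  #include "RAJA/RAJA.hpp"

  using POL = RAJA::KernelPolicy<
    RAJA::statement::For<1, RAJA::seq_exec,
      RAJA::statement::InitLocalMem<RAJA::cpu_tile_mem, RAJA::ParamList<0, 1>,
        RAJA::statement::For<0, RAJA::seq_exec,
          RAJA::statement::Lambda<0>
        >,
        RAJA::statement::For<0, RAJA::seq_exec,
          RAJA::statement::Lambda<1>
        >
      >
    >
  >;

  int main()
  {
    constexpr int N = 4;  // inner extent (loop index 0)
    constexpr int M = 3;  // outer extent (loop index 1)

    // Two local (scratch) arrays: parameters 0 and 1 in ParamList<0, 1>.
    using Scratch = RAJA::LocalArray<double, RAJA::Perm<0>, RAJA::SizeList<N>>;
    Scratch s0, s1;

    double out[M] = {0.0, 0.0, 0.0};
    double* out_ptr = out;

    RAJA::kernel_param<POL>(
      RAJA::make_tuple(RAJA::TypedRangeSegment<int>(0, N),
                       RAJA::TypedRangeSegment<int>(0, M)),
      RAJA::make_tuple(s0, s1),

      // Lambda<0>: fill both local arrays for the current outer iterate j.
      [=](int i, int j, Scratch& a, Scratch& b) {
        a(i) = static_cast<double>(i + j);
        b(i) = 2.0 * i;
      },

      // Lambda<1>: consume the local arrays.
      [=](int i, int j, Scratch& a, Scratch& b) {
        out_ptr[j] += a(i) * b(i);
      });

    return 0;
  }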
2 changes: 1 addition & 1 deletion docs/sphinx/user_guide/feature/plugins.rst
@@ -212,7 +212,7 @@ After CHAI has been built with RAJA support enabled, applications can use CHAI
array[i] = i * 2.0f;
});

-  RAJA::forall<RAJA::loop_exec>(0, 1000, [=] (int i) {
+  RAJA::forall<RAJA::seq_exec>(0, 1000, [=] (int i) {
std::cout << "array[" << i << "] is " << array[i] << std::endl;
});

37 changes: 16 additions & 21 deletions docs/sphinx/user_guide/feature/policies.rst
@@ -49,21 +49,19 @@ apply during code compilation.
====================================== ============= ==========================
 seq_launch_t                          launch        Creates a sequential
                                                     execution space.
-seq_exec                              forall,       Strictly sequential
-                                      kernel (For), execution.
-                                      scan,
-                                      sort
+seq_exec                              forall,       Sequential execution,
+                                      kernel (For), where the compiler is
+                                      scan,         allowed to apply any
+                                      sort          optimizations that its
+                                                    heuristics deem
+                                                    beneficial; i.e., no loop
+                                                    decorations (pragmas or
+                                                    intrinsics) in the RAJA
+                                                    implementation.
 simd_exec                             forall,       Try to force generation of
                                       kernel (For), SIMD instructions via
                                       scan          compiler hints in RAJA's
                                                     internal implementation.
-loop_exec                             forall,       Allow the compiler to
-                                      kernel (For), generate any optimizations
-                                      scan,         that its heuristics deem
-                                      sort          beneficial;
-                                                    i.e., no loop decorations
-                                                    (pragmas or intrinsics) in
-                                                    RAJA implementation.
====================================== ============= ==========================
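As a quick illustration of the two remaining entries in the table above, both policies plug into ``RAJA::forall`` the same way. This sketch assumes hypothetical arrays and sizes, not anything from this change::

  #include "RAJA/RAJA.hpp"
  #include <vector>

  int main()
  {
    constexpr int N = 1024;
    std::vector<double> a(N, 1.0), b(N, 2.0), c(N, 0.0);
    double* ap = a.data();
    double* bp = b.data();
    double* cp = c.data();

    // seq_exec: an undecorated sequential loop; the compiler may still
    // apply whatever optimizations its heuristics choose.
    RAJA::forall<RAJA::seq_exec>(RAJA::TypedRangeSegment<int>(0, N),
      [=](int i) { cp[i] = ap[i] + bp[i]; });

    // simd_exec: RAJA adds compiler hints to encourage SIMD code generation.
    RAJA::forall<RAJA::simd_exec>(RAJA::TypedRangeSegment<int>(0, N),
      [=](int i) { cp[i] = ap[i] * bp[i]; });

    return 0;
  }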


@@ -686,11 +684,11 @@ surround code that uses execution back-ends other than OpenMP. For example::

RAJA::region<RAJA::seq_region>([=]() {

-    RAJA::forall<RAJA::loop_exec>(segment, [=] (int idx) {
+    RAJA::forall<RAJA::seq_exec>(segment, [=] (int idx) {
// do something at iterate 'idx'
} );

-    RAJA::forall<RAJA::loop_exec>(segment, [=] (int idx) {
+    RAJA::forall<RAJA::seq_exec>(segment, [=] (int idx) {
// do something else at iterate 'idx'
} );

@@ -725,7 +723,6 @@ Reduction Policy Loop Policies Brief description
                        to Use With
======================= ============= ==========================================
 seq_reduce             seq_exec,     Non-parallel (sequential) reduction.
-                       loop_exec
 omp_reduce             any OpenMP    OpenMP parallel reduction.
                        policy
 omp_reduce_ordered     any OpenMP    OpenMP parallel reduction with result
@@ -766,7 +763,7 @@ Atomic Policy Loop Policies Brief description
                              to Use With
============================= ============= ========================================
 seq_atomic                   seq_exec,     Atomic operation performed in a
-                             loop_exec     non-parallel (sequential) kernel.
+                                           non-parallel (sequential) kernel.
 omp_atomic                   any OpenMP    Atomic operation in OpenMP
                              policy        multithreading or target kernel;
                                            i.e., apply ``omp atomic`` pragma.
@@ -781,14 +778,12 @@ cuda/hip_atomic_explicit any CUDA/HIP Atomic operation performed in a CUDA
                                            argument. See additional explanation
                                            and example below.
 builtin_atomic               seq_exec,     Compiler *builtin* atomic operation.
-                             loop_exec,
                              any OpenMP
                              policy
-auto_atomic                  seq_exec,     Atomic operation *compatible* with loop
-                             loop_exec,    execution policy. See example below.
-                             any OpenMP    Can not be used inside cuda/hip
-                             policy,       explicit atomic policies.
-                             any
+auto_atomic                  seq_exec,     Atomic operation *compatible* with
+                             any OpenMP    loop execution policy. See example
+                             policy,       below. Cannot be used inside CUDA or
+                             any           HIP explicit atomic policies.
+                             CUDA/HIP/SYCL
+                             policy
============================= ============= ========================================
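To make the pairings above concrete, here is a minimal sketch of a sequential reduction combined with a sequential atomic; variable names and counts are illustrative::

  #include "RAJA/RAJA.hpp"

  int main()
  {
    constexpr int N = 100;
    int counter = 0;
    int* counter_ptr = &counter;

    // seq_reduce now pairs only with seq_exec (the loop_exec pairing is gone).
    RAJA::ReduceSum<RAJA::seq_reduce, int> sum(0);

    RAJA::forall<RAJA::seq_exec>(RAJA::TypedRangeSegment<int>(0, N),
      [=](int i) {
        sum += i;
        // seq_atomic in a non-parallel kernel; auto_atomic would also
        // resolve to a policy compatible with seq_exec here.
        RAJA::atomicAdd<RAJA::seq_atomic>(counter_ptr, 1);
      });

    return 0;
  }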
4 changes: 2 additions & 2 deletions docs/sphinx/user_guide/feature/tiling.rst
@@ -79,7 +79,7 @@ The ``RAJA::launch`` API also supports loop tiling through specialized
methods. The launch version of the code above is ::

   using launch_t = RAJA::LaunchPolicy<RAJA::seq_launch_t>;
-   using loop_t = RAJA::LoopPolicy<RAJA::loop_exec>;
+   using loop_t = RAJA::LoopPolicy<RAJA::seq_exec>;

RAJA::launch<launch_t>(
     RAJA::LaunchParams(), [=] RAJA_HOST_DEVICE (RAJA::LaunchContext ctx) {
@@ -154,7 +154,7 @@ The launch API uses ``RAJA::tile_tcount`` and ``RAJA::loop_icount`` methods
which have a second lambda argument for the tile or loop index. We illustrate usage below::

   using launch_t = RAJA::LaunchPolicy<RAJA::seq_launch_t>;
-   using loop_t = RAJA::LoopPolicy<RAJA::loop_exec>;
+   using loop_t = RAJA::LoopPolicy<RAJA::seq_exec>;

RAJA::launch<launch_t>(
     RAJA::LaunchParams(), [=] RAJA_HOST_DEVICE (RAJA::LaunchContext ctx) {
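A minimal sketch of the ``RAJA::tile_tcount`` / ``RAJA::loop_icount`` pattern with the updated policies follows; the extents, tile size, and lambda bodies are illustrative assumptions::

  #include "RAJA/RAJA.hpp"

  int main()
  {
    constexpr int N = 16;
    constexpr int TILE_SZ = 4;

    using launch_t = RAJA::LaunchPolicy<RAJA::seq_launch_t>;
    using loop_t   = RAJA::LoopPolicy<RAJA::seq_exec>;

    RAJA::launch<launch_t>(RAJA::LaunchParams(),
      [=] RAJA_HOST_DEVICE (RAJA::LaunchContext ctx) {

        // tile_tcount passes the tile segment plus the tile index t.
        RAJA::tile_tcount<loop_t>(ctx, TILE_SZ,
          RAJA::TypedRangeSegment<int>(0, N),
          [&](RAJA::TypedRangeSegment<int> const& tile, int t) {

            // loop_icount passes the global iterate plus the local index.
            RAJA::loop_icount<loop_t>(ctx, tile, [&](int idx, int i) {
              (void)t; (void)idx; (void)i;  // illustrative only
            });
          });
      });

    return 0;
  }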
4 changes: 2 additions & 2 deletions docs/sphinx/user_guide/tutorial.rst
@@ -95,7 +95,7 @@ Here, "ExecPol", "IdxType", and "LoopBody" are C++ types that a user specifies
in her code and which are seen by the compiler when the code is built.
For example::

-  RAJA::forall< RAJA::loop_exec >( RAJA::TypedRangeSegment<int>(0, N), [=](int i) {
+  RAJA::forall< RAJA::seq_exec >( RAJA::TypedRangeSegment<int>(0, N), [=](int i) {
a[i] = b[i] + c[i];
});

@@ -106,7 +106,7 @@ The C-style analogue of this kernel is::
a[i] = b[i] + c[i];
}

-The execution policy type ``RAJA::loop_exec`` template argument
+The execution policy type ``RAJA::seq_exec`` template argument
 is used to choose a specific implementation of the
 ``RAJA::forall`` method. The ``IdxType`` and ``LoopBody`` types are deduced by
 the compiler based on the arguments passed to the ``RAJA::forall`` method;
19 changes: 6 additions & 13 deletions docs/sphinx/user_guide/tutorial/add_vectors.rst
@@ -59,22 +59,15 @@ policy type:
:end-before: _rajaseq_vector_add_end
:language: C++

-The RAJA sequential execution policy enforces strictly sequential execution;
-in particular, no SIMD vectorization instructions or other substantial
-optimizations will be generated by the compiler. To attempt to force the
-compiler to generate SIMD vector instructions, we would use the RAJA SIMD
-execution policy::
+When using the RAJA sequential execution policy, the resulting loop
+implementation is essentially the same as writing a C-style for-loop
+with no directives applied to the loop. The compiler is allowed to
+perform any optimizations that its heuristics deem safe and potentially
+beneficial for performance. To attempt to force the compiler to generate SIMD
+vector instructions, we would use the RAJA SIMD execution policy::

RAJA::simd_exec

-An alternative RAJA policy is::
-
-    RAJA::loop_exec
-
-which allows the compiler to generate optimizations based on how its internal
-heuristics suggest that it is safe to do so and potentially
-beneficial for performance, but the optimizations are not forced.

To run the kernel with OpenMP multithreaded parallelism on a CPU, we use the
``RAJA::omp_parallel_for_exec`` execution policy:

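For completeness, here is a sketch of the OpenMP variant mentioned above, guarded for builds without OpenMP; the array names and sizes are illustrative::

  #include "RAJA/RAJA.hpp"
  #include <vector>

  int main()
  {
    constexpr int N = 1000;
    std::vector<double> a(N, 0.0), b(N, 1.0), c(N, 2.0);
    double* ap = a.data();
    const double* bp = b.data();
    const double* cp = c.data();

  #if defined(RAJA_ENABLE_OPENMP)
    // OpenMP multithreaded vector add: same kernel body, different policy.
    RAJA::forall<RAJA::omp_parallel_for_exec>(
      RAJA::TypedRangeSegment<int>(0, N),
      [=](int i) { ap[i] = bp[i] + cp[i]; });
  #else
    // Sequential fallback when RAJA was built without OpenMP.
    RAJA::forall<RAJA::seq_exec>(
      RAJA::TypedRangeSegment<int>(0, N),
      [=](int i) { ap[i] = bp[i] + cp[i]; });
  #endif

    return 0;
  }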
2 changes: 1 addition & 1 deletion docs/sphinx/user_guide/tutorial/kernel_exec_pols.rst
@@ -133,7 +133,7 @@ and
:language: C++

The first of these, in which we parallelize the outer 'k' loop, replaces
-the ``RAJA::loop_exec`` loop execution policy with the
+the ``RAJA::seq_exec`` loop execution policy with the
``RAJA::omp_parallel_for_exec`` policy, which applies the same OpenMP
directive to the outer loop used in the C-style variant.

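A minimal sketch of the substitution described above, assuming an OpenMP-enabled RAJA build; this two-level nesting is a simplified stand-in for the tutorial's three-level kernel, with illustrative extents and data layout::

  #include "RAJA/RAJA.hpp"

  // OpenMP on the outer loop (index 1), sequential inner loop (index 0).
  using EXEC_POL = RAJA::KernelPolicy<
    RAJA::statement::For<1, RAJA::omp_parallel_for_exec,  // outer 'k'
      RAJA::statement::For<0, RAJA::seq_exec,             // inner
        RAJA::statement::Lambda<0>
      >
    >
  >;

  int main()
  {
    constexpr int Ni = 8, Nk = 4;
    double data[Nk * Ni];
    double* dp = data;

    RAJA::kernel<EXEC_POL>(
      RAJA::make_tuple(RAJA::TypedRangeSegment<int>(0, Ni),
                       RAJA::TypedRangeSegment<int>(0, Nk)),
      [=](int i, int k) { dp[k * Ni + i] = k + 0.1 * i; });

    return 0;
  }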
8 changes: 4 additions & 4 deletions docs/sphinx/user_guide/tutorial/launch_basic.rst
@@ -74,10 +74,10 @@ The mapping of teams and threads to the underlying programming
model depends on how the ``RAJA::loop`` template parameter types are
defined. For example, we may define host and device mapping strategies as::

-   using teams_x = RAJA::LoopPolicy<RAJA::loop_exec,
-                                    RAJA::cuda_block_x_direct>;
-   using thread_x = RAJA::LoopPolicy<RAJA::loop_exec,
-                                     RAJA::cuda_block_x_direct>;
+   using teams_x = RAJA::LoopPolicy< RAJA::seq_exec,
+                                     RAJA::cuda_block_x_direct >;
+   using thread_x = RAJA::LoopPolicy< RAJA::seq_exec,
+                                      RAJA::cuda_block_x_direct >;

Here, the ``RAJA::LoopPolicy`` type holds both the host (CPU) and
device (CUDA GPU) loop mapping strategies. On the host, both the team/thread
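Here is a sketch of how such ``RAJA::LoopPolicy`` aliases are consumed by ``RAJA::launch`` and ``RAJA::loop`` on the host; the team/thread counts are illustrative, and these host-only aliases omit the device policy shown above::

  #include "RAJA/RAJA.hpp"

  int main()
  {
    constexpr int NTEAMS = 2, NTHREADS = 4;

    using launch_t = RAJA::LaunchPolicy<RAJA::seq_launch_t>;
    using teams_x  = RAJA::LoopPolicy<RAJA::seq_exec>;
    using thread_x = RAJA::LoopPolicy<RAJA::seq_exec>;

    RAJA::launch<launch_t>(
      RAJA::LaunchParams(RAJA::Teams(NTEAMS), RAJA::Threads(NTHREADS)),
      [=] RAJA_HOST_DEVICE (RAJA::LaunchContext ctx) {

        RAJA::loop<teams_x>(ctx, RAJA::TypedRangeSegment<int>(0, NTEAMS),
          [&](int team) {
            RAJA::loop<thread_x>(ctx, RAJA::TypedRangeSegment<int>(0, NTHREADS),
              [&](int thr) {
                // Host execution: both loops run sequentially here.
                (void)team; (void)thr;
              });
          });
      });

    return 0;
  }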
2 changes: 1 addition & 1 deletion docs/sphinx/user_guide/tutorial/matrix_transpose.rst
@@ -99,7 +99,7 @@ and number of blocks to launch is determined by the implementation of the
For ``RAJA::launch`` variants, we use ``RAJA::loop`` methods
to write a loop hierarchy within the kernel execution space. For a sequential
implementation, we pass the ``RAJA::seq_launch_t`` template parameter
-to the launch method and pass the ``RAJA::loop_exec`` parameter to the loop
+to the launch method and pass the ``RAJA::seq_exec`` parameter to the loop
methods. The complete sequential ``RAJA::launch`` variant is:

.. literalinclude:: ../../../../exercises/launch-matrix-transpose_solution.cpp
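As a reference point, a sequential ``RAJA::launch`` transpose might look like the following sketch; the matrix sizes and names are illustrative, not taken from the exercise file::

  #include "RAJA/RAJA.hpp"

  int main()
  {
    constexpr int NR = 3, NC = 4;
    double A[NR][NC], At[NC][NR];
    for (int r = 0; r < NR; ++r) {
      for (int c = 0; c < NC; ++c) { A[r][c] = r * NC + c; }
    }

    double (*Ap)[NC]  = A;
    double (*Atp)[NR] = At;

    using launch_t = RAJA::LaunchPolicy<RAJA::seq_launch_t>;
    using loop_t   = RAJA::LoopPolicy<RAJA::seq_exec>;

    RAJA::launch<launch_t>(RAJA::LaunchParams(),
      [=] RAJA_HOST_DEVICE (RAJA::LaunchContext ctx) {
        RAJA::loop<loop_t>(ctx, RAJA::TypedRangeSegment<int>(0, NR),
          [&](int r) {
            RAJA::loop<loop_t>(ctx, RAJA::TypedRangeSegment<int>(0, NC),
              [&](int c) { Atp[c][r] = Ap[r][c]; });
          });
      });

    return 0;
  }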
5 changes: 0 additions & 5 deletions include/RAJA/RAJA.hpp
@@ -57,11 +57,6 @@
//
#include "RAJA/policy/sequential.hpp"

-//
-// All platforms must support loop execution.
-//
-#include "RAJA/policy/loop.hpp"

//
// All platforms should support simd and vector execution.
//
35 changes: 0 additions & 35 deletions include/RAJA/policy/loop.hpp

This file was deleted.

87 changes: 0 additions & 87 deletions include/RAJA/policy/loop/policy.hpp

This file was deleted.

22 changes: 11 additions & 11 deletions test/include/RAJA_test-launch-direct-teams-threads-3D-execpol.hpp
@@ -19,12 +19,12 @@
using seq_policies =
camp::list<
RAJA::LaunchPolicy<RAJA::seq_launch_t>,
-    RAJA::LoopPolicy<RAJA::loop_exec>,
-    RAJA::LoopPolicy<RAJA::loop_exec>,
-    RAJA::LoopPolicy<RAJA::loop_exec>,
-    RAJA::LoopPolicy<RAJA::loop_exec>,
-    RAJA::LoopPolicy<RAJA::loop_exec>,
-    RAJA::LoopPolicy<RAJA::loop_exec>
+    RAJA::LoopPolicy<RAJA::seq_exec>,
+    RAJA::LoopPolicy<RAJA::seq_exec>,
+    RAJA::LoopPolicy<RAJA::seq_exec>,
+    RAJA::LoopPolicy<RAJA::seq_exec>,
+    RAJA::LoopPolicy<RAJA::seq_exec>,
+    RAJA::LoopPolicy<RAJA::seq_exec>
>;

using Sequential_launch_policies = camp::list<seq_policies>;
@@ -35,11 +35,11 @@ using omp_policies =
camp::list<
RAJA::LaunchPolicy<RAJA::omp_launch_t>,
RAJA::LoopPolicy<RAJA::omp_for_exec>,
-    RAJA::LoopPolicy<RAJA::loop_exec>,
-    RAJA::LoopPolicy<RAJA::loop_exec>,
-    RAJA::LoopPolicy<RAJA::loop_exec>,
-    RAJA::LoopPolicy<RAJA::loop_exec>,
-    RAJA::LoopPolicy<RAJA::loop_exec>
+    RAJA::LoopPolicy<RAJA::seq_exec>,
+    RAJA::LoopPolicy<RAJA::seq_exec>,
+    RAJA::LoopPolicy<RAJA::seq_exec>,
+    RAJA::LoopPolicy<RAJA::seq_exec>,
+    RAJA::LoopPolicy<RAJA::seq_exec>
>;

using OpenMP_launch_policies = camp::list<omp_policies>;
@@ -18,8 +18,8 @@
//Launch policies
using seq_policies = camp::list<
RAJA::LaunchPolicy<RAJA::seq_launch_t>,
-    RAJA::LoopPolicy<RAJA::loop_exec>,
-    RAJA::LoopPolicy<RAJA::loop_exec>
+    RAJA::LoopPolicy<RAJA::seq_exec>,
+    RAJA::LoopPolicy<RAJA::seq_exec>
>;

using Sequential_launch_policies = camp::list<
@@ -30,7 +30,7 @@ using Sequential_launch_policies = camp::list<
using omp_policies = camp::list<
RAJA::LaunchPolicy<RAJA::omp_launch_t>,
RAJA::LoopPolicy<RAJA::omp_for_exec>,
-    RAJA::LoopPolicy<RAJA::loop_exec>
+    RAJA::LoopPolicy<RAJA::seq_exec>
>;

using OpenMP_launch_policies = camp::list<