Commit

Merge branch 'develop' into unbind
Eddie-Wang1120 authored May 21, 2024
2 parents f2e9a11 + 79a8490 commit 82c3749
Showing 193 changed files with 4,938 additions and 2,694 deletions.
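
Most of the C++ changes below mechanically migrate glog-style CHECK_* assertions to PADDLE_ENFORCE_* with a typed phi::errors error object. A minimal sketch of the pattern, distilled from the hunks below (identifiers are illustrative, not taken from any one file):

    // Before: glog-style check; the message is streamed in, and a
    // failure aborts the process.
    //   CHECK_EQ(outputs.size(), 1) << "expected exactly one output";
    //
    // After: the two comparison operands plus a structured, printf-style
    // error object, so a failure raises a paddle exception that carries
    // a typed error code instead of aborting.
    PADDLE_ENFORCE_EQ(
        outputs.size(),
        1UL,
        phi::errors::InvalidArgument(
            "The size of outputs is incorrect. Expected 1, but received %d.",
            outputs.size()));
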
22 changes: 21 additions & 1 deletion cmake/cblas.cmake
@@ -15,9 +15,11 @@
 # Find the CBlas and lapack libraries
 #
 # It will search MKLML, OpenBlas, reference-cblas, extern-openblas in order.
+# On APPLE, accelerate framework (apple's blas implementation) will be
+# used, if applicable.
 #
 # If any cblas implementation found, the following variable will be set.
-# CBLAS_PROVIDER  # one of MKLML, OPENBLAS, REFERENCE
+# CBLAS_PROVIDER  # one of MKLML, ACCELERATE, OPENBLAS, REFERENCE
 # CBLAS_INC_DIR   # the include directory for cblas.
 # CBLAS_LIBS      # a list of libraries should be linked by paddle.
 #                 # Each library should be full path to object file.
@@ -45,6 +47,24 @@ if(WITH_MKLML)
           "(include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
 endif()
 
+## find accelerate on apple
+if(APPLE AND NOT DEFINED CBLAS_PROVIDER)
+  find_library(ACCELERATE_FRAMEWORK Accelerate)
+  if(ACCELERATE_FRAMEWORK)
+    message(STATUS "Accelerate framework found " "${ACCELERATE_FRAMEWORK}")
+
+    set(CBLAS_PROVIDER ACCELERATE)
+    # no need to setup include dir if it's accelerate
+    # set(CBLAS_INC_DIR "")
+    set(CBLAS_LIBRARIES ${ACCELERATE_FRAMEWORK})
+
+    add_definitions(-DPADDLE_USE_ACCELERATE)
+    add_definitions(-DLAPACK_FOUND)
+  else()
+    message(WARNING "Accelerate framework not found")
+  endif()
+endif()
+
 ## Then find openblas.
 if(NOT DEFINED CBLAS_PROVIDER)
   set(OPENBLAS_ROOT
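
The new Accelerate branch defines PADDLE_USE_ACCELERATE for C++ code. A minimal sketch of how a translation unit could select its CBLAS header based on that define (hypothetical wiring; Paddle's actual header selection is not part of this diff):

    // Hypothetical sketch keyed off the define added above.
    #ifdef PADDLE_USE_ACCELERATE
    #include <Accelerate/Accelerate.h>  // Apple's built-in BLAS/LAPACK
    #else
    #include <cblas.h>  // MKLML / OpenBLAS / reference CBLAS
    #endif
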
13 changes: 0 additions & 13 deletions cmake/cuda.cmake
@@ -173,8 +173,6 @@ function(select_nvcc_arch_flags out_variable out_arch_bin)
   elseif(${CUDA_ARCH_NAME} STREQUAL "Turing")
     set(cuda_arch_bin "75")
   elseif(${CUDA_ARCH_NAME} STREQUAL "Ampere")
-    message(STATUS "Add Define CUDA_BFLOAT16_AVALIABLE")
-    add_definitions("-DCUDA_BFLOAT16_AVALIABLE")
     if(WITH_NV_JETSON)
       set(cuda_arch_bin "87")
     else()
@@ -185,8 +183,6 @@ function(select_nvcc_arch_flags out_variable out_arch_bin)
       endif()
     endif()
   elseif(${CUDA_ARCH_NAME} STREQUAL "Hopper")
-    message(STATUS "Add Define CUDA_BFLOAT16_AVALIABLE")
-    add_definitions("-DCUDA_BFLOAT16_AVALIABLE")
     set(cuda_arch_bin "90")
   elseif(${CUDA_ARCH_NAME} STREQUAL "All")
     set(cuda_arch_bin ${paddle_known_gpu_archs})
@@ -200,17 +196,8 @@ function(select_nvcc_arch_flags out_variable out_arch_bin)
             to get a full wheel package to resolve this warning.
             While, this version will still work on local GPU architecture.")
     detect_installed_gpus(cuda_arch_bin)
-    if(${cuda_arch_bin} MATCHES "[ ]*(8\.0|8\.6|8\.9|9\.0)[ ]*")
-      message(STATUS "Add Define CUDA_BFLOAT16_AVALIABLE")
-      add_definitions("-DCUDA_BFLOAT16_AVALIABLE")
-    endif()
   else() # (${CUDA_ARCH_NAME} STREQUAL "Manual")
     set(cuda_arch_bin ${CUDA_ARCH_BIN})
-
-    if(${CUDA_ARCH_BIN} MATCHES "[ ]*(80|86|89|90)[ ]*")
-      message(STATUS "Add Define CUDA_BFLOAT16_AVALIABLE")
-      add_definitions("-DCUDA_BFLOAT16_AVALIABLE")
-    endif()
   endif()
 
   if(NEW_RELEASE_JIT)
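
These hunks drop the build-wide CUDA_BFLOAT16_AVALIABLE define (spelling as in the original source). For illustration only, here is the kind of compile-time guard bf16 code can rely on instead, using nvcc's per-architecture __CUDA_ARCH__ macro; this is an assumption about a plausible replacement, not a guard this diff introduces:

    // Sketch: gate bf16 device code on the compute capability being
    // compiled for, rather than on a CMake-level definition.
    #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800
    #include <cuda_bf16.h>
    __device__ __nv_bfloat16 DoubleBf16(__nv_bfloat16 x) {
      return __hmul(x, __float2bfloat16(2.0f));
    }
    #endif
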
18 changes: 12 additions & 6 deletions paddle/cinn/frontend/pass/auto_broadcast.cc
@@ -46,10 +46,13 @@ class AutoBroadcastPass : public ProgramPass {
     if (axis == -1) {
       axis = output_shape.size() - input_shape.size();
     }
-    CHECK_LE(axis + input_shape.size(), output_shape.size())
-        << "Cannot Broadcast from shape=["
-        << cinn::utils::Join(input_shape, ", ") << "] to shape=["
-        << cinn::utils::Join(output_shape, ", ") << "] with axis=" << axis;
+    PADDLE_ENFORCE_LE(
+        axis + input_shape.size(),
+        output_shape.size(),
+        phi::errors::InvalidArgument(
+            "The size of axis + input shape and output shape is incorrect."
+            "Expected axis + input shape size <= output shape size, "
+            "but receive axis + input shape size > output shape size."));
     for (int idx = 0; idx < input_shape.size(); ++idx) {
       broadcast_axes.push_back(axis++);
     }
@@ -77,8 +80,11 @@ class AutoBroadcastPass : public ProgramPass {
     }
 
     const auto& outputs = instr.GetOutputs();
-    CHECK_EQ(outputs.size(), 1)
-        << "The broadcast operator should has and only has one output";
+    PADDLE_ENFORCE_EQ(
+        outputs.size(),
+        1,
+        phi::errors::InvalidArgument(
+            "The broadcast operator should has and only has one output."));
     const auto& output = outputs.front();
 
     int axis = -1;
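
For context on the invariant checked above: the pass maps each input dimension to an output dimension starting at axis (right-aligned when axis == -1). A small standalone sketch of that computation (simplified; plain std::vector<int> instead of CINN's types):

    #include <cassert>
    #include <vector>

    // Map input dims to output dims beginning at `axis`; axis == -1 means
    // right-align the input shape against the output shape.
    std::vector<int> BroadcastAxes(const std::vector<int>& input_shape,
                                   const std::vector<int>& output_shape,
                                   int axis) {
      if (axis == -1) axis = output_shape.size() - input_shape.size();
      // The invariant enforced by PADDLE_ENFORCE_LE above.
      assert(axis + static_cast<int>(input_shape.size()) <=
             static_cast<int>(output_shape.size()));
      std::vector<int> broadcast_axes;
      for (size_t idx = 0; idx < input_shape.size(); ++idx) {
        broadcast_axes.push_back(axis++);
      }
      return broadcast_axes;
    }

For example, input [3, 4] broadcast to output [2, 3, 4] with axis = -1 yields axes {1, 2}.
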
38 changes: 23 additions & 15 deletions paddle/cinn/frontend/pass/auto_cast.cc
@@ -120,10 +120,12 @@ static std::unordered_map<std::string, CastImplFunc> need_cast_list = {
       }
 
       // Except input [X], BatchNormTrain's Input should all be fp32
-      CHECK_EQ(instr->inputs.size(), 5UL)
-          << "The number of the given inputs is not equal to the required for "
-             "op "
-          << instr->op_type;
+      PADDLE_ENFORCE_EQ(
+          instr->inputs.size(),
+          5UL,
+          phi::errors::InvalidArgument("The size of instr inputs is incorrect."
+                                       "Expected size is 5, but receive %d.",
+                                       instr->inputs.size()));
       CHECK(instr->inputs[1]->type.is_float(32))
           << instr->op_type << "'s input [scale] should be float32, but here "
           << instr->inputs[1]->type;
@@ -161,10 +163,12 @@ static std::unordered_map<std::string, CastImplFunc> need_cast_list = {
       }
 
       // Except input [X], BatchNormTrain's Input should all be fp32
-      CHECK_EQ(instr->inputs.size(), 5UL)
-          << "The number of the given inputs is not equal to the required for "
-             "op "
-          << instr->op_type;
+      PADDLE_ENFORCE_EQ(
+          instr->inputs.size(),
+          5UL,
+          phi::errors::InvalidArgument("The size of instr inputs is incorrect."
+                                       "Expected size is 5, but receive %d.",
+                                       instr->inputs.size()));
       CHECK(instr->inputs[1]->type.is_float(32))
           << instr->op_type << "'s input [scale] should be float32, but here "
           << instr->inputs[1]->type;
@@ -206,13 +210,17 @@ static std::unordered_map<std::string, CastImplFunc> need_cast_list = {
       }
 
      // Except input [X], BatchNormTrain's Input should all be fp32
-      CHECK_EQ(instr->inputs.size(), 5UL)
-          << "The number of the given inputs is not equal to the required for "
-             "op "
-          << instr->op_type;
-      CHECK_EQ(instr->inputs[0]->type, instr->inputs[1]->type)
-          << instr->op_type
-          << "'s input [Y@GRAD] and input [X] 's type should be the same";
+      PADDLE_ENFORCE_EQ(
+          instr->inputs.size(),
+          5UL,
+          phi::errors::InvalidArgument("The size of instr inputs is incorrect."
+                                       "Expected size is 5, but receive %d.",
+                                       instr->inputs.size()));
+      PADDLE_ENFORCE_EQ(instr->inputs[0]->type,
+                        instr->inputs[1]->type,
+                        phi::errors::InvalidArgument(
+                            "instr op type's input [Y@GRAD] and input [X] 's "
+                            "type should be the same."));
       CHECK(instr->inputs[2]->type.is_float(32))
           << instr->op_type << "'s input [scale] should be float32, but here "
           << instr->inputs[1]->type;
9 changes: 6 additions & 3 deletions paddle/cinn/frontend/pass/fill_constant_folding.cc
@@ -113,9 +113,12 @@ class FillConstantFoldingPass : public ProgramPass {
         continue;
       }
 
-      CHECK_EQ(instr->outputs.size(), 1UL)
-          << "The fill_constant op should has one, and only one output ! "
-             "Please check.";
+      PADDLE_ENFORCE_EQ(
+          instr->outputs.size(),
+          1UL,
+          phi::errors::InvalidArgument("The size of instr outputs is incorrect."
+                                       "Expected size is 1, but receive %d.",
+                                       instr->outputs.size()));
 
       const auto& shape = instr.GetAttrs<ShapeType>("shape");
       auto value = instr->attrs.at("value");
13 changes: 10 additions & 3 deletions paddle/cinn/frontend/pass/fill_constant_rewriter.cc
@@ -206,9 +206,16 @@ class FillConstantRewriterPass : public ProgramPass {
                        const Input2Instr& input2instr,
                        const std::unordered_set<std::string>& fetch_ids,
                        std::unordered_set<const Instruction*>* remove_instr) {
-    CHECK_EQ(fill_constant->op_type, std::string("fill_constant"));
-    CHECK_EQ(fill_constant->outputs.size(), 1UL)
-        << "The fill_constant op should just has one output! Please check.";
+    PADDLE_ENFORCE_EQ(fill_constant->op_type,
+                      std::string("fill_constant"),
+                      phi::errors::InvalidArgument(
+                          "The type of fill constant op is incorrect."));
+    PADDLE_ENFORCE_EQ(fill_constant->outputs.size(),
+                      1UL,
+                      phi::errors::InvalidArgument(
+                          "The size of fill constant outputs is incorrect."
+                          "Expected size is 1, but receive %d.",
+                          fill_constant->outputs.size()));
     const auto& out = fill_constant->outputs[0];
 
     if (!input2instr.count(out->id)) {
22 changes: 15 additions & 7 deletions paddle/cinn/frontend/pass/gemm_rewriter.cc
@@ -98,8 +98,10 @@ class GemmRewriterPass : public ProgramPass {
   bool DoGemmFusion(NetBuilder* builder,
                     const Instruction& instr,
                     const std::unordered_set<std::string>& fetch_ids) {
-    CHECK_EQ(instr->inputs.size(), 2)
-        << "elementwise should have only two inputs";
+    PADDLE_ENFORCE_EQ(instr->inputs.size(),
+                      2,
+                      phi::errors::InvalidArgument(
+                          "elementwise should have only two inputs."));
     std::vector<Variable> inputs;
     bool trans_a = false;
     bool trans_b = false;
@@ -111,10 +113,13 @@ class GemmRewriterPass : public ProgramPass {
       if (it != output2instr_.end() && dot_instrs.count(it->second->op_type)) {
         // If the output var of matmul is consumed by more than one instruction
         // or a fetch var, just skip to fuse it.
-        CHECK_GT(var_used_count_.count(var.get()), 0)
-            << "The input(" << var->id << ")"
-            << "should be included in var_used_count_. Please check the "
-               "CollectInfo method.";
+        PADDLE_ENFORCE_GT(
+            var_used_count_.count(var.get()),
+            0,
+            phi::errors::InvalidArgument(
+                "The value of var used count's var get() is incorrect."
+                "Expected value is larger than 0, but receive %d.",
+                var_used_count_.count(var.get())));
         if ((var_used_count_.at(var.get()) > 1) || fetch_ids.count(var->id)) {
           continue;
         }
@@ -176,7 +181,10 @@ class GemmRewriterPass : public ProgramPass {
       return true;
     }
 
-    CHECK_EQ(inputs.size(), 0) << "The gemm should only have three inputs.";
+    PADDLE_ENFORCE_EQ(inputs.size(),
+                      0,
+                      phi::errors::InvalidArgument(
+                          "The gemm should only have three inputs."));
     return false;
   }
 
16 changes: 12 additions & 4 deletions paddle/cinn/frontend/pass/remove_identity.cc
@@ -223,10 +223,18 @@ class RemoveIdentityPass : public ProgramPass {
       if (!identity_ops.at(instr->op_type)(instr)) {
         continue;
       }
-      CHECK_EQ(instr->inputs.size(), 1)
-          << instr->op_type << " should have only 1 input. But here " << instr;
-      CHECK_EQ(instr->outputs.size(), 1)
-          << instr->op_type << " should have only 1 output. But here " << instr;
+      PADDLE_ENFORCE_EQ(
+          instr->inputs.size(),
+          1,
+          phi::errors::InvalidArgument("The size of instr inputs is incorrect."
+                                       "Expected size is 1, but receive %d.",
+                                       instr->inputs.size()));
+      PADDLE_ENFORCE_EQ(
+          instr->outputs.size(),
+          1,
+          phi::errors::InvalidArgument("The size of instr outputs is incorrect."
+                                       "Expected size is 1, but receive %d.",
+                                       instr->outputs.size()));
 
       auto& input_var = instr->inputs[0];
       auto& output_var = instr->outputs[0];
9 changes: 8 additions & 1 deletion paddle/cinn/frontend/pass/test_helper.h
@@ -153,7 +153,14 @@ class PassTest {
 
   void CheckOutput(const std::vector<float>& actual,
                    const std::vector<float>& expect) {
-    CHECK_EQ(actual.size(), expect.size());
+    PADDLE_ENFORCE_EQ(
+        actual.size(),
+        expect.size(),
+        phi::errors::InvalidArgument(
+            "The size of actual and expect is not equal,"
+            "where the size of actual:%d but the size of expect:%d.",
+            actual.size(),
+            expect.size()));
     for (size_t i = 0; i < expect.size(); ++i) {
       ASSERT_FLOAT_EQ(actual[i], expect[i]);
     }
23 changes: 16 additions & 7 deletions paddle/cinn/frontend/pass/transpose_collapsing.cc
@@ -191,10 +191,14 @@ class TransposeCollapsingPass : public ProgramPass {
     const auto& output_name = output->id;
 
     const auto& axis = transpose->GetAttrs<ShapeType>("axis");
-    CHECK_EQ(axis.size(), input->shape.size())
-        << "The transpose's axis size should equal with input variable's shape "
-           "size, but the transpose of ["
-        << input->id << "] not ! Please check.";
+    PADDLE_ENFORCE_EQ(
+        axis.size(),
+        input->shape.size(),
+        phi::errors::InvalidArgument(
+            "The size of axis and input shape is not equal,"
+            "where the size of axis:%d but the size of input shape:%d.",
+            axis.size(),
+            input->shape.size()));
 
     bool can_remove = !fetch_ids.count(output_name);
 
@@ -372,9 +376,14 @@ class TransposeCollapsingPass : public ProgramPass {
   // 1, 0] = [1, 2, 0]
   ShapeType FuseTransposeAxis(const ShapeType& old_axis,
                               const ShapeType& new_axis) const {
-    CHECK_EQ(old_axis.size(), new_axis.size())
-        << "The transpose axis size should be " << old_axis.size()
-        << ", but here " << new_axis.size();
+    PADDLE_ENFORCE_EQ(
+        old_axis.size(),
+        new_axis.size(),
+        phi::errors::InvalidArgument(
+            "The size of old axis and new axis is not equal,"
+            "where the size of old axis:%d but the size of new axis:%d.",
+            old_axis.size(),
+            new_axis.size()));
 
     ShapeType axis = old_axis;
     for (int i = 0; i < new_axis.size(); ++i) {
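
The comment above FuseTransposeAxis gives the fusion rule by example: applying axis [0, 2, 1] and then [2, 1, 0] equals the single transpose [1, 2, 0]. A standalone sketch of that permutation composition (plain std::vector<int> instead of CINN's ShapeType):

    #include <cassert>
    #include <vector>

    // Composing transpose(x, old_axis) followed by transpose(., new_axis)
    // is the single transpose with fused[i] = old_axis[new_axis[i]].
    std::vector<int> FuseTransposeAxis(const std::vector<int>& old_axis,
                                       const std::vector<int>& new_axis) {
      assert(old_axis.size() == new_axis.size());  // the check enforced above
      std::vector<int> fused(old_axis.size());
      for (size_t i = 0; i < new_axis.size(); ++i) {
        fused[i] = old_axis[new_axis[i]];
      }
      return fused;
    }

    // FuseTransposeAxis({0, 2, 1}, {2, 1, 0}) == {1, 2, 0}, matching the comment.
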
16 changes: 12 additions & 4 deletions paddle/cinn/frontend/pass/transpose_folding_base.h
@@ -103,10 +103,18 @@ class TransposeFoldingBase : public ProgramPass {
         !skip_instrs_.count((*instr)->op_type)) {
       return {};
     }
-    CHECK_EQ((*instr)->inputs.size(), 1UL)
-        << "The op " << (*instr)->op_type << " should has 1 input.";
-    CHECK_EQ((*instr)->outputs.size(), 1UL)
-        << "The op " << (*instr)->op_type << " should has 1 output.";
+    PADDLE_ENFORCE_EQ(
+        (*instr)->inputs.size(),
+        1UL,
+        phi::errors::InvalidArgument("The size of *instr's inputs is incorrect."
+                                     "Expected size is 1, but receive %d.",
+                                     (*instr)->inputs.size()));
+    PADDLE_ENFORCE_EQ((*instr)->outputs.size(),
+                      1UL,
+                      phi::errors::InvalidArgument(
+                          "The size of *instr's outputs is incorrect."
+                          "Expected size is 1, but receive %d.",
+                          (*instr)->outputs.size()));
 
     VLOG(5) << "Try get matmul's folding instructions begin from ["
             << (*instr)->inputs[0]->id << "]";
8 changes: 6 additions & 2 deletions paddle/cinn/frontend/pass/transpose_folding_input.cc
@@ -68,8 +68,12 @@ class TransposeFoldingInputPass : public TransposeFoldingBase {
                     const In2InstrType& in2instr,
                     const std::unordered_set<std::string>& fetch_ids,
                     absl::flat_hash_set<Instruction*>* remove_instrs) const override {
-    CHECK_EQ((*dot)->inputs.size(), 2UL)
-        << "The matmul should only have two inputs.";
+    PADDLE_ENFORCE_EQ(
+        (*dot)->inputs.size(),
+        2UL,
+        phi::errors::InvalidArgument("The size of *dot's inputs is incorrect."
+                                     "Expected size is 2, but receive %d.",
+                                     (*dot)->inputs.size()));
 
     auto debug_info = [](const std::vector<Instruction*>& instrs) {
       std::stringstream ss;