Commit

Merge branch 'develop' into unbind
Eddie-Wang1120 authored May 21, 2024
2 parents f2e9a11 + 79a8490 commit 82c3749
Showing 193 changed files with 4,938 additions and 2,694 deletions.
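
Most of the C++ changes below mechanically migrate glog-style CHECK_* assertions to PADDLE_ENFORCE_* with a typed phi::errors error object. A minimal sketch of the pattern, distilled from the hunks below (identifiers are illustrative, not taken from any one file):

    // Before: glog-style check; the message is streamed in, and a
    // failure aborts the process.
    //   CHECK_EQ(outputs.size(), 1) << "expected exactly one output";
    //
    // After: the two comparison operands plus a structured, printf-style
    // error object, so a failure raises a paddle exception that carries
    // a typed error code instead of aborting.
    PADDLE_ENFORCE_EQ(
        outputs.size(),
        1UL,
        phi::errors::InvalidArgument(
            "The size of outputs is incorrect. Expected 1, but received %d.",
            outputs.size()));
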
22 changes: 21 additions & 1 deletion cmake/cblas.cmake
@@ -15,9 +15,11 @@
 # Find the CBlas and lapack libraries
 #
 # It will search MKLML, OpenBlas, reference-cblas, extern-openblas in order.
+# On APPLE, accelerate framework (apple's blas implementation) will be
+# used, if applicable.
 #
 # If any cblas implementation found, the following variable will be set.
-# CBLAS_PROVIDER  # one of MKLML, OPENBLAS, REFERENCE
+# CBLAS_PROVIDER  # one of MKLML, ACCELERATE, OPENBLAS, REFERENCE
 # CBLAS_INC_DIR   # the include directory for cblas.
 # CBLAS_LIBS      # a list of libraries should be linked by paddle.
 #                 # Each library should be full path to object file.
@@ -45,6 +47,24 @@ if(WITH_MKLML)
           "(include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
 endif()
 
+## find accelerate on apple
+if(APPLE AND NOT DEFINED CBLAS_PROVIDER)
+  find_library(ACCELERATE_FRAMEWORK Accelerate)
+  if(ACCELERATE_FRAMEWORK)
+    message(STATUS "Accelerate framework found " "${ACCELERATE_FRAMEWORK}")
+
+    set(CBLAS_PROVIDER ACCELERATE)
+    # no need to setup include dir if it's accelerate
+    # set(CBLAS_INC_DIR "")
+    set(CBLAS_LIBRARIES ${ACCELERATE_FRAMEWORK})
+
+    add_definitions(-DPADDLE_USE_ACCELERATE)
+    add_definitions(-DLAPACK_FOUND)
+  else()
+    message(WARNING "Accelerate framework not found")
+  endif()
+endif()
+
 ## Then find openblas.
 if(NOT DEFINED CBLAS_PROVIDER)
   set(OPENBLAS_ROOT
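
The new Accelerate branch defines PADDLE_USE_ACCELERATE for C++ code. A minimal sketch of how a translation unit could select its CBLAS header based on that define (hypothetical wiring; Paddle's actual header selection is not part of this diff):

    // Hypothetical sketch keyed off the define added above.
    #ifdef PADDLE_USE_ACCELERATE
    #include <Accelerate/Accelerate.h>  // Apple's built-in BLAS/LAPACK
    #else
    #include <cblas.h>  // MKLML / OpenBLAS / reference CBLAS
    #endif
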
13 changes: 0 additions & 13 deletions cmake/cuda.cmake
@@ -173,8 +173,6 @@ function(select_nvcc_arch_flags out_variable out_arch_bin)
   elseif(${CUDA_ARCH_NAME} STREQUAL "Turing")
     set(cuda_arch_bin "75")
   elseif(${CUDA_ARCH_NAME} STREQUAL "Ampere")
-    message(STATUS "Add Define CUDA_BFLOAT16_AVALIABLE")
-    add_definitions("-DCUDA_BFLOAT16_AVALIABLE")
     if(WITH_NV_JETSON)
       set(cuda_arch_bin "87")
     else()
@@ -185,8 +183,6 @@ function(select_nvcc_arch_flags out_variable out_arch_bin)
       endif()
     endif()
   elseif(${CUDA_ARCH_NAME} STREQUAL "Hopper")
-    message(STATUS "Add Define CUDA_BFLOAT16_AVALIABLE")
-    add_definitions("-DCUDA_BFLOAT16_AVALIABLE")
     set(cuda_arch_bin "90")
   elseif(${CUDA_ARCH_NAME} STREQUAL "All")
     set(cuda_arch_bin ${paddle_known_gpu_archs})
@@ -200,17 +196,8 @@ function(select_nvcc_arch_flags out_variable out_arch_bin)
             to get a full wheel package to resolve this warning.
             While, this version will still work on local GPU architecture.")
     detect_installed_gpus(cuda_arch_bin)
-    if(${cuda_arch_bin} MATCHES "[ ]*(8\.0|8\.6|8\.9|9\.0)[ ]*")
-      message(STATUS "Add Define CUDA_BFLOAT16_AVALIABLE")
-      add_definitions("-DCUDA_BFLOAT16_AVALIABLE")
-    endif()
   else() # (${CUDA_ARCH_NAME} STREQUAL "Manual")
     set(cuda_arch_bin ${CUDA_ARCH_BIN})
-
-    if(${CUDA_ARCH_BIN} MATCHES "[ ]*(80|86|89|90)[ ]*")
-      message(STATUS "Add Define CUDA_BFLOAT16_AVALIABLE")
-      add_definitions("-DCUDA_BFLOAT16_AVALIABLE")
-    endif()
   endif()
 
   if(NEW_RELEASE_JIT)
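
These hunks drop the build-wide CUDA_BFLOAT16_AVALIABLE define (spelling as in the original source). For illustration only, here is the kind of compile-time guard bf16 code can rely on instead, using nvcc's per-architecture __CUDA_ARCH__ macro; this is an assumption about a plausible replacement, not a guard this diff introduces:

    // Sketch: gate bf16 device code on the compute capability being
    // compiled for, rather than on a CMake-level definition.
    #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800
    #include <cuda_bf16.h>
    __device__ __nv_bfloat16 DoubleBf16(__nv_bfloat16 x) {
      return __hmul(x, __float2bfloat16(2.0f));
    }
    #endif
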
18 changes: 12 additions & 6 deletions paddle/cinn/frontend/pass/auto_broadcast.cc
@@ -46,10 +46,13 @@ class AutoBroadcastPass : public ProgramPass {
     if (axis == -1) {
       axis = output_shape.size() - input_shape.size();
     }
-    CHECK_LE(axis + input_shape.size(), output_shape.size())
-        << "Cannot Broadcast from shape=["
-        << cinn::utils::Join(input_shape, ", ") << "] to shape=["
-        << cinn::utils::Join(output_shape, ", ") << "] with axis=" << axis;
+    PADDLE_ENFORCE_LE(
+        axis + input_shape.size(),
+        output_shape.size(),
+        phi::errors::InvalidArgument(
+            "The size of axis + input shape and output shape is incorrect."
+            "Expected axis + input shape size <= output shape size, "
+            "but receive axis + input shape size > output shape size."));
     for (int idx = 0; idx < input_shape.size(); ++idx) {
       broadcast_axes.push_back(axis++);
     }
@@ -77,8 +80,11 @@ class AutoBroadcastPass : public ProgramPass {
     }
 
     const auto& outputs = instr.GetOutputs();
-    CHECK_EQ(outputs.size(), 1)
-        << "The broadcast operator should has and only has one output";
+    PADDLE_ENFORCE_EQ(
+        outputs.size(),
+        1,
+        phi::errors::InvalidArgument(
+            "The broadcast operator should has and only has one output."));
     const auto& output = outputs.front();
 
     int axis = -1;
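
For context on the invariant checked above: the pass maps each input dimension to an output dimension starting at axis (right-aligned when axis == -1). A small standalone sketch of that computation (simplified; plain std::vector<int> instead of CINN's types):

    #include <cassert>
    #include <vector>

    // Map input dims to output dims beginning at `axis`; axis == -1 means
    // right-align the input shape against the output shape.
    std::vector<int> BroadcastAxes(const std::vector<int>& input_shape,
                                   const std::vector<int>& output_shape,
                                   int axis) {
      if (axis == -1) axis = output_shape.size() - input_shape.size();
      // The invariant enforced by PADDLE_ENFORCE_LE above.
      assert(axis + static_cast<int>(input_shape.size()) <=
             static_cast<int>(output_shape.size()));
      std::vector<int> broadcast_axes;
      for (size_t idx = 0; idx < input_shape.size(); ++idx) {
        broadcast_axes.push_back(axis++);
      }
      return broadcast_axes;
    }

For example, input [3, 4] broadcast to output [2, 3, 4] with axis = -1 yields axes {1, 2}.
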
38 changes: 23 additions & 15 deletions paddle/cinn/frontend/pass/auto_cast.cc
@@ -120,10 +120,12 @@ static std::unordered_map<std::string, CastImplFunc> need_cast_list = {
       }
 
       // Except input [X], BatchNormTrain's Input should all be fp32
-      CHECK_EQ(instr->inputs.size(), 5UL)
-          << "The number of the given inputs is not equal to the required for "
-             "op "
-          << instr->op_type;
+      PADDLE_ENFORCE_EQ(
+          instr->inputs.size(),
+          5UL,
+          phi::errors::InvalidArgument("The size of instr inputs is incorrect."
+                                       "Expected size is 5, but receive %d.",
+                                       instr->inputs.size()));
       CHECK(instr->inputs[1]->type.is_float(32))
           << instr->op_type << "'s input [scale] should be float32, but here "
           << instr->inputs[1]->type;
@@ -161,10 +163,12 @@ static std::unordered_map<std::string, CastImplFunc> need_cast_list = {
       }
 
       // Except input [X], BatchNormTrain's Input should all be fp32
-      CHECK_EQ(instr->inputs.size(), 5UL)
-          << "The number of the given inputs is not equal to the required for "
-             "op "
-          << instr->op_type;
+      PADDLE_ENFORCE_EQ(
+          instr->inputs.size(),
+          5UL,
+          phi::errors::InvalidArgument("The size of instr inputs is incorrect."
+                                       "Expected size is 5, but receive %d.",
+                                       instr->inputs.size()));
       CHECK(instr->inputs[1]->type.is_float(32))
           << instr->op_type << "'s input [scale] should be float32, but here "
           << instr->inputs[1]->type;
@@ -206,13 +210,17 @@ static std::unordered_map<std::string, CastImplFunc> need_cast_list = {
       }
 
      // Except input [X], BatchNormTrain's Input should all be fp32
-      CHECK_EQ(instr->inputs.size(), 5UL)
-          << "The number of the given inputs is not equal to the required for "
-             "op "
-          << instr->op_type;
-      CHECK_EQ(instr->inputs[0]->type, instr->inputs[1]->type)
-          << instr->op_type
-          << "'s input [Y@GRAD] and input [X] 's type should be the same";
+      PADDLE_ENFORCE_EQ(
+          instr->inputs.size(),
+          5UL,
+          phi::errors::InvalidArgument("The size of instr inputs is incorrect."
+                                       "Expected size is 5, but receive %d.",
+                                       instr->inputs.size()));
+      PADDLE_ENFORCE_EQ(instr->inputs[0]->type,
+                        instr->inputs[1]->type,
+                        phi::errors::InvalidArgument(
+                            "instr op type's input [Y@GRAD] and input [X] 's "
+                            "type should be the same."));
       CHECK(instr->inputs[2]->type.is_float(32))
           << instr->op_type << "'s input [scale] should be float32, but here "
           << instr->inputs[1]->type;
9 changes: 6 additions & 3 deletions paddle/cinn/frontend/pass/fill_constant_folding.cc
@@ -113,9 +113,12 @@ class FillConstantFoldingPass : public ProgramPass {
         continue;
       }
 
-      CHECK_EQ(instr->outputs.size(), 1UL)
-          << "The fill_constant op should has one, and only one output ! "
-             "Please check.";
+      PADDLE_ENFORCE_EQ(
+          instr->outputs.size(),
+          1UL,
+          phi::errors::InvalidArgument("The size of instr outputs is incorrect."
+                                       "Expected size is 1, but receive %d.",
+                                       instr->outputs.size()));
 
       const auto& shape = instr.GetAttrs<ShapeType>("shape");
       auto value = instr->attrs.at("value");
13 changes: 10 additions & 3 deletions paddle/cinn/frontend/pass/fill_constant_rewriter.cc
@@ -206,9 +206,16 @@ class FillConstantRewriterPass : public ProgramPass {
                        const Input2Instr& input2instr,
                        const std::unordered_set<std::string>& fetch_ids,
                        std::unordered_set<const Instruction*>* remove_instr) {
-    CHECK_EQ(fill_constant->op_type, std::string("fill_constant"));
-    CHECK_EQ(fill_constant->outputs.size(), 1UL)
-        << "The fill_constant op should just has one output! Please check.";
+    PADDLE_ENFORCE_EQ(fill_constant->op_type,
+                      std::string("fill_constant"),
+                      phi::errors::InvalidArgument(
+                          "The type of fill constant op is incorrect."));
+    PADDLE_ENFORCE_EQ(fill_constant->outputs.size(),
+                      1UL,
+                      phi::errors::InvalidArgument(
+                          "The size of fill constant outputs is incorrect."
+                          "Expected size is 1, but receive %d.",
+                          fill_constant->outputs.size()));
     const auto& out = fill_constant->outputs[0];
 
     if (!input2instr.count(out->id)) {
22 changes: 15 additions & 7 deletions paddle/cinn/frontend/pass/gemm_rewriter.cc
@@ -98,8 +98,10 @@ class GemmRewriterPass : public ProgramPass {
   bool DoGemmFusion(NetBuilder* builder,
                     const Instruction& instr,
                     const std::unordered_set<std::string>& fetch_ids) {
-    CHECK_EQ(instr->inputs.size(), 2)
-        << "elementwise should have only two inputs";
+    PADDLE_ENFORCE_EQ(instr->inputs.size(),
+                      2,
+                      phi::errors::InvalidArgument(
+                          "elementwise should have only two inputs."));
     std::vector<Variable> inputs;
     bool trans_a = false;
     bool trans_b = false;
@@ -111,10 +113,13 @@ class GemmRewriterPass : public ProgramPass {
       if (it != output2instr_.end() && dot_instrs.count(it->second->op_type)) {
         // If the output var of matmul is consumed by more than one instruction
         // or a fetch var, just skip to fuse it.
-        CHECK_GT(var_used_count_.count(var.get()), 0)
-            << "The input(" << var->id << ")"
-            << "should be included in var_used_count_. Please check the "
-               "CollectInfo method.";
+        PADDLE_ENFORCE_GT(
+            var_used_count_.count(var.get()),
+            0,
+            phi::errors::InvalidArgument(
+                "The value of var used count's var get() is incorrect."
+                "Expected value is larger than 0, but receive %d.",
+                var_used_count_.count(var.get())));
         if ((var_used_count_.at(var.get()) > 1) || fetch_ids.count(var->id)) {
           continue;
         }
@@ -176,7 +181,10 @@ class GemmRewriterPass : public ProgramPass {
       return true;
     }
 
-    CHECK_EQ(inputs.size(), 0) << "The gemm should only have three inputs.";
+    PADDLE_ENFORCE_EQ(inputs.size(),
+                      0,
+                      phi::errors::InvalidArgument(
+                          "The gemm should only have three inputs."));
     return false;
   }
 
16 changes: 12 additions & 4 deletions paddle/cinn/frontend/pass/remove_identity.cc
@@ -223,10 +223,18 @@ class RemoveIdentityPass : public ProgramPass {
       if (!identity_ops.at(instr->op_type)(instr)) {
         continue;
       }
-      CHECK_EQ(instr->inputs.size(), 1)
-          << instr->op_type << " should have only 1 input. But here " << instr;
-      CHECK_EQ(instr->outputs.size(), 1)
-          << instr->op_type << " should have only 1 output. But here " << instr;
+      PADDLE_ENFORCE_EQ(
+          instr->inputs.size(),
+          1,
+          phi::errors::InvalidArgument("The size of instr inputs is incorrect."
+                                       "Expected size is 1, but receive %d.",
+                                       instr->inputs.size()));
+      PADDLE_ENFORCE_EQ(
+          instr->outputs.size(),
+          1,
+          phi::errors::InvalidArgument("The size of instr outputs is incorrect."
+                                       "Expected size is 1, but receive %d.",
+                                       instr->outputs.size()));
 
       auto& input_var = instr->inputs[0];
       auto& output_var = instr->outputs[0];
9 changes: 8 additions & 1 deletion paddle/cinn/frontend/pass/test_helper.h
@@ -153,7 +153,14 @@ class PassTest {
 
   void CheckOutput(const std::vector<float>& actual,
                    const std::vector<float>& expect) {
-    CHECK_EQ(actual.size(), expect.size());
+    PADDLE_ENFORCE_EQ(
+        actual.size(),
+        expect.size(),
+        phi::errors::InvalidArgument(
+            "The size of actual and expect is not equal,"
+            "where the size of actual:%d but the size of expect:%d.",
+            actual.size(),
+            expect.size()));
     for (size_t i = 0; i < expect.size(); ++i) {
       ASSERT_FLOAT_EQ(actual[i], expect[i]);
     }
23 changes: 16 additions & 7 deletions paddle/cinn/frontend/pass/transpose_collapsing.cc
@@ -191,10 +191,14 @@ class TransposeCollapsingPass : public ProgramPass {
     const auto& output_name = output->id;
 
     const auto& axis = transpose->GetAttrs<ShapeType>("axis");
-    CHECK_EQ(axis.size(), input->shape.size())
-        << "The transpose's axis size should equal with input variable's shape "
-           "size, but the transpose of ["
-        << input->id << "] not ! Please check.";
+    PADDLE_ENFORCE_EQ(
+        axis.size(),
+        input->shape.size(),
+        phi::errors::InvalidArgument(
+            "The size of axis and input shape is not equal,"
+            "where the size of axis:%d but the size of input shape:%d.",
+            axis.size(),
+            input->shape.size()));
 
     bool can_remove = !fetch_ids.count(output_name);
 
@@ -372,9 +376,14 @@ class TransposeCollapsingPass : public ProgramPass {
   // 1, 0] = [1, 2, 0]
   ShapeType FuseTransposeAxis(const ShapeType& old_axis,
                               const ShapeType& new_axis) const {
-    CHECK_EQ(old_axis.size(), new_axis.size())
-        << "The transpose axis size should be " << old_axis.size()
-        << ", but here " << new_axis.size();
+    PADDLE_ENFORCE_EQ(
+        old_axis.size(),
+        new_axis.size(),
+        phi::errors::InvalidArgument(
+            "The size of old axis and new axis is not equal,"
+            "where the size of old axis:%d but the size of new axis:%d.",
+            old_axis.size(),
+            new_axis.size()));
 
     ShapeType axis = old_axis;
     for (int i = 0; i < new_axis.size(); ++i) {
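
The comment above FuseTransposeAxis gives the fusion rule by example: applying axis [0, 2, 1] and then [2, 1, 0] equals the single transpose [1, 2, 0]. A standalone sketch of that permutation composition (plain std::vector<int> instead of CINN's ShapeType):

    #include <cassert>
    #include <vector>

    // Composing transpose(x, old_axis) followed by transpose(., new_axis)
    // is the single transpose with fused[i] = old_axis[new_axis[i]].
    std::vector<int> FuseTransposeAxis(const std::vector<int>& old_axis,
                                       const std::vector<int>& new_axis) {
      assert(old_axis.size() == new_axis.size());  // the check enforced above
      std::vector<int> fused(old_axis.size());
      for (size_t i = 0; i < new_axis.size(); ++i) {
        fused[i] = old_axis[new_axis[i]];
      }
      return fused;
    }

    // FuseTransposeAxis({0, 2, 1}, {2, 1, 0}) == {1, 2, 0}, matching the comment.
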
16 changes: 12 additions & 4 deletions paddle/cinn/frontend/pass/transpose_folding_base.h
@@ -103,10 +103,18 @@ class TransposeFoldingBase : public ProgramPass {
         !skip_instrs_.count((*instr)->op_type)) {
       return {};
     }
-    CHECK_EQ((*instr)->inputs.size(), 1UL)
-        << "The op " << (*instr)->op_type << " should has 1 input.";
-    CHECK_EQ((*instr)->outputs.size(), 1UL)
-        << "The op " << (*instr)->op_type << " should has 1 output.";
+    PADDLE_ENFORCE_EQ(
+        (*instr)->inputs.size(),
+        1UL,
+        phi::errors::InvalidArgument("The size of *instr's inputs is incorrect."
+                                     "Expected size is 1, but receive %d.",
+                                     (*instr)->inputs.size()));
+    PADDLE_ENFORCE_EQ((*instr)->outputs.size(),
+                      1UL,
+                      phi::errors::InvalidArgument(
+                          "The size of *instr's outputs is incorrect."
+                          "Expected size is 1, but receive %d.",
+                          (*instr)->outputs.size()));
 
     VLOG(5) << "Try get matmul's folding instructions begin from ["
             << (*instr)->inputs[0]->id << "]";
8 changes: 6 additions & 2 deletions paddle/cinn/frontend/pass/transpose_folding_input.cc
@@ -68,8 +68,12 @@ class TransposeFoldingInputPass : public TransposeFoldingBase {
                     const In2InstrType& in2instr,
                     const std::unordered_set<std::string>& fetch_ids,
                     absl::flat_hash_set<Instruction*>* remove_instrs) const override {
-    CHECK_EQ((*dot)->inputs.size(), 2UL)
-        << "The matmul should only have two inputs.";
+    PADDLE_ENFORCE_EQ(
+        (*dot)->inputs.size(),
+        2UL,
+        phi::errors::InvalidArgument("The size of *dot's inputs is incorrect."
+                                     "Expected size is 2, but receive %d.",
+                                     (*dot)->inputs.size()));
 
     auto debug_info = [](const std::vector<Instruction*>& instrs) {
       std::stringstream ss;