diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h
index 7f47ef640c19c..31c6bb4ef8806 100644
--- a/paddle/fluid/framework/operator.h
+++ b/paddle/fluid/framework/operator.h
@@ -103,7 +103,7 @@ constexpr char kEnableCacheRuntimeContext[] = "@ENABLE_CACHE_RUNTIME_CONTEXT@";
 /// TODO(luotao): Note that this temporal attribute would be deleted after all
 /// ops contain it.
 constexpr char kAllKernelsMustComputeRuntimeShape[] =
-    "@ALL_KERNELS_MUST_COMPUTE_RUNTIME_SHAPE@";
+    "ALL_KERNELS_MUST_COMPUTE_RUNTIME_SHAPE";
 
 // define some kernel priority
 /* Define multiple kernel type fallback order*/
diff --git a/paddle/fluid/operators/compat/fc.pbtxt b/paddle/fluid/operators/compat/fc.pbtxt
index b7b9fe7acda73..babd80260d771 100644
--- a/paddle/fluid/operators/compat/fc.pbtxt
+++ b/paddle/fluid/operators/compat/fc.pbtxt
@@ -27,7 +27,7 @@ extra {
     type: BOOLEAN
   }
   attrs {
-    name: "@ALL_KERNELS_MUST_COMPUTE_RUNTIME_SHAPE@"
+    name: "ALL_KERNELS_MUST_COMPUTE_RUNTIME_SHAPE"
     type: BOOLEAN
   }
   attrs {
diff --git a/paddle/fluid/pir/transforms/fusion/fc_elementwise_layernorm_fuse_pass.cc b/paddle/fluid/pir/transforms/fusion/fc_elementwise_layernorm_fuse_pass.cc
index c3bef294a8db9..fdb4621fb350b 100644
--- a/paddle/fluid/pir/transforms/fusion/fc_elementwise_layernorm_fuse_pass.cc
+++ b/paddle/fluid/pir/transforms/fusion/fc_elementwise_layernorm_fuse_pass.cc
@@ -31,14 +31,7 @@ class FcElementwiseLayerNormFusePattern
               {
                   {"in_num_col_dims", pat.Attr("in_num_col_dims")},
                   {"activation_type", pat.Attr("activation_type")},
-                  {"use_mkldnn", pat.Attr("use_mkldnn")},
                   {"padding_weights", pat.Attr("padding_weights")},
-                  {"use_quantizer", pat.Attr("use_quantizer")},
-                  {"mkldnn_data_type", pat.Attr("mkldnn_data_type")},
-                  {"scale_in", pat.Attr("scale_in")},
-                  {"scale_weights", pat.Attr("scale_weights")},
-                  {"scale_out", pat.Attr("scale_out")},
-                  {"force_fp32_output", pat.Attr("force_fp32_output")},
               });
     const auto &add = pat.Op(paddle::dialect::AddOp::name());
     const auto &layernorm =
@@ -104,14 +97,7 @@ class FcElementwiseLayerNormFuse2Pattern
               {
                   {"in_num_col_dims", pat.Attr("in_num_col_dims")},
                   {"activation_type", pat.Attr("activation_type")},
-                  {"use_mkldnn", pat.Attr("use_mkldnn")},
                   {"padding_weights", pat.Attr("padding_weights")},
-                  {"use_quantizer", pat.Attr("use_quantizer")},
-                  {"mkldnn_data_type", pat.Attr("mkldnn_data_type")},
-                  {"scale_in", pat.Attr("scale_in")},
-                  {"scale_weights", pat.Attr("scale_weights")},
-                  {"scale_out", pat.Attr("scale_out")},
-                  {"force_fp32_output", pat.Attr("force_fp32_output")},
               });
     const auto &add = pat.Op(paddle::dialect::AddOp::name());
     const auto &layernorm =
diff --git a/paddle/fluid/pir/transforms/fusion/fc_fuse_pass.cc b/paddle/fluid/pir/transforms/fusion/fc_fuse_pass.cc
index 269ffd8633da8..2a320b75d6cc3 100644
--- a/paddle/fluid/pir/transforms/fusion/fc_fuse_pass.cc
+++ b/paddle/fluid/pir/transforms/fusion/fc_fuse_pass.cc
@@ -65,32 +65,15 @@ class MatmulAddPattern : public pir::drr::DrrPatternBase {
     const auto &false_attr = res.Attr(
         [](const pir::drr::MatchContext &match_ctx) -> bool { return false; });
-    const auto &fc = res.Op(
-        paddle::dialect::FcOp::name(),
-        {{
-            {"in_num_col_dims", in_num_col_dims_attr},
-            {"activation_type",
-             res.Attr([](const pir::drr::MatchContext &match_ctx)
-                          -> std::string { return ""; })},
-            {"use_mkldnn", false_attr},
-            {"padding_weights", false_attr},
-            {"use_quantizer", false_attr},
-            {"mkldnn_data_type",
-             res.Attr([](const pir::drr::MatchContext &match_ctx)
-                          -> std::string { return "float32"; })},
-            {"scale_in",
-             res.Attr([](const pir::drr::MatchContext &match_ctx) -> float {
-               return 1.0f;
-             })},
-            {"scale_weights",
-             res.Attr([](const pir::drr::MatchContext &match_ctx)
-                          -> std::vector<float> { return {1.0f}; })},
-            {"scale_out",
-             res.Attr([](const pir::drr::MatchContext &match_ctx) -> float {
-               return 1.0f;
-             })},
-            {"force_fp32_output", false_attr},
-        }});
+    const auto &fc =
+        res.Op(paddle::dialect::FcOp::name(),
+               {{
+                   {"in_num_col_dims", in_num_col_dims_attr},
+                   {"activation_type",
+                    res.Attr([](const pir::drr::MatchContext &match_ctx)
+                                 -> std::string { return ""; })},
+                   {"padding_weights", false_attr},
+               }});
     fc({&res.Tensor("x"), &res.Tensor("w"), &res.Tensor("y")},
        {&res.Tensor("add_out")});
   }

@@ -105,14 +88,7 @@ class FcWithReluPattern : public pir::drr::DrrPatternBase {
                {{
                    {"in_num_col_dims", pat.Attr("in_num_col_dims")},
                    {"activation_type", pat.Attr("activation_type")},
-                   {"use_mkldnn", pat.Attr("use_mkldnn")},
                    {"padding_weights", pat.Attr("padding_weights")},
-                   {"use_quantizer", pat.Attr("use_quantizer")},
-                   {"mkldnn_data_type", pat.Attr("mkldnn_data_type")},
-                   {"scale_in", pat.Attr("scale_in")},
-                   {"scale_weights", pat.Attr("scale_weights")},
-                   {"scale_out", pat.Attr("scale_out")},
-                   {"force_fp32_output", pat.Attr("force_fp32_output")},
                }});
     fc({&pat.Tensor("x"), &pat.Tensor("w"), &pat.Tensor("y")},
        {&pat.Tensor("fc_out")});
@@ -133,14 +109,7 @@
                    {"activation_type",
                     res.Attr([](const pir::drr::MatchContext &match_ctx)
                                  -> std::string { return "relu"; })},
-                   {"use_mkldnn", pat.Attr("use_mkldnn")},
                    {"padding_weights", pat.Attr("padding_weights")},
-                   {"use_quantizer", pat.Attr("use_quantizer")},
-                   {"mkldnn_data_type", pat.Attr("mkldnn_data_type")},
-                   {"scale_in", pat.Attr("scale_in")},
-                   {"scale_weights", pat.Attr("scale_weights")},
-                   {"scale_out", pat.Attr("scale_out")},
-                   {"force_fp32_output", pat.Attr("force_fp32_output")},
                }});
     fc_with_relu({&res.Tensor("x"), &res.Tensor("w"), &res.Tensor("y")},
                  {&res.Tensor("relu_out")});
diff --git a/paddle/fluid/pir/transforms/fusion/fc_with_special_op_fuse_pass.cc b/paddle/fluid/pir/transforms/fusion/fc_with_special_op_fuse_pass.cc
index 59994c5e5d924..6bb2b3a6d512d 100644
--- a/paddle/fluid/pir/transforms/fusion/fc_with_special_op_fuse_pass.cc
+++ b/paddle/fluid/pir/transforms/fusion/fc_with_special_op_fuse_pass.cc
@@ -94,32 +94,15 @@ class SqueezeFcFusePattern
     const auto &false_attr = res.Attr(
         [](const pir::drr::MatchContext &match_ctx) -> bool { return false; });
-    const auto &fc = res.Op(
-        paddle::dialect::FcOp::name(),
-        {{
-            {"in_num_col_dims", in_num_col_dims_attr},
-            {"activation_type",
-             res.Attr([](const pir::drr::MatchContext &match_ctx)
-                          -> std::string { return ""; })},
-            {"use_mkldnn", false_attr},
-            {"padding_weights", false_attr},
-            {"use_quantizer", false_attr},
-            {"mkldnn_data_type",
-             res.Attr([](const pir::drr::MatchContext &match_ctx)
-                          -> std::string { return "float32"; })},
-            {"scale_in",
-             res.Attr([](const pir::drr::MatchContext &match_ctx) -> float {
-               return 1.0f;
-             })},
-            {"scale_weights",
-             res.Attr([](const pir::drr::MatchContext &match_ctx)
-                          -> std::vector<float> { return {1.0f}; })},
-            {"scale_out",
-             res.Attr([](const pir::drr::MatchContext &match_ctx) -> float {
-               return 1.0f;
-             })},
-            {"force_fp32_output", false_attr},
-        }});
+    const auto &fc =
+        res.Op(paddle::dialect::FcOp::name(),
+               {{
+                   {"in_num_col_dims", in_num_col_dims_attr},
+                   {"activation_type",
+                    res.Attr([](const pir::drr::MatchContext &match_ctx)
+                                 -> std::string { return ""; })},
{"padding_weights", false_attr}, + }}); fc({&res.Tensor("x"), &res.Tensor("w"), &res.Tensor("bias")}, {&res.Tensor("add_out")}); } @@ -248,32 +231,15 @@ class ReshapeFcFusePattern const auto &false_attr = res.Attr( [](const pir::drr::MatchContext &match_ctx) -> bool { return false; }); - const auto &fc = res.Op( - paddle::dialect::FcOp::name(), - {{ - {"in_num_col_dims", in_num_col_dims_attr}, - {"activation_type", - res.Attr([](const pir::drr::MatchContext &match_ctx) - -> std::string { return ""; })}, - {"use_mkldnn", false_attr}, - {"padding_weights", false_attr}, - {"use_quantizer", false_attr}, - {"mkldnn_data_type", - res.Attr([](const pir::drr::MatchContext &match_ctx) - -> std::string { return "float32"; })}, - {"scale_in", - res.Attr([](const pir::drr::MatchContext &match_ctx) -> float { - return 1.0f; - })}, - {"scale_weights", - res.Attr([](const pir::drr::MatchContext &match_ctx) - -> std::vector { return {1.0f}; })}, - {"scale_out", - res.Attr([](const pir::drr::MatchContext &match_ctx) -> float { - return 1.0f; - })}, - {"force_fp32_output", false_attr}, - }}); + const auto &fc = + res.Op(paddle::dialect::FcOp::name(), + {{ + {"in_num_col_dims", in_num_col_dims_attr}, + {"activation_type", + res.Attr([](const pir::drr::MatchContext &match_ctx) + -> std::string { return ""; })}, + {"padding_weights", false_attr}, + }}); fc({&res.Tensor("x"), &res.Tensor("w"), &res.Tensor("bias")}, {&res.Tensor("add_out")}); } @@ -336,32 +302,15 @@ class FlattenFcFusePattern const auto &false_attr = res.Attr( [](const pir::drr::MatchContext &match_ctx) -> bool { return false; }); - const auto &fc = res.Op( - paddle::dialect::FcOp::name(), - {{ - {"in_num_col_dims", in_num_col_dims_attr}, - {"activation_type", - res.Attr([](const pir::drr::MatchContext &match_ctx) - -> std::string { return ""; })}, - {"use_mkldnn", false_attr}, - {"padding_weights", false_attr}, - {"use_quantizer", false_attr}, - {"mkldnn_data_type", - res.Attr([](const pir::drr::MatchContext &match_ctx) - -> std::string { return "float32"; })}, - {"scale_in", - res.Attr([](const pir::drr::MatchContext &match_ctx) -> float { - return 1.0f; - })}, - {"scale_weights", - res.Attr([](const pir::drr::MatchContext &match_ctx) - -> std::vector { return {1.0f}; })}, - {"scale_out", - res.Attr([](const pir::drr::MatchContext &match_ctx) -> float { - return 1.0f; - })}, - {"force_fp32_output", false_attr}, - }}); + const auto &fc = + res.Op(paddle::dialect::FcOp::name(), + {{ + {"in_num_col_dims", in_num_col_dims_attr}, + {"activation_type", + res.Attr([](const pir::drr::MatchContext &match_ctx) + -> std::string { return ""; })}, + {"padding_weights", false_attr}, + }}); fc({&res.Tensor("x"), &res.Tensor("w"), &res.Tensor("bias")}, {&res.Tensor("add_out")}); } diff --git a/paddle/phi/api/yaml/fused_ops.yaml b/paddle/phi/api/yaml/fused_ops.yaml index 366e3564aff3e..a31dee6a4c27d 100644 --- a/paddle/phi/api/yaml/fused_ops.yaml +++ b/paddle/phi/api/yaml/fused_ops.yaml @@ -122,7 +122,7 @@ data_type : x - op : fc - args : (Tensor input, Tensor w, Tensor bias, int in_num_col_dims = 1, str activation_type = "", bool use_mkldnn = false, bool padding_weights = false, bool use_quantizer = false, str mkldnn_data_type = "float32", float scale_in = 1.0f, float[] scale_weights = {1.0f}, float scale_out = 1.0f, bool force_fp32_output = false) + args : (Tensor input, Tensor w, Tensor bias, int in_num_col_dims = 1, str activation_type = "", bool padding_weights = false) output : Tensor(out) infer_meta : func : FCInferMeta diff --git 
index e64e837b24d49..3ef6e4c7d5921 100755
--- a/paddle/phi/api/yaml/op_compat.yaml
+++ b/paddle/phi/api/yaml/op_compat.yaml
@@ -1043,12 +1043,8 @@
     bias : Bias
   outputs :
     out : Out
-  attrs :
-    scale_in : Scale_in
-    scale_weights : Scale_weights
-    scale_out : Scale_out
   extra :
-    [bool @ALL_KERNELS_MUST_COMPUTE_RUNTIME_SHAPE@ = true]
+    attrs : [bool ALL_KERNELS_MUST_COMPUTE_RUNTIME_SHAPE = true, bool use_mkldnn = false, bool use_quantizer = false, str mkldnn_data_type = "float32", float Scale_in = 1.0f, "float[] Scale_weights = {1.0f}", float Scale_out = 1.0f, bool force_fp32_output = false]
 
 - op : feed
   outputs: {out: Out}
diff --git a/paddle/phi/infermeta/fusion.cc b/paddle/phi/infermeta/fusion.cc
index 7847a5bbb7805..f38ffe0f1fc9d 100644
--- a/paddle/phi/infermeta/fusion.cc
+++ b/paddle/phi/infermeta/fusion.cc
@@ -3425,14 +3425,7 @@ void FCInferMeta(const MetaTensor& input,
                  const MetaTensor& bias,
                  const int in_num_col_dims,
                  const std::string& activation_type,
-                 const bool use_mkldnn,
                  const bool padding_weights,
-                 const bool use_quantizer,
-                 const std::string& mkldnn_data_type,
-                 const float scale_in,
-                 const std::vector<float>& sclae_weights,
-                 const float scale_out,
-                 const bool force_fp32_output,
                  MetaTensor* out) {
   PADDLE_ENFORCE_GE(
       in_num_col_dims,
@@ -3441,15 +3434,7 @@ void FCInferMeta(const MetaTensor& input,
           "The in_num_col_dims is expected to equal or greater than 1. "
           "But received the in_num_col_dims is %d. ",
           in_num_col_dims));
-  std::string mkldnn_data_type_list[] = {"float32", "int8", "bfloat16"};
-  PADDLE_ENFORCE_EQ(
-      std::find(std::begin(mkldnn_data_type_list),
-                std::end(mkldnn_data_type_list),
-                mkldnn_data_type) != std::end(mkldnn_data_type_list),
-      true,
-      phi::errors::InvalidArgument("The mkldnn_data_type shoule be [float32, "
-                                   "int8, bfloat16], but found %s.",
-                                   mkldnn_data_type.c_str()));
+
   auto w_dims = w.dims();
   PADDLE_ENFORCE_EQ(
       w_dims.size(),
@@ -3522,18 +3507,6 @@ void FCInferMeta(const MetaTensor& input,
           activation_type.c_str()));
   }
 
-  if (use_mkldnn) {
-    PADDLE_ENFORCE_EQ(
-        in_dims.size() >= 2 && in_dims.size() <= 4,
-        true,
-        phi::errors::Unimplemented(
-            "The Input of fc is expected to be a 2-D, 3-D or 4-D tensor when "
-            "use_mkldnn is set. But received the number of Input's "
-            "dimensions is %d, Input's shape is %s.",
-            in_dims.size(),
-            in_dims));
-  }
-
   std::vector<int64_t> output_dims;
   phi::funcs::FCOutputSize(
       in_dims, w_dims, output_dims, in_num_col_dims, padding_weights);
diff --git a/paddle/phi/infermeta/fusion.h b/paddle/phi/infermeta/fusion.h
index 002cc96eab4fe..ade4e38d457a6 100644
--- a/paddle/phi/infermeta/fusion.h
+++ b/paddle/phi/infermeta/fusion.h
@@ -807,14 +807,7 @@ void FCInferMeta(const MetaTensor& input,
                  const MetaTensor& bias,
                  const int in_num_col_dims,
                  const std::string& activation_type,
-                 const bool use_mkldnn,
                  const bool padding_weights,
-                 const bool use_quantizer,
-                 const std::string& mkldnn_data_type,
-                 const float scale_in,
-                 const std::vector<float>& sclae_weights,
-                 const float scale_out,
-                 const bool force_fp32_output,
                  MetaTensor* out);
 
 void VariableLengthMemoryEfficientAttentionInferMeta(
diff --git a/paddle/phi/kernels/fusion/onednn/fc_kernel.cc b/paddle/phi/kernels/fusion/onednn/fc_kernel.cc
index 6eed95b9b1c9a..0d39677276ead 100644
--- a/paddle/phi/kernels/fusion/onednn/fc_kernel.cc
+++ b/paddle/phi/kernels/fusion/onednn/fc_kernel.cc
@@ -567,17 +567,61 @@ void FCKernel(const Context& dev_ctx,
              const paddle::optional<DenseTensor>& bias,
              const int in_num_col_dims,
              const std::string& activation_type,
-             const bool use_mkldnn,
              const bool padding_weights,
-             const bool use_quantizer,
-             const std::string& mkldnn_data_type,
-             const float scale_in,
-             const std::vector<float>& scale_weights,
-             const float scale_out,
-             const bool force_fp32_output,
              DenseTensor* out) {
+  const bool use_mkldnn =
+      dev_ctx.HasDnnAttr("use_mkldnn")
+          ? PADDLE_GET_CONST(bool, dev_ctx.GetDnnAttr("use_mkldnn"))
+          : false;
+  const bool use_quantizer =
+      dev_ctx.HasDnnAttr("use_quantizer")
+          ? PADDLE_GET_CONST(bool, dev_ctx.GetDnnAttr("use_quantizer"))
+          : false;
+  const std::string mkldnn_data_type =
+      dev_ctx.HasDnnAttr("mkldnn_data_type")
+          ? PADDLE_GET_CONST(std::string,
+                             dev_ctx.GetDnnAttr("mkldnn_data_type"))
+          : "float32";
+  const float scale_in =
+      dev_ctx.HasDnnAttr("Scale_in")
+          ? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("Scale_in"))
+          : 1.0f;
+  std::vector<float> tmp_scale_weights = {1.0f};
+  const std::vector<float> scale_weights =
+      dev_ctx.HasDnnAttr("Scale_weights")
+          ? PADDLE_GET_CONST(std::vector<float>,
+                             dev_ctx.GetDnnAttr("Scale_weights"))
+          : tmp_scale_weights;
+  const float scale_out =
+      dev_ctx.HasDnnAttr("Scale_out")
+          ? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("Scale_out"))
+          : 1.0f;
+  const bool force_fp32_output =
+      dev_ctx.HasDnnAttr("force_fp32_output")
+          ? PADDLE_GET_CONST(bool, dev_ctx.GetDnnAttr("force_fp32_output"))
+          : false;
+  std::string mkldnn_data_type_list[] = {"float32", "int8", "bfloat16"};
+  PADDLE_ENFORCE_EQ(
+      std::find(std::begin(mkldnn_data_type_list),
+                std::end(mkldnn_data_type_list),
+                mkldnn_data_type) != std::end(mkldnn_data_type_list),
+      true,
+      phi::errors::InvalidArgument("The mkldnn_data_type should be [float32, "
+                                   "int8, bfloat16], but found %s.",
+                                   mkldnn_data_type.c_str()));
+  auto in_dims = input.dims();
+  if (use_mkldnn) {
+    PADDLE_ENFORCE_EQ(
+        in_dims.size() >= 2 && in_dims.size() <= 4,
+        true,
+        phi::errors::Unimplemented(
+            "The Input of fc is expected to be a 2-D, 3-D or 4-D tensor when "
+            "use_mkldnn is set. But received the number of Input's "
+            "dimensions is %d, Input's shape is %s.",
+            in_dims.size(),
+            in_dims));
+  }
   bool fuse_relu = activation_type == "relu";
-
   IF_CHANGE_FC_TW_TYPENAME((std::is_same<T, uint8_t>::value), ([&] {
     if (force_fp32_output) {  // NOLINT
       RunKernel<T, TW, float>(dev_ctx,
diff --git a/paddle/phi/kernels/impl/fc_kernel_impl.h b/paddle/phi/kernels/impl/fc_kernel_impl.h
index c30da9d4e5000..3709a15880b4c 100644
--- a/paddle/phi/kernels/impl/fc_kernel_impl.h
+++ b/paddle/phi/kernels/impl/fc_kernel_impl.h
@@ -30,14 +30,7 @@ void FCKernel(const Context& dev_ctx,
              const paddle::optional<DenseTensor>& bias,
              const int in_num_col_dims,
              const std::string& activation_type,
-             const bool use_mkldnn,
              const bool padding_weights,
-             const bool use_quantizer,
-             const std::string& mkldnn_data_type,
-             const float scale_in,
-             const std::vector<float>& scale_weights,
-             const float scale_out,
-             const bool force_fp32_output,
              DenseTensor* out) {
   bool with_relu = (activation_type == "relu") ? true : false;
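
The onednn FCKernel above resolves each of the moved extra attributes with the same
HasDnnAttr / GetDnnAttr / PADDLE_GET_CONST ternary, repeated once per attribute. A
small helper could fold that pattern into a single call. The sketch below is
illustrative only: GetDnnAttrOr is a hypothetical name that is not part of this
patch, and it assumes the phi::OneDNNContext and PADDLE_GET_CONST declarations
already in scope in fc_kernel.cc.

    // Hypothetical helper (not in this patch): return the extra attribute that
    // the framework pushed into the OneDNN context, or `default_value` when the
    // attribute was never set (e.g. for graphs that bypass the MKLDNN path).
    template <typename T>
    T GetDnnAttrOr(const phi::OneDNNContext& dev_ctx,
                   const std::string& name,
                   const T& default_value) {
      return dev_ctx.HasDnnAttr(name)
                 ? PADDLE_GET_CONST(T, dev_ctx.GetDnnAttr(name))
                 : default_value;
    }

    // Usage mirroring the lookups in FCKernel:
    //   const float scale_in = GetDnnAttrOr<float>(dev_ctx, "Scale_in", 1.0f);
    //   const bool force_fp32_output =
    //       GetDnnAttrOr<bool>(dev_ctx, "force_fp32_output", false);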