PaddlePaddle · SigureMo · Apr 10, 2024 · Apr 8, 2024 · Apr 8, 2024 · Apr 8, 2024
diff --git a/paddle/cinn/hlir/dialect/operator/transforms/pd_to_cinn_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/pd_to_cinn_pass.cc
@@ -761,8 +761,8 @@ class FullWithTensorOpPattern
 
   bool MatchAndRewrite(paddle::dialect::FullWithTensorOp op,
                        pir::PatternRewriter &rewriter) const override {
-    auto shape = op->operand_source(0);
-    auto value = op->operand_source(1);
+    auto value = op->operand_source(0);
+    auto shape = op->operand_source(1);
 
     if (paddle::dialect::TransToPhiDataType(
             value.type()

diff --git a/paddle/fluid/ir_adaptor/translator/op_translator.cc b/paddle/fluid/ir_adaptor/translator/op_translator.cc
@@ -2023,6 +2023,19 @@ struct FillConstant2FullWithTensorTranscriber : public OpTranscriber {
       const OpInputInfoList& input_infos,
       pir::Block* block) override {
     std::vector<pir::Value> op_inputs;
+    if (op_desc.HasInput("ValueTensor", true) &&
+        op_desc.Input("ValueTensor", true).size() > 0) {
+      auto value_tensor_vars = op_desc.Input("ValueTensor", true);
+      auto defining_info = (*param_map)[value_tensor_vars[0]];
+      op_inputs.push_back(defining_info.value);
+    } else {
+      float value = PADDLE_GET_CONST(float, op_desc.GetAttr("value"));
+      pir::Attribute new_attr = pir::FloatAttribute::get(ctx, value);
+      auto defining_op =
+          InsertFullOperationForAttributeInput(ctx, block, new_attr);
+      op_inputs.push_back(defining_op->result(0));
+    }
+
     if (op_desc.HasInput("ShapeTensor", true) &&
         op_desc.Input("ShapeTensor", true).size() > 0) {
       auto shape_tensor_vars = op_desc.Input("ShapeTensor", true);
@@ -2044,18 +2057,6 @@ struct FillConstant2FullWithTensorTranscriber : public OpTranscriber {
       op_inputs.push_back(defining_op->result(0));
     }
 
-    if (op_desc.HasInput("ValueTensor", true) &&
-        op_desc.Input("ValueTensor", true).size() > 0) {
-      auto value_tensor_vars = op_desc.Input("ValueTensor", true);
-      auto defining_info = (*param_map)[value_tensor_vars[0]];
-      op_inputs.push_back(defining_info.value);
-    } else {
-      float value = PADDLE_GET_CONST(float, op_desc.GetAttr("value"));
-      pir::Attribute new_attr = pir::FloatAttribute::get(ctx, value);
-      auto defining_op =
-          InsertFullOperationForAttributeInput(ctx, block, new_attr);
-      op_inputs.push_back(defining_op->result(0));
-    }
     return op_inputs;
   }
 

diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/multiary_infer_sym.cc b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/multiary_infer_sym.cc
@@ -261,7 +261,7 @@ bool ConcatOpInferSymbolicShape(
 
 bool FullWithTensorOpInferSymbolicShape(
     pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) {
-  pir::Value operand_source = op->operand_source(0);
+  pir::Value operand_source = op->operand_source(1);
   const symbol::ShapeOrDataDimExprs &operand_shape_or_data =
       shape_analysis->GetShapeOrDataForValue(operand_source);
 

diff --git a/paddle/fluid/pir/dialect/operator/ir/ops.yaml b/paddle/fluid/pir/dialect/operator/ir/ops.yaml
@@ -766,7 +766,7 @@
     skip_transform : x
 
 - op : full_with_tensor
-  args : (Tensor shape, Tensor value, DataType dtype=DataType::FLOAT32)
+  args : (Tensor value, IntArray shape, DataType dtype=DataType::FLOAT32)
   output: Tensor(out)
   infer_meta :
     func : FullWithTensorInferMeta

diff --git a/paddle/fluid/primitive/backend/manual/manual_static_prim_backend.cc b/paddle/fluid/primitive/backend/manual/manual_static_prim_backend.cc
@@ -43,7 +43,7 @@ Tensor full_with_tensor<LazyTensor>(const Tensor& shape,
       std::static_pointer_cast<LazyTensor>(shape.impl())->value();
   pir::Value value_res = paddle::dialect::full(
       std::vector<int64_t>{}, value.to<float>(), dtype, place);
-  auto op_res = paddle::dialect::full_with_tensor(shape_res, value_res, dtype);
+  auto op_res = paddle::dialect::full_with_tensor(value_res, shape_res, dtype);
   Tensor out(std::make_shared<LazyTensor>(op_res));
   return out;
 }

diff --git a/paddle/fluid/pybind/manual_static_op_function.h b/paddle/fluid/pybind/manual_static_op_function.h
@@ -159,7 +159,7 @@ PyObject *static_api_full(PyObject *self, PyObject *args, PyObject *kwargs) {
       CallStackRecorder callstack_recoder("full_with_tensor");
       callstack_recoder.Record();
       auto static_api_out =
-          paddle::dialect::full_with_tensor(shape, value, dtype);
+          paddle::dialect::full_with_tensor(value, shape, dtype);
       callstack_recoder.AttachToOps();
 
       return ToPyObject(static_api_out);

diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml
@@ -551,7 +551,7 @@
     skip_transform : x
 
 - op : full_with_tensor
-  args : (Tensor shape, Tensor value, DataType dtype=DataType::FLOAT32)
+  args : (Tensor value, IntArray shape, DataType dtype=DataType::FLOAT32)
   output: Tensor(out)
   infer_meta :
     func : FullWithTensorInferMeta

diff --git a/paddle/phi/api/yaml/op_compat.yaml b/paddle/phi/api/yaml/op_compat.yaml
@@ -1279,6 +1279,12 @@
       data_type : float
       support_tensor : true
 
+- op : full_with_tensor
+  int_array:
+    shape :
+      data_type : int64_t
+      support_tensor : true
+
 - op : fused_adam_(fused_adam)
   inputs :
     {params : Params, grads : Grads, learning_rate : LearningRate, moments1 : Moments1,

diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc
@@ -4921,10 +4921,10 @@ void MaskedMultiheadAttentionInferMeta(const MetaTensor& x,
   }
 }
 
-void FullWithTensorInferMeta(const MetaTensor& shape,
+void FullWithTensorInferMeta(const IntArray& shape,
                              DataType dtype,
                              MetaTensor* out) {
-  out->set_dims(common::make_ddim(std::vector<int64_t>(shape.numel(), -1)));
+  out->set_dims(common::make_ddim(shape.GetData()));
   out->set_dtype(dtype);
 }
 

diff --git a/paddle/phi/infermeta/multiary.h b/paddle/phi/infermeta/multiary.h
@@ -952,7 +952,7 @@ void MaskedMultiheadAttentionInferMeta(const MetaTensor& x,
                                        MetaTensor* cache_kv_out,
                                        MetaTensor* beam_cache_offset_out);
 
-void FullWithTensorInferMeta(const MetaTensor& shape,
+void FullWithTensorInferMeta(const IntArray& shape,
                              DataType dtype,
                              MetaTensor* out);
 

diff --git a/paddle/phi/kernels/cpu/full_kernel.cc b/paddle/phi/kernels/cpu/full_kernel.cc
@@ -18,7 +18,7 @@ limitations under the License. */
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
 #include "paddle/phi/kernels/funcs/eigen/eigen_function.h"
-#include "paddle/phi/kernels/impl/full_whit_tensor_kernel_impl.h"
+#include "paddle/phi/kernels/impl/full_with_tensor_kernel_impl.h"
 
 namespace phi {
 

diff --git a/paddle/phi/kernels/full_kernel.h b/paddle/phi/kernels/full_kernel.h
@@ -33,8 +33,8 @@ void FullKernel(const Context& dev_ctx,
 
 template <typename T, typename Context>
 void FullWithTensorKernel(const Context& dev_ctx,
-                          const DenseTensor& shape,
                           const DenseTensor& value,
+                          const IntArray& shape,
                           DataType dtype,
                           DenseTensor* out);
 

diff --git a/paddle/phi/kernels/gpu/full_kernel.cu b/paddle/phi/kernels/gpu/full_kernel.cu
@@ -17,7 +17,7 @@ limitations under the License. */
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/elementwise_base.h"
-#include "paddle/phi/kernels/impl/full_whit_tensor_kernel_impl.h"
+#include "paddle/phi/kernels/impl/full_with_tensor_kernel_impl.h"
 
 namespace phi {
 

diff --git a/...rnels/impl/full_whit_tensor_kernel_impl.h → ...rnels/impl/full_with_tensor_kernel_impl.h b/...rnels/impl/full_whit_tensor_kernel_impl.h → ...rnels/impl/full_with_tensor_kernel_impl.h
@@ -20,12 +20,11 @@ namespace phi {
 
 template <typename T, typename Context>
 void FullWithTensorKernel(const Context& dev_ctx,
-                          const DenseTensor& shape,
                           const DenseTensor& value,
+                          const IntArray& shape,
                           DataType dtype,
                           DenseTensor* out) {
-  auto shape_tmp = IntArray(shape);
-  out->Resize(common::make_ddim(shape_tmp.GetData()));
-  FullKernel<T, Context>(dev_ctx, shape_tmp, Scalar(value), dtype, out);
+  out->Resize(common::make_ddim(shape.GetData()));
+  FullKernel<T, Context>(dev_ctx, shape, Scalar(value), dtype, out);
 }
 }  // namespace phi
diff --git a/paddle/phi/kernels/selected_rows/full_kernel.cc b/paddle/phi/kernels/selected_rows/full_kernel.cc
@@ -37,12 +37,12 @@ void FullKernel(const Context& dev_ctx,
 
 template <typename T, typename Context>
 void FullWithTensorKernel(const Context& dev_ctx,
-                          const DenseTensor& shape,
                           const DenseTensor& value,
+                          const IntArray& shape,
                           DataType dtype,
                           SelectedRows* out) {
   phi::FullWithTensorKernel<T>(
-      dev_ctx, shape, value, dtype, out->mutable_value());
+      dev_ctx, value, shape, dtype, out->mutable_value());
 }
 
 }  // namespace sr

diff --git a/paddle/phi/kernels/selected_rows/full_kernel.h b/paddle/phi/kernels/selected_rows/full_kernel.h
@@ -30,8 +30,8 @@ void FullKernel(const Context& dev_ctx,
 
 template <typename T, typename Context>
 void FullWithTensorKernel(const Context& dev_ctx,
-                          const DenseTensor& shape,
                           const DenseTensor& value,
+                          const IntArray& shape,
                           DataType dtype,
                           SelectedRows* out);
 }  // namespace sr

diff --git a/paddle/phi/kernels/xpu/full_kernel.cc b/paddle/phi/kernels/xpu/full_kernel.cc
@@ -23,7 +23,7 @@
 #include "paddle/phi/common/scalar.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/visit_type.h"
-#include "paddle/phi/kernels/impl/full_whit_tensor_kernel_impl.h"
+#include "paddle/phi/kernels/impl/full_with_tensor_kernel_impl.h"
 
 namespace phi {
 

diff --git a/test/ir/pir/cinn/symbolic/simple_llama.config b/test/ir/pir/cinn/symbolic/simple_llama.config
@@ -26,24 +26,24 @@
     (%24) = "pd_op.slice" (%18, %22, %23) {axes:[(Int64)0],decrease_axis:[(Int64)0],infer_flags:[(Int64)1],is_persistable:[false],stop_gradient:[false]} : (builtin.tensor<2xi32>, builtin.tensor<1xi64>, builtin.tensor<1xi64>) -> builtin.tensor<i32>
     (%25) = "pd_op.cast" (%24) {dtype:(pd_op.DataType)int64,is_persistable:[false],stop_gradient:[false]} : (builtin.tensor<i32>) -> builtin.tensor<i64>
     (%26) = "pd_op.full_int_array" () {dtype:(pd_op.DataType)int64,place:(pd_op.Place)Place(cpu),stop_gradient:[true],value:[(Int64)1]} : () -> builtin.tensor<1xi64>
-    (%27) = "pd_op.full_with_tensor" (%26, %25) {dtype:(pd_op.DataType)int64,is_persistable:[false],stop_gradient:[false]} : (builtin.tensor<1xi64>, builtin.tensor<i64>) -> builtin.tensor<1xi64>
+    (%27) = "pd_op.full_with_tensor" (%25, %26) {dtype:(pd_op.DataType)int64,is_persistable:[false],stop_gradient:[false]} : (builtin.tensor<i64>, builtin.tensor<1xi64>) -> builtin.tensor<1xi64>
     (%28) = "pd_op.shape" (%17) {is_persistable:[false],stop_gradient:[false]} : (builtin.tensor<-1x-1xi64>) -> builtin.tensor<2xi32>
     (%29) = "pd_op.full_int_array" () {dtype:(pd_op.DataType)int64,place:(pd_op.Place)Place(cpu),stop_gradient:[true],value:[(Int64)1]} : () -> builtin.tensor<1xi64>
     (%30) = "pd_op.full_int_array" () {dtype:(pd_op.DataType)int64,place:(pd_op.Place)Place(cpu),stop_gradient:[true],value:[(Int64)2]} : () -> builtin.tensor<1xi64>
     (%31) = "pd_op.slice" (%28, %29, %30) {axes:[(Int64)0],decrease_axis:[(Int64)0],infer_flags:[(Int64)1],is_persistable:[false],stop_gradient:[false]} : (builtin.tensor<2xi32>, builtin.tensor<1xi64>, builtin.tensor<1xi64>) -> builtin.tensor<i32>
     (%32) = "pd_op.cast" (%31) {dtype:(pd_op.DataType)int64,is_persistable:[false],stop_gradient:[false]} : (builtin.tensor<i32>) -> builtin.tensor<i64>
     (%33) = "pd_op.full_int_array" () {dtype:(pd_op.DataType)int64,place:(pd_op.Place)Place(cpu),stop_gradient:[true],value:[(Int64)1]} : () -> builtin.tensor<1xi64>
-    (%34) = "pd_op.full_with_tensor" (%33, %32) {dtype:(pd_op.DataType)int64,is_persistable:[false],stop_gradient:[false]} : (builtin.tensor<1xi64>, builtin.tensor<i64>) -> builtin.tensor<1xi64>
+    (%34) = "pd_op.full_with_tensor" (%32, %33) {dtype:(pd_op.DataType)int64,is_persistable:[false],stop_gradient:[false]} : (builtin.tensor<i64>, builtin.tensor<1xi64>) -> builtin.tensor<1xi64>
     (%35) = "pd_op.full" () {dtype:(pd_op.DataType)int32,is_persistable:[false],place:(pd_op.Place)Place(cpu),shape:(pd_op.IntArray)[],stop_gradient:[false],value:(Float)1} : () -> builtin.tensor<i32>
     (%36) = "builtin.combine" (%21, %35) {} : (builtin.tensor<i32>, builtin.tensor<i32>) -> vec[builtin.tensor<i32>,builtin.tensor<i32>]
     (%37) = "pd_op.stack" (%36) {axis:(Int32)0,stop_gradient:[true]} : (vec[builtin.tensor<i32>,builtin.tensor<i32>]) -> builtin.tensor<2xi32>
     (%38) = "pd_op.full" () {dtype:(pd_op.DataType)float32,place:(pd_op.Place)Place(cpu),shape:(pd_op.IntArray)[1],stop_gradient:[true],value:(Float)1} : () -> builtin.tensor<1xf32>
-    (%39) = "pd_op.full_with_tensor" (%37, %38) {dtype:(pd_op.DataType)bool,is_persistable:[false],stop_gradient:[false]} : (builtin.tensor<2xi32>, builtin.tensor<1xf32>) -> builtin.tensor<-1x1xb>
+    (%39) = "pd_op.full_with_tensor" (%38, %37) {dtype:(pd_op.DataType)bool,is_persistable:[false],stop_gradient:[false]} : (builtin.tensor<1xf32>, builtin.tensor<2xi32>) -> builtin.tensor<-1x1xb>
     (%40) = "pd_op.full" () {dtype:(pd_op.DataType)int32,is_persistable:[false],place:(pd_op.Place)Place(cpu),shape:(pd_op.IntArray)[],stop_gradient:[false],value:(Float)1} : () -> builtin.tensor<i32>
     (%41) = "builtin.combine" (%21, %40) {} : (builtin.tensor<i32>, builtin.tensor<i32>) -> vec[builtin.tensor<i32>,builtin.tensor<i32>]
     (%42) = "pd_op.stack" (%41) {axis:(Int32)0,stop_gradient:[true]} : (vec[builtin.tensor<i32>,builtin.tensor<i32>]) -> builtin.tensor<2xi32>
     (%43) = "pd_op.full" () {dtype:(pd_op.DataType)float32,place:(pd_op.Place)Place(cpu),shape:(pd_op.IntArray)[1],stop_gradient:[true],value:(Float)0} : () -> builtin.tensor<1xf32>
-    (%44) = "pd_op.full_with_tensor" (%42, %43) {dtype:(pd_op.DataType)float16,is_persistable:[false],stop_gradient:[false]} : (builtin.tensor<2xi32>, builtin.tensor<1xf32>) -> builtin.tensor<-1x1xf16>
+    (%44) = "pd_op.full_with_tensor" (%43, %42) {dtype:(pd_op.DataType)float16,is_persistable:[false],stop_gradient:[false]} : (builtin.tensor<1xf32>, builtin.tensor<2xi32>) -> builtin.tensor<-1x1xf16>
     (%45) = "pd_op.shape" (%17) {is_persistable:[false],stop_gradient:[false]} : (builtin.tensor<-1x-1xi64>) -> builtin.tensor<2xi32>
     (%46) = "pd_op.full_int_array" () {dtype:(pd_op.DataType)int64,place:(pd_op.Place)Place(cpu),stop_gradient:[true],value:[(Int64)1]} : () -> builtin.tensor<1xi64>
     (%47) = "pd_op.full_int_array" () {dtype:(pd_op.DataType)int64,place:(pd_op.Place)Place(cpu),stop_gradient:[true],value:[(Int64)2]} : () -> builtin.tensor<1xi64>
@@ -222,7 +222,7 @@
     (%232) = "pd_op.full" () {dtype:(pd_op.DataType)int32,is_persistable:[false],place:(pd_op.Place)Place(cpu),shape:(pd_op.IntArray)[],stop_gradient:[false],value:(Float)1} : () -> builtin.tensor<i32>
     (%233) = "builtin.combine" (%230, %232) {} : (builtin.tensor<i32>, builtin.tensor<i32>) -> vec[builtin.tensor<i32>,builtin.tensor<i32>]
     (%234) = "pd_op.stack" (%233) {axis:(Int32)0,stop_gradient:[true]} : (vec[builtin.tensor<i32>,builtin.tensor<i32>]) -> builtin.tensor<2xi32>
-    (%235) = "pd_op.full_with_tensor" (%234, %231) {dtype:(pd_op.DataType)float16,is_persistable:[false],stop_gradient:[false]} : (builtin.tensor<2xi32>, builtin.tensor<1xf16>) -> builtin.tensor<-1x1xf16>
+    (%235) = "pd_op.full_with_tensor" (%231, %234) {dtype:(pd_op.DataType)float16,is_persistable:[false],stop_gradient:[false]} : (builtin.tensor<1xf16>, builtin.tensor<2xi32>) -> builtin.tensor<-1x1xf16>
     (%236, %237) = "pd_op.top_p_sampling" (%225, %235, <<NULL VALUE>>) {is_persistable:[false,false],seed:(Int32)-1,stop_gradient:[false,false]} : (builtin.tensor<-1x32000xf16>, builtin.tensor<-1x1xf16>, <<NULL TYPE>>) -> builtin.tensor<-1x1xf16>, builtin.tensor<-1x1xi64>
     (%238) = "pd_op.index_sample" (%226, %237) {is_persistable:[false],stop_gradient:[false]} : (builtin.tensor<-1x32000xf16>, builtin.tensor<-1x1xi64>) -> builtin.tensor<-1x1xf16>
     (%239) = "pd_op.subtract" (%27, %34) {is_persistable:[false],stop_gradient:[false]} : (builtin.tensor<1xi64>, builtin.tensor<1xi64>) -> builtin.tensor<1xi64>

diff --git a/test/legacy_test/test_zeros_op.py b/test/legacy_test/test_zeros_op.py
@@ -80,5 +80,13 @@ def test_shape_errors(self):
                 assert error_msg.find("expected to be no less than 0") > 0
 
 
+class ApiZerosWithDynamicShape(unittest.TestCase):
+    def test_dynamic_shape(self):
+        with paddle.pir_utils.IrGuard():
+            x = paddle.static.data("x", shape=[], dtype='int32')
+            out = paddle.zeros(shape=[101, x])
+            self.assertEqual(out.shape, [101, -1])
+
+
 if __name__ == '__main__':
     unittest.main()