
Commit 7a363e7

[Paddle-TRT] support set_value dynamic shape (#60508)

1 parent b180800 commit 7a363e7

File tree: 4 files changed, +300 -158 lines

paddle/fluid/inference/tensorrt/convert/op_converter.h (+17)

@@ -651,6 +651,23 @@ class OpConverter {
                 ->getOutput(0);
     return tensor;
   }
+
+  // Create a constant layer with shape_tensor and value
+  template <typename T>
+  nvinfer1::ITensor* FillConstantLayer(nvinfer1::ITensor* shape_tensor,
+                                       int tensor_rank,
+                                       T value) {
+    auto fill_layer = TRT_ENGINE_ADD_LAYER(
+        engine_, Fill, nvinfer1::Dims{}, nvinfer1::FillOperation::kLINSPACE);
+    fill_layer->setInput(0, *shape_tensor);
+    std::vector<T> beta_vec(tensor_rank);
+    std::vector<T> value_vec(1, value);
+    fill_layer->setInput(1, *Add1DConstantLayer(value_vec, "value_vec", true));
+    fill_layer->setInput(2, *Add1DConstantLayer(beta_vec, "beta_vec", false));
+    auto tensor = fill_layer->getOutput(0);
+    return tensor;
+  }
+
   template <typename T>
   // Create and add Multi-D constant float/int32 layer
   nvinfer1::ITensor* AddConstantLayer(const T* data,
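
Note on the new helper: TensorRT's kLINSPACE fill computes start + sum(index_i * delta_i), so a 0-D start tensor holding `value` plus an all-zero delta vector of length `tensor_rank` (`beta_vec` above is value-initialized to zeros) yields a tensor of the runtime shape filled with a constant. Below is a minimal sketch of the same trick against the raw TensorRT API, outside Paddle's `TRT_ENGINE_ADD_LAYER`/`Add1DConstantLayer` wrappers; `FillWithValue`, `kStart`, and `kDelta` are illustrative names, and the rank is assumed to be at most 8:

    #include <NvInfer.h>

    // The fill value and deltas must outlive engine building, so they live in
    // static storage here.
    static const float kStart[1] = {1.5f};   // example fill value
    static const float kDelta[8] = {0.0f};   // all-zero deltas, one per axis

    nvinfer1::ITensor* FillWithValue(nvinfer1::INetworkDefinition* network,
                                     nvinfer1::ITensor* shape_tensor,
                                     int rank) {
      // Dims{} (rank 0) tells TensorRT to take the output shape from input 0.
      auto* fill = network->addFill(nvinfer1::Dims{},
                                    nvinfer1::FillOperation::kLINSPACE);
      fill->setInput(0, *shape_tensor);  // runtime output shape (1-D INT32)

      // Input 1: 0-D start tensor holding the constant value.
      auto* start = network->addConstant(
          nvinfer1::Dims{}, {nvinfer1::DataType::kFLOAT, kStart, 1});

      // Input 2: 1-D delta tensor, one zero per output axis.
      nvinfer1::Dims delta_dims{};
      delta_dims.nbDims = 1;
      delta_dims.d[0] = rank;
      auto* delta = network->addConstant(
          delta_dims, {nvinfer1::DataType::kFLOAT, kDelta, rank});

      fill->setInput(1, *start->getOutput(0));  // start = value
      fill->setInput(2, *delta->getOutput(0));  // step = 0 along every axis
      return fill->getOutput(0);  // shape_tensor-shaped, every element = value
    }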

paddle/fluid/inference/tensorrt/convert/set_value_op.cc (+175 -52)
@@ -20,7 +20,15 @@ limitations under the License. */
     if (op_desc.HasAttr(#attr_name__)) {                                   \
       vec_##attr_name__ = PADDLE_GET_CONST(std::vector<int64_t>,           \
                                            op_desc.GetAttr(#attr_name__)); \
-      if (!vec_##attr_name__.empty()) attr_name__ = vec_##attr_name__[0];  \
+      if (vec_##attr_name__.size() > 0) {                                  \
+        attr_name__ = vec_##attr_name__[0];                                \
+        PADDLE_ENFORCE_EQ(vec_##attr_name__.size(),                        \
+                          1UL,                                             \
+                          platform::errors::InvalidArgument(               \
+                              "attr axes/starts/ends/steps 's size in "    \
+                              "set_value must be one, but got %d",         \
+                              vec_##attr_name__.size()));                  \
+      }                                                                    \
     }                                                                      \
   } while (0)
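
For reference, `GET_ATTR_FROM_VECTOR(axes)` now expands to roughly the following (a reconstruction for illustration; the `do {` opener and the declaration of `vec_axes` sit outside this hunk):

    do {
      if (op_desc.HasAttr("axes")) {
        vec_axes =
            PADDLE_GET_CONST(std::vector<int64_t>, op_desc.GetAttr("axes"));
        if (vec_axes.size() > 0) {
          axes = vec_axes[0];
          // New in this commit: a multi-element attribute is now a hard error
          // instead of being silently truncated to its first element.
          PADDLE_ENFORCE_EQ(vec_axes.size(), 1UL,
                            platform::errors::InvalidArgument(
                                "attr axes/starts/ends/steps 's size in "
                                "set_value must be one, but got %d",
                                vec_axes.size()));
        }
      }
    } while (0)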

@@ -42,85 +50,200 @@ class SetValueConverter : public OpConverter {
                   bool test_mode) override {
     VLOG(3) << "convert a set value op to tensorrt";
     framework::OpDesc op_desc(op, nullptr);
-
-    auto* inputs = engine_->GetITensor(op_desc.Input("Input")[0]);
-    auto* updates = engine_->GetITensor(op_desc.Input("ValueTensor")[0]);
-    const auto decrease_axes = PADDLE_GET_CONST(
-        std::vector<int64_t>, op_desc.GetAttr("decrease_axes"));
-    std::vector<int32_t> decr_axes{decrease_axes.begin(), decrease_axes.end()};
-    auto value_rank = updates->getDimensions().nbDims;
-    auto input_rank = inputs->getDimensions().nbDims;
-    if (!decrease_axes.empty() && value_rank != input_rank) {
-      updates = Unsqueeze(updates, decr_axes);
-    }
-
     int64_t axes = 0;
     int64_t starts = 0;
     int64_t steps = 1;
     int64_t ends = 0;
-
     GET_ATTR_FROM_VECTOR(axes);
     GET_ATTR_FROM_VECTOR(starts);
     GET_ATTR_FROM_VECTOR(steps);
     GET_ATTR_FROM_VECTOR(ends);

-    // calculate dims
+    VLOG(3) << "axes is: " << axes;
+    VLOG(3) << "starts is: " << starts;
+    VLOG(3) << "steps is: " << steps;
+    VLOG(3) << "ends is: " << ends;
+
+    auto* inputs = engine_->GetITensor(op_desc.Input("Input")[0]);
+
     auto input_dims = inputs->getDimensions();
-    auto update_dims = updates->getDimensions();

     // check params and refill
-    if (axes == -1) {
-      axes = input_dims.nbDims - 1;
+    if (axes < 0) {
+      axes += input_dims.nbDims;
     }

-    if (ends == -1 || ends > input_dims.d[axes]) {
+    if (ends < 0) {
+      ends += input_dims.d[axes];
+    }
+    if (ends >= input_dims.d[axes]) {
       ends = input_dims.d[axes];
     }

-    if (axes >= input_dims.nbDims) {
-      platform::errors::InvalidArgument(
-          "The axes %d is larger than total axes %d", axes, input_dims.nbDims);
+    VLOG(3) << "after standardization";
+    VLOG(3) << "axes is: " << axes;
+    VLOG(3) << "starts is: " << starts;
+    VLOG(3) << "steps is: " << steps;
+    VLOG(3) << "ends is: " << ends;
+
+    auto output_name = op_desc.Output("Out")[0];
+    nvinfer1::ITensor* updates;
+    if (op_desc.HasInput("ValueTensor") &&
+        op_desc.Input("ValueTensor").size() > 0) {
+      updates = engine_->GetITensor(op_desc.Input("ValueTensor")[0]);
+    } else {
+      int dtype = PADDLE_GET_CONST(int, op_desc.GetAttr("dtype"));
+      PADDLE_ENFORCE_EQ(dtype,
+                        5,
+                        platform::errors::InvalidArgument(
+                            "set_value OP dtype must be float"));
+      float value = PADDLE_GET_CONST(std::vector<paddle::experimental::Scalar>,
+                                     op_desc.GetAttr("values"))[0]
+                        .to<float>();
+      VLOG(3) << "the attribute value is: " << value;
+
+      nvinfer1::ITensor* input_shape_tensor = Shape(inputs);
+      std::vector<nvinfer1::ITensor*> vec_tensor;
+      for (int32_t i = 0; i < input_dims.nbDims; ++i) {
+        vec_tensor.push_back(GetEleTensorOfShape(input_shape_tensor, i));
+      }
+      std::vector<int32_t> axes_vec(1, (ends - 1 - starts) / steps + 1);
+      vec_tensor[axes] = Add1DConstantLayer(axes_vec, "axes_vec", false);
+      nvinfer1::ITensor* output_shape_tensor = Concat(vec_tensor, 0);
+      updates = FillConstantLayer(
+          output_shape_tensor, inputs->getDimensions().nbDims, value);
+    }
+
+    // for log
+    {
+      std::vector<int> tmp_vec;
+      for (int i = 0; i < input_dims.nbDims; i++)
+        tmp_vec.push_back(input_dims.d[i]);
+      VLOG(3) << "Input(Name:" << op_desc.Input("Input")[0] << ")"
+              << "'s dimension is :[" << string::join_strings(tmp_vec, ',')
+              << "]";
+
+      tmp_vec.clear();
+      nvinfer1::Dims tmp_dims = updates->getDimensions();
+      for (int i = 0; i < tmp_dims.nbDims; i++)
+        tmp_vec.push_back(tmp_dims.d[i]);
+      VLOG(3) << "updates tensor"
+              << "'s dimension is :[" << string::join_strings(tmp_vec, ',')
+              << "]";
     }
-    if (starts >= input_dims.d[axes]) {
-      platform::errors::InvalidArgument(
-          "The start %d of dim %d is larger than origin shape %d",
-          starts,
-          axes,
-          input_dims.d[axes]);
+
+    const auto decrease_axes = PADDLE_GET_CONST(
+        std::vector<int64_t>, op_desc.GetAttr("decrease_axes"));
+    std::vector<int32_t> decr_axes{decrease_axes.begin(), decrease_axes.end()};
+    auto value_rank = updates->getDimensions().nbDims;
+    auto input_rank = inputs->getDimensions().nbDims;
+    // GLOG_vmodule=op_teller=6
+    VLOG(3) << "decrease_axes is: [" << string::join_strings(decrease_axes, ',')
+            << "]";
+
+    if (decrease_axes.size() > 0 && value_rank != input_rank) {
+      updates = Unsqueeze(updates, decr_axes);
     }
-    if (update_dims.d[axes] != (input_dims.d[axes] - starts) / steps) {
-      platform::errors::InvalidArgument("The update dim error, should be %d",
-                                        (input_dims.d[axes] - starts) / steps);
+
+    PADDLE_ENFORCE_EQ(
+        updates->getDimensions().nbDims,
+        input_rank,
+        platform::errors::InvalidArgument(
+            "ValueTensor's rank is not equal to Input's rank; "
+            "you should try the C++ API "
+            "config.exp_disable_tensorrt_ops({\"%s\"}) to forbid this op "
+            "from entering TRT; "
+            "please find the %s's real name from .pdmodel or shape.txt",
+            output_name,
+            output_name));
+
+    // for log
+    {
+      auto tmp_dims = updates->getDimensions();
+      std::vector<int> tmp_vec;
+      for (int i = 0; i < tmp_dims.nbDims; i++)
+        tmp_vec.push_back(tmp_dims.d[i]);
+      VLOG(3) << "updates tensor"
+              << "'s dimension is :[" << string::join_strings(tmp_vec, ',')
+              << "]";
     }
+
+    // calculate dims
+    auto update_dims = updates->getDimensions();
+
+    PADDLE_ENFORCE_GT(
+        input_dims.d[axes],
+        0,
+        platform::errors::InvalidArgument(
+            "the input_dims.d[%d] must be greater than 0, but received %d",
+            axes,
+            input_dims.d[axes]));
+
+    PADDLE_ENFORCE_GT(
+        update_dims.d[axes],
+        0,
+        platform::errors::InvalidArgument(
+            "the update_dims.d[%d] must be greater than 0, but received %d",
+            axes,
+            update_dims.d[axes]));
+
+    PADDLE_ENFORCE_LE(axes,
+                      input_dims.nbDims,
+                      platform::errors::InvalidArgument(
+                          "The axes %d is larger than total axes %d",
+                          axes,
+                          input_dims.nbDims));
+
+    PADDLE_ENFORCE_LE(
+        starts,
+        input_dims.d[axes],
+        platform::errors::InvalidArgument(
+            "The start %d of dim %d is larger than origin shape %d",
+            starts,
+            axes,
+            input_dims.d[axes]));
+
+    PADDLE_ENFORCE_EQ(
+        update_dims.d[axes],
+        (ends - 1 - starts) / steps + 1,
+        platform::errors::InvalidArgument(
+            "the %dth axis of the update dims is wrong; it should be %d, "
+            "but we got %d",
+            axes,
+            (ends - 1 - starts) / steps + 1,
+            update_dims.d[axes]));
+
     if (engine_->with_dynamic_shape()) {
-      // generate indice
-      int post_size = 1;
-      for (int j = axes + 1; j < update_dims.nbDims; ++j) {
-        post_size = post_size * update_dims.d[j];
-      }
-      std::vector<int> axes_index;
-      for (int i = starts; i < ends; i += steps) {
-        for (int j = 0; j < post_size; ++j) {
-          axes_index.emplace_back(i);
-        }
+      nvinfer1::Dims shape_0;
+      shape_0.nbDims = update_dims.nbDims;
+      for (int i = 0; i < shape_0.nbDims; ++i) {
+        shape_0.d[i] = 1;
       }
-      int pre_size = 1;
-      for (int i = 0; i < axes; ++i) {
-        pre_size *= update_dims.d[i];
+      std::vector<float> tmp_0(1, 0);
+      auto zero_tensor = AddConstantLayer(tmp_0.data(), shape_0);
+      auto indice_tensor = Prod(zero_tensor, updates);
+      auto cast_layer = TRT_ENGINE_ADD_LAYER(engine_, Identity, *indice_tensor);
+      cast_layer->setOutputType(0, nvinfer1::DataType::kINT32);
+      indice_tensor = cast_layer->getOutput(0);
+
+      nvinfer1::Dims shape_1;
+      shape_1.nbDims = update_dims.nbDims;
+      for (int i = 0; i < update_dims.nbDims; ++i) {
+        shape_1.d[i] = 1;
       }
-      std::vector<int> indices;
-      for (int i = 0; i < pre_size; ++i) {
-        indices.insert(indices.end(), axes_index.begin(), axes_index.end());
+      shape_1.d[axes] = update_dims.d[axes];
+      std::vector<int> tmp_1;
+      for (int i = starts; i < ends; i += steps) {
+        tmp_1.push_back(i);
       }
-
-      auto output_name = op_desc.Output("Out")[0];
-      const auto const_layer = AddConstantLayer(
-          indices.data(), update_dims, "set_value_index_" + output_name);
+      auto one_tensor = AddConstantLayer(tmp_1.data(), shape_1);
+      indice_tensor = Sum(indice_tensor, one_tensor);

       auto* layer = TRT_ENGINE_ADD_LAYER(engine_,
                                          Scatter,
                                          *inputs,
-                                         *const_layer,
+                                         *indice_tensor,
                                          *updates,
                                          nvinfer1::ScatterMode::kELEMENT);
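
How the dynamic-shape branch builds indices: `Prod(zero_tensor, updates)` broadcasts an all-ones-shaped zero constant against `updates` to get an update-shaped tensor of zeros, the Identity layer casts it to INT32, and `Sum` then broadcasts `one_tensor` (shape [1, ..., update_dims.d[axes], ..., 1], holding the selected positions) into it, so every update element carries its destination index along `axes` for the element-wise scatter. A small self-contained check of the index math, with hypothetical numbers (input shape [4, 6], axes = 1, starts = 1, ends = 6, steps = 2):

    #include <cstdio>
    #include <vector>

    int main() {
      const int starts = 1, ends = 6, steps = 2;

      // Extent the converter enforces along `axes` (see PADDLE_ENFORCE_EQ):
      const int extent = (ends - 1 - starts) / steps + 1;  // (6-1-1)/2+1 = 3

      // tmp_1, the positions baked into `one_tensor` of shape [1, 3]:
      std::vector<int> tmp_1;
      for (int i = starts; i < ends; i += steps) tmp_1.push_back(i);  // 1 3 5

      std::printf("extent = %d, indices =", extent);
      for (int i : tmp_1) std::printf(" %d", i);
      std::printf("\n");  // prints: extent = 3, indices = 1 3 5

      // Summed with the update-shaped INT32 zeros, the row {1, 3, 5}
      // broadcasts across a [4, 3] update, so each update element holds its
      // destination index along axis 1, as ScatterMode::kELEMENT expects.
      return 0;
    }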

paddle/fluid/inference/tensorrt/op_teller.cc (+10)
@@ -2470,6 +2470,16 @@ struct SimpleOpTypeSetTeller : public Teller {
                    "starts or steps)";
         return false;
       }
+      if (desc.HasAttr("axes")) {
+        auto axes =
+            PADDLE_GET_CONST(std::vector<int64_t>, desc.GetAttr("axes"));
+        if (axes.size() != 1UL) {
+          VLOG(3) << "the set_value op "
+                  << "has more than one element in attribute axes; it can "
+                     "not enter into trt.";
+          return false;
+        }
+      }
     }

     if (op_type == "top_k_v2" || op_type == "top_k") {
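
Together with the macro change in set_value_op.cc, the teller now keeps set_value out of TRT unless the slice attributes resolve to a single axis. A standalone restatement of the rule (a sketch for illustration, not the repository's code):

    #include <cstdint>
    #include <vector>

    // Hypothetical helper: set_value enters TRT only when `axes` has
    // exactly one element. A multi-axis assignment (e.g. x[0:2, 1:3] = v,
    // which presumably carries axes = {0, 1}) falls back to Paddle's
    // native kernel.
    bool SetValueAxesSupported(const std::vector<int64_t>& axes) {
      return axes.size() == 1;
    }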
