From 9f0990e67b9b66d18e7f4d58b7a123d9cd915a73 Mon Sep 17 00:00:00 2001
From: wangzijian <2087291150@qq.com>
Date: Sun, 21 Jul 2024 07:38:03 +0000
Subject: [PATCH 01/15] add yolov5 tensorrt version

---
 lite/models.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lite/models.h b/lite/models.h
index 5314109d..4249202c 100644
--- a/lite/models.h
+++ b/lite/models.h
@@ -123,6 +123,7 @@
 #include "lite/trt/core/trt_utils.h"
 #include "lite/trt/core/trt_core.h"
 #include "lite/trt/cv/trt_yolofacev8.h"
+#include "lite/trt/cv/trt_yolov5.h"
 #endif
 
 // ENABLE_MNN
@@ -675,12 +676,14 @@ namespace lite{
         namespace cv
         {
             typedef trtcv::TRTYoloFaceV8 _TRT_YOLOFaceNet;
+            typedef trtcv::TRTYoloV5 _TRT_YOLOv5;
             namespace classification
             {
 
             }
             namespace detection
             {
+                typedef _TRT_YOLOv5 YOLOV5;
 
             }
             namespace face

From 606c74472b74c2796f15df46d9a77939974cbfcf Mon Sep 17 00:00:00 2001
From: wangzijian <2087291150@qq.com>
Date: Sun, 21 Jul 2024 07:38:18 +0000
Subject: [PATCH 02/15] add yolov5 tensorrt test code

---
 examples/lite/cv/test_lite_yolov5.cpp | 39 ++++++++++++++++++++++-----
 1 file changed, 32 insertions(+), 7 deletions(-)

diff --git a/examples/lite/cv/test_lite_yolov5.cpp b/examples/lite/cv/test_lite_yolov5.cpp
index f9c90450..23146abe 100644
--- a/examples/lite/cv/test_lite_yolov5.cpp
+++ b/examples/lite/cv/test_lite_yolov5.cpp
@@ -6,9 +6,9 @@
 
 static void test_default()
 {
-  std::string onnx_path = "../../../examples/hub/onnx/cv/yolov5s.onnx";
-  std::string test_img_path = "../../../examples/lite/resources/test_lite_yolov5_1.jpg";
-  std::string save_img_path = "../../../examples/logs/test_lite_yolov5_1.jpg";
+  std::string onnx_path = "/home/wangzijian/lite.ai.toolkit/examples/hub/onnx/cv/yolov5s.onnx";
+  std::string test_img_path = "/home/wangzijian/lite.ai.toolkit/examples/lite/resources/test_lite_yolov5_1.jpg";
+  std::string save_img_path = "/home/wangzijian/lite.ai.toolkit/examples/logs/test_lite_yolov5_1.jpg";
 
   // 1. Test Default Engine ONNXRuntime
   lite::cv::detection::YoloV5 *yolov5 = new lite::cv::detection::YoloV5(onnx_path); // default
@@ -129,13 +129,38 @@ static void test_tnn()
 #endif
 }
 
+
+static void test_tensorrt()
+{
+    std::string engine_path = "/home/wangzijian/lite.ai.toolkit/examples/hub/trt/yolov5s_fp32.engine";
+    std::string test_img_path = "/home/wangzijian/lite.ai.toolkit/examples/lite/resources/test_lite_yolov5_1.jpg";
+    std::string save_img_path = "/home/wangzijian/lite.ai.toolkit/examples/logs/test_lite_yolov5_1.jpg";
+
+    // 1. Test TensorRT Engine
+    lite::trt::cv::detection::YOLOV5  *yolov5 = new lite::trt::cv::detection::YOLOV5(engine_path);
+    std::vector<lite::types::Boxf> detected_boxes;
+    cv::Mat img_bgr = cv::imread(test_img_path);
+    yolov5->detect(img_bgr, detected_boxes);
+
+    lite::utils::draw_boxes_inplace(img_bgr, detected_boxes);
+
+    cv::imwrite(save_img_path, img_bgr);
+
+    std::cout << "Default Version Detected Boxes Num: " << detected_boxes.size() << std::endl;
+
+    delete yolov5;
+
+}
+
+
 static void test_lite()
 {
+  test_tensorrt();
   test_default();
-  test_onnxruntime();
-  test_mnn();
-  test_ncnn();
-  test_tnn();
+//  test_onnxruntime();
+//  test_mnn();
+//  test_ncnn();
+//  test_tnn();
 }
 
 int main(__unused int argc, __unused char *argv[])

From 9a4c5185384a93014bb65deb48e194b3a3e7512c Mon Sep 17 00:00:00 2001
From: wangzijian <2087291150@qq.com>
Date: Sun, 21 Jul 2024 07:38:43 +0000
Subject: [PATCH 03/15] add yolov5 in trt namespace

---
 lite/trt/core/trt_core.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lite/trt/core/trt_core.h b/lite/trt/core/trt_core.h
index f69eca30..d12e76a5 100644
--- a/lite/trt/core/trt_core.h
+++ b/lite/trt/core/trt_core.h
@@ -11,6 +11,7 @@
 
 namespace trtcv{
     class LITE_EXPORTS TRTYoloFaceV8; // [1] * reference: https://github.com/derronqi/yolov8-face
+    class LITE_EXPORTS TRTYoloV5;     // [2] * reference: https://github.com/derronqi/yolov8-face
 }
 
 namespace trtcv{

From f7432fa3bf05d3e761726df862cddc566ba26546 Mon Sep 17 00:00:00 2001
From: wangzijian <2087291150@qq.com>
Date: Sun, 21 Jul 2024 07:39:14 +0000
Subject: [PATCH 04/15] add one input and multi output trthandler

---
 lite/trt/core/trt_handler.cpp | 85 +++++++++++++++++++++++++----------
 lite/trt/core/trt_handler.h   | 10 +++--
 2 files changed, 69 insertions(+), 26 deletions(-)

diff --git a/lite/trt/core/trt_handler.cpp b/lite/trt/core/trt_handler.cpp
index aa4f9993..88293ef1 100644
--- a/lite/trt/core/trt_handler.cpp
+++ b/lite/trt/core/trt_handler.cpp
@@ -15,8 +15,9 @@ BasicTRTHandler::BasicTRTHandler(const std::string &_trt_model_path, unsigned in
 
 BasicTRTHandler::~BasicTRTHandler() {
     // don't need free by manunly
-    cudaFree(buffers[0]);
-    cudaFree(buffers[1]);
+    for (auto buffer : buffers) {
+        cudaFree(buffer);
+    }
     cudaStreamDestroy(stream);
 }
 
@@ -50,31 +51,69 @@ void BasicTRTHandler::initialize_handler() {
     }
     cudaStreamCreate(&stream);
 
+    // make the flexible one input and multi output
+    int num_io_tensors = trt_engine->getNbIOTensors(); // get the input and output's num
+    buffers.resize(num_io_tensors);
 
-    auto input_name = trt_engine->getIOTensorName(0);
-    auto output_name = trt_engine->getIOTensorName(1);
-
-
-    nvinfer1::Dims input_dims = trt_engine->getTensorShape(input_name);
-    nvinfer1::Dims output_dims = trt_engine->getTensorShape(output_name);
 
-    input_tensor_size = 1;
-    for (int i = 0; i < input_dims.nbDims; ++i) {
-        input_node_dims.push_back(input_dims.d[i]);
-        input_tensor_size *= input_dims.d[i];
-    }
-
-    output_tensor_size = 1;
-    for (int i = 0; i < output_dims.nbDims; ++i) {
-        output_node_dims.push_back(output_dims.d[i]);
-        output_tensor_size *= output_dims.d[i];
+//    auto input_name = trt_engine->getIOTensorName(0);
+//    auto output_name = trt_engine->getIOTensorName(1);
+//
+//
+//    nvinfer1::Dims input_dims = trt_engine->getTensorShape(input_name);
+//    nvinfer1::Dims output_dims = trt_engine->getTensorShape(output_name);
+//
+//    input_tensor_size = 1;
+//    for (int i = 0; i < input_dims.nbDims; ++i) {
+//        input_node_dims.push_back(input_dims.d[i]);
+//        input_tensor_size *= input_dims.d[i];
+//    }
+//
+//    output_tensor_size = 1;
+//    for (int i = 0; i < output_dims.nbDims; ++i) {
+//        output_node_dims.push_back(output_dims.d[i]);
+//        output_tensor_size *= output_dims.d[i];
+//    }
+//
+//    cudaMalloc(&buffers[0], input_tensor_size * sizeof(float));
+//    cudaMalloc(&buffers[1], output_tensor_size * sizeof(float));
+//
+//    trt_context->setTensorAddress(input_name, buffers[0]);
+//    trt_context->setTensorAddress(output_name, buffers[1]);
+
+
+    for (int i = 0; i < num_io_tensors; ++i) {
+        auto tensor_name = trt_engine->getIOTensorName(i);
+        nvinfer1::Dims tensor_dims = trt_engine->getTensorShape(tensor_name);
+
+        // input
+        if (i==0)
+        {
+            size_t tensor_size = 1;
+            for (int j = 0; j < tensor_dims.nbDims; ++j) {
+                tensor_size *= tensor_dims.d[j];
+                input_node_dims.push_back(tensor_dims.d[j]);
+            }
+            cudaMalloc(&buffers[i], tensor_size * sizeof(float));
+            trt_context->setTensorAddress(tensor_name, buffers[i]);
+            continue;
+        }
+
+        // output
+        size_t tensor_size = 1;
+
+        std::vector<int64_t> output_node;
+        for (int j = 0; j < tensor_dims.nbDims; ++j) {
+            output_node.push_back(tensor_dims.d[j]);
+            tensor_size *= tensor_dims.d[j];
+        }
+        output_node_dims.push_back(output_node);
+
+        cudaMalloc(&buffers[i], tensor_size * sizeof(float));
+        trt_context->setTensorAddress(tensor_name, buffers[i]);
+        output_tensor_size++;
     }
 
-    cudaMalloc(&buffers[0], input_tensor_size * sizeof(float));
-    cudaMalloc(&buffers[1], output_tensor_size * sizeof(float));
-
-    trt_context->setTensorAddress(input_name, buffers[0]);
-    trt_context->setTensorAddress(output_name, buffers[1]);
 
 }
 
diff --git a/lite/trt/core/trt_handler.h b/lite/trt/core/trt_handler.h
index 8bdc2d28..486e7eb0 100644
--- a/lite/trt/core/trt_handler.h
+++ b/lite/trt/core/trt_handler.h
@@ -17,13 +17,17 @@ namespace trtcore{
 
         Logger trt_logger;
         // single input and single output
-        void* buffers[2];
+//        void* buffers[2];
+        std::vector<void*> buffers;
         cudaStream_t stream;
 
         std::vector<int64_t> input_node_dims;
-        std::vector<int64_t> output_node_dims;
+        std::vector<std::vector<int64_t>> output_node_dims;
+//        std::vector<int64_t> output_node_dims;
         std::size_t input_tensor_size = 1;
-        std::size_t output_tensor_size = 1;
+        std::size_t output_tensor_size = 0;
+
+
 
         const char* trt_model_path = nullptr;
         const char* log_id = nullptr;

From 8bbc226cec4eaaf5e8cfd082b99e76a4d649d914 Mon Sep 17 00:00:00 2001
From: wangzijian <2087291150@qq.com>
Date: Sun, 21 Jul 2024 07:39:28 +0000
Subject: [PATCH 05/15] add trt version yolov5

---
 lite/trt/cv/trt_yolov5.cpp | 198 +++++++++++++++++++++++++++++++++++++
 lite/trt/cv/trt_yolov5.h   |  80 +++++++++++++++
 2 files changed, 278 insertions(+)
 create mode 100644 lite/trt/cv/trt_yolov5.cpp
 create mode 100644 lite/trt/cv/trt_yolov5.h

diff --git a/lite/trt/cv/trt_yolov5.cpp b/lite/trt/cv/trt_yolov5.cpp
new file mode 100644
index 00000000..1b1d7301
--- /dev/null
+++ b/lite/trt/cv/trt_yolov5.cpp
@@ -0,0 +1,198 @@
+//
+// Created by root on 7/20/24.
+//
+
+#include "trt_yolov5.h"
+using trtcv::TRTYoloV5;
+
+void TRTYoloV5::resize_unscale(const cv::Mat &mat, cv::Mat &mat_rs,
+                            int target_height, int target_width,
+                            YoloV5ScaleParams &scale_params)
+{
+    if (mat.empty()) return;
+    int img_height = static_cast<int>(mat.rows);
+    int img_width = static_cast<int>(mat.cols);
+
+    mat_rs = cv::Mat(target_height, target_width, CV_8UC3,
+                     cv::Scalar(114, 114, 114));
+    // scale ratio (new / old) new_shape(h,w)
+    float w_r = (float) target_width / (float) img_width;
+    float h_r = (float) target_height / (float) img_height;
+    float r = std::min(w_r, h_r);
+    // compute padding
+    int new_unpad_w = static_cast<int>((float) img_width * r); // floor
+    int new_unpad_h = static_cast<int>((float) img_height * r); // floor
+    int pad_w = target_width - new_unpad_w; // >=0
+    int pad_h = target_height - new_unpad_h; // >=0
+
+    int dw = pad_w / 2;
+    int dh = pad_h / 2;
+
+    // resize with unscaling
+    cv::Mat new_unpad_mat;
+    // cv::Mat new_unpad_mat = mat.clone(); // may not need clone.
+    cv::resize(mat, new_unpad_mat, cv::Size(new_unpad_w, new_unpad_h));
+    new_unpad_mat.copyTo(mat_rs(cv::Rect(dw, dh, new_unpad_w, new_unpad_h)));
+
+    // record scale params.
+    scale_params.r = r;
+    scale_params.dw = dw;
+    scale_params.dh = dh;
+    scale_params.new_unpad_w = new_unpad_w;
+    scale_params.new_unpad_h = new_unpad_h;
+    scale_params.flag = true;
+}
+
+void TRTYoloV5::nms(std::vector<types::Boxf> &input, std::vector<types::Boxf> &output,
+                 float iou_threshold, unsigned int topk, unsigned int nms_type)
+{
+    if (nms_type == NMS::BLEND) lite::utils::blending_nms(input, output, iou_threshold, topk);
+    else if (nms_type == NMS::OFFSET) lite::utils::offset_nms(input, output, iou_threshold, topk);
+    else lite::utils::hard_nms(input, output, iou_threshold, topk);
+}
+
+
+cv::Mat TRTYoloV5::normalized(const cv::Mat input_image) {
+    cv::Mat canvas;
+    cv::cvtColor(input_image,canvas,cv::COLOR_BGR2RGB);
+    canvas.convertTo(canvas,CV_32F,1.0 / 255.0,0);
+    return canvas;
+}
+
+
+void writeFloatArrayToFile(const float* input, size_t length, const std::string& filename) {
+    // 打开文件进行写入
+    std::ofstream outFile(filename);
+
+    // 检查文件是否成功打开
+    if (!outFile.is_open()) {
+        std::cerr << "Error: Could not open the file for writing." << std::endl;
+        return;
+    }
+    // 写入浮点数到文件中，每个浮点数占一行
+    for (size_t i = 0; i < length; ++i) {
+        outFile << input[i] << std::endl;
+    }
+    // 关闭文件
+    outFile.close();
+}
+
+void TRTYoloV5::generate_bboxes(const trtcv::TRTYoloV5::YoloV5ScaleParams &scale_params,
+                                std::vector<types::Boxf> &bbox_collection, float* output, float score_threshold,
+                                int img_height, int img_width) {
+    auto pred_dims = output_node_dims[0];
+    const unsigned int num_anchors = pred_dims.at(1); // n = ?
+    const unsigned int num_classes = pred_dims.at(2) - 5;
+
+    float r_ = scale_params.r;
+    int dw_ = scale_params.dw;
+    int dh_ = scale_params.dh;
+
+    bbox_collection.clear();
+    unsigned int count = 0;
+    for (unsigned int i = 0; i < num_anchors; ++i)
+    {
+        float obj_conf = output[i * pred_dims.at(2) + 4];
+        if (obj_conf < score_threshold) continue; // filter first.
+
+        float cls_conf = output[i * pred_dims.at(2) + 5];
+        unsigned int label = 0;
+        for (unsigned int j = 0; j < num_classes; ++j)
+        {
+            float tmp_conf = output[i * pred_dims.at(2) + 5 + j];
+            if (tmp_conf > cls_conf)
+            {
+                cls_conf = tmp_conf;
+                label = j;
+            }
+        }
+        float conf = obj_conf * cls_conf; // cls_conf (0.,1.)
+        if (conf < score_threshold) continue; // filter
+
+        float cx = output[i * pred_dims.at(2)];
+        float cy = output[i * pred_dims.at(2) + 1];
+        float w = output[i * pred_dims.at(2) + 2];
+        float h = output[i * pred_dims.at(2) + 3];
+        float x1 = ((cx - w / 2.f) - (float) dw_) / r_;
+        float y1 = ((cy - h / 2.f) - (float) dh_) / r_;
+        float x2 = ((cx + w / 2.f) - (float) dw_) / r_;
+        float y2 = ((cy + h / 2.f) - (float) dh_) / r_;
+
+        types::Boxf box;
+        box.x1 = std::max(0.f, x1);
+        box.y1 = std::max(0.f, y1);
+        box.x2 = std::min(x2, (float) img_width - 1.f);
+        box.y2 = std::min(y2, (float) img_height - 1.f);
+        box.score = conf;
+        box.label = label;
+        box.label_text = class_names[label];
+        box.flag = true;
+        bbox_collection.push_back(box);
+
+        count += 1; // limit boxes for nms.
+        if (count > max_nms)
+            break;
+    }
+
+#if LITETRT_DEBUG
+    std::cout << "detected num_anchors: " << num_anchors << "\n";
+    std::cout << "generate_bboxes num: " << bbox_collection.size() << "\n";
+#endif
+
+}
+
+
+
+void TRTYoloV5::detect(const cv::Mat &mat, std::vector<types::Boxf> &detected_boxes, float score_threshold,
+                       float iou_threshold, unsigned int topk, unsigned int nms_type) {
+
+    if (mat.empty()) return;
+    const int input_height = input_node_dims.at(2);
+    const int input_width = input_node_dims.at(3);
+    int img_height = static_cast<int>(mat.rows);
+    int img_width = static_cast<int>(mat.cols);
+
+    // resize & unscale
+    cv::Mat mat_rs;
+    YoloV5ScaleParams scale_params;
+    resize_unscale(mat, mat_rs, input_height, input_width, scale_params);
+
+    cv::Mat normalized_image = normalized(mat_rs);
+
+    //1. make the input
+    auto input = trtcv::utils::transform::create_tensor(normalized_image,input_node_dims,trtcv::utils::transform::CHW);
+
+
+    //2. infer
+    cudaMemcpyAsync(buffers[0], input, input_node_dims[0] * input_node_dims[1] * input_node_dims[2] * input_node_dims[3] * sizeof(float),
+                    cudaMemcpyHostToDevice, stream);
+    cudaStreamSynchronize(stream);
+
+    bool status = trt_context->enqueueV3(stream);
+    cudaStreamSynchronize(stream);
+    if (!status){
+        std::cerr << "Failed to infer by TensorRT." << std::endl;
+        return;
+    }
+
+    // Synchronize the stream to ensure all operations are complete
+    cudaStreamSynchronize(stream);
+    // get the first output dim
+    auto pred_dims = output_node_dims[0];
+
+
+    float* output = new float[pred_dims[0] * pred_dims[1] * pred_dims[2]];
+
+    cudaMemcpyAsync(output, buffers[1], pred_dims[0] * pred_dims[1] * pred_dims[2] * sizeof(float),
+                    cudaMemcpyDeviceToHost, stream);
+    cudaStreamSynchronize(stream);
+    writeFloatArrayToFile(output,pred_dims[0] * pred_dims[1] * pred_dims[2],"/home/wangzijian/lite.ai.toolkit/output-test-0720.txt");
+
+    //3. generate the boxes
+    std::vector<types::Boxf> bbox_collection;
+    generate_bboxes(scale_params, bbox_collection, output, score_threshold, img_height, img_width);
+    nms(bbox_collection, detected_boxes, iou_threshold, topk, nms_type);
+
+}
+
+
diff --git a/lite/trt/cv/trt_yolov5.h b/lite/trt/cv/trt_yolov5.h
new file mode 100644
index 00000000..e7c6111d
--- /dev/null
+++ b/lite/trt/cv/trt_yolov5.h
@@ -0,0 +1,80 @@
+//
+// Created by root on 7/20/24.
+//
+
+#ifndef LITE_AI_TOOLKIT_TRT_YOLOV5_H
+#define LITE_AI_TOOLKIT_TRT_YOLOV5_H
+
+#include "lite/trt/core/trt_core.h"
+#include "lite/utils.h"
+#include "lite/trt/core/trt_utils.h"
+
+namespace trtcv
+{
+    class LITE_EXPORTS TRTYoloV5 : public BasicTRTHandler
+    {
+    public:
+        explicit TRTYoloV5(const std::string &_onnx_path, unsigned int _num_threads = 1) :
+                BasicTRTHandler(_onnx_path, _num_threads)
+        {};
+
+        ~TRTYoloV5() override = default;
+
+    private:
+        // nested classes
+        typedef struct
+        {
+            float r;
+            int dw;
+            int dh;
+            int new_unpad_w;
+            int new_unpad_h;
+            bool flag;
+        } YoloV5ScaleParams;
+
+    private:
+        static constexpr const float mean_val = 0.f;
+        static constexpr const float scale_val = 1.0 / 255.f;
+        const char *class_names[80] = {
+                "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
+                "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
+                "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
+                "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
+                "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
+                "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
+                "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard",
+                "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
+                "scissors", "teddy bear", "hair drier", "toothbrush"
+        };
+        enum NMS
+        {
+            HARD = 0, BLEND = 1, OFFSET = 2
+        };
+        static constexpr const unsigned int max_nms = 30000;
+
+    private:
+        void resize_unscale(const cv::Mat &mat,
+                            cv::Mat &mat_rs,
+                            int target_height,
+                            int target_width,
+                            YoloV5ScaleParams &scale_params);
+
+        cv::Mat normalized(const cv::Mat input_image);
+
+        void generate_bboxes(const YoloV5ScaleParams &scale_params,
+                             std::vector<types::Boxf> &bbox_collection,
+                             float* output,
+                             float score_threshold, int img_height,
+                             int img_width); // rescale & exclude
+
+        void nms(std::vector<types::Boxf> &input, std::vector<types::Boxf> &output,
+                 float iou_threshold, unsigned int topk, unsigned int nms_type);
+
+    public:
+        void detect(const cv::Mat &mat, std::vector<types::Boxf> &detected_boxes,
+                    float score_threshold = 0.25f, float iou_threshold = 0.45f,
+                    unsigned int topk = 100, unsigned int nms_type = NMS::OFFSET);
+    };
+}
+
+#endif //LITE_AI_TOOLKIT_TRT_YOLOV5_H

From d78981dbc99a2ac50c7e02ae749ebc03d6e946e0 Mon Sep 17 00:00:00 2001
From: wangzijian <2087291150@qq.com>
Date: Sun, 21 Jul 2024 08:46:50 +0000
Subject: [PATCH 06/15] update trt_yolofacev8.cpp to one input and multi output

---
 lite/trt/cv/trt_yolofacev8.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lite/trt/cv/trt_yolofacev8.cpp b/lite/trt/cv/trt_yolofacev8.cpp
index 8ea369f3..59e8c2e8 100644
--- a/lite/trt/cv/trt_yolofacev8.cpp
+++ b/lite/trt/cv/trt_yolofacev8.cpp
@@ -95,7 +95,7 @@ cv::Mat TRTYoloFaceV8::normalize(cv::Mat srcimg) {
 void TRTYoloFaceV8::generate_box(float *trt_outputs, std::vector<lite::types::Boxf> &boxes, float conf_threshold,
                                  float iou_threshold) {
 
-    int num_box = output_node_dims[2];
+    int num_box = output_node_dims[0][2];
     std::vector<lite::types::BoundingBoxType<float, float>> bounding_box_raw;
     std::vector<float> score_raw;
     for (int i = 0; i < num_box; i++)
@@ -152,9 +152,9 @@ void TRTYoloFaceV8::detect(const cv::Mat &mat, std::vector<lite::types::Boxf> &b
         return;
     }
 
-    float* output = new float[output_node_dims[0] * output_node_dims[1] * output_node_dims[2]];
+    float* output = new float[output_node_dims[0][0] * output_node_dims[0][1] * output_node_dims[0][2]];
 
-    cudaMemcpyAsync(output, buffers[1], output_node_dims[0] * output_node_dims[1] * output_node_dims[2] * sizeof(float),
+    cudaMemcpyAsync(output, buffers[1], output_node_dims[0][0] * output_node_dims[0][1] * output_node_dims[0][2] * sizeof(float),
                     cudaMemcpyDeviceToHost, stream);
     // 4. generate box
     generate_box(output,boxes,0.45f,0.5f);

From a5e0e5f39e2e6186d95eb31056f000b38420611b Mon Sep 17 00:00:00 2001
From: wangzijian <2087291150@qq.com>
Date: Sun, 21 Jul 2024 08:51:20 +0000
Subject: [PATCH 07/15] update tensorrt yolov5 test code

---
 examples/lite/cv/test_lite_yolov5.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/examples/lite/cv/test_lite_yolov5.cpp b/examples/lite/cv/test_lite_yolov5.cpp
index 23146abe..02686df6 100644
--- a/examples/lite/cv/test_lite_yolov5.cpp
+++ b/examples/lite/cv/test_lite_yolov5.cpp
@@ -6,9 +6,9 @@
 
 static void test_default()
 {
-  std::string onnx_path = "/home/wangzijian/lite.ai.toolkit/examples/hub/onnx/cv/yolov5s.onnx";
-  std::string test_img_path = "/home/wangzijian/lite.ai.toolkit/examples/lite/resources/test_lite_yolov5_1.jpg";
-  std::string save_img_path = "/home/wangzijian/lite.ai.toolkit/examples/logs/test_lite_yolov5_1.jpg";
+  std::string onnx_path = "../../../examples/hub/onnx/cv/yolov5s.onnx";
+  std::string test_img_path = "../../../examples/lite/resources/test_lite_yolov5_1.jpg";
+  std::string save_img_path = "../../../examples/logs/test_lite_yolov5_1647_onnx.jpg";
 
   // 1. Test Default Engine ONNXRuntime
   lite::cv::detection::YoloV5 *yolov5 = new lite::cv::detection::YoloV5(onnx_path); // default
@@ -132,9 +132,9 @@ static void test_tnn()
 
 static void test_tensorrt()
 {
-    std::string engine_path = "/home/wangzijian/lite.ai.toolkit/examples/hub/trt/yolov5s_fp32.engine";
-    std::string test_img_path = "/home/wangzijian/lite.ai.toolkit/examples/lite/resources/test_lite_yolov5_1.jpg";
-    std::string save_img_path = "/home/wangzijian/lite.ai.toolkit/examples/logs/test_lite_yolov5_1.jpg";
+    std::string engine_path = "../../../examples/hub/trt/yolov5s_fp32.engine";
+    std::string test_img_path = "../../../examples/lite/resources/test_lite_yolov5_1.jpg";
+    std::string save_img_path = "../../../examples/logs/test_lite_yolov5_1647.jpg";
 
     // 1. Test TensorRT Engine
     lite::trt::cv::detection::YOLOV5  *yolov5 = new lite::trt::cv::detection::YOLOV5(engine_path);

From af59453a74a9017a9b05c44309fea5178bd07005 Mon Sep 17 00:00:00 2001
From: wangzijian <2087291150@qq.com>
Date: Sun, 21 Jul 2024 11:32:00 +0000
Subject: [PATCH 08/15] update tensorrt yolov5 code, delete useless func and update
 format

---
 examples/lite/cv/test_lite_yolov5.cpp | 11 ++++++-----
 lite/trt/cv/trt_yolov5.cpp            | 20 --------------------
 2 files changed, 6 insertions(+), 25 deletions(-)

diff --git a/examples/lite/cv/test_lite_yolov5.cpp b/examples/lite/cv/test_lite_yolov5.cpp
index 02686df6..63367b4c 100644
--- a/examples/lite/cv/test_lite_yolov5.cpp
+++ b/examples/lite/cv/test_lite_yolov5.cpp
@@ -132,6 +132,7 @@ static void test_tnn()
 
 static void test_tensorrt()
 {
+#ifdef ENABLE_TENSORRT
     std::string engine_path = "../../../examples/hub/trt/yolov5s_fp32.engine";
     std::string test_img_path = "../../../examples/lite/resources/test_lite_yolov5_1.jpg";
     std::string save_img_path = "../../../examples/logs/test_lite_yolov5_1647.jpg";
@@ -149,7 +150,7 @@ static void test_tensorrt()
     std::cout << "Default Version Detected Boxes Num: " << detected_boxes.size() << std::endl;
 
     delete yolov5;
-
+#endif
 }
 
 
@@ -157,10 +158,10 @@ static void test_lite()
 {
   test_tensorrt();
   test_default();
-//  test_onnxruntime();
-//  test_mnn();
-//  test_ncnn();
-//  test_tnn();
+  test_onnxruntime();
+  test_mnn();
+  test_ncnn();
+  test_tnn();
 }
 
 int main(__unused int argc, __unused char *argv[])
diff --git a/lite/trt/cv/trt_yolov5.cpp b/lite/trt/cv/trt_yolov5.cpp
index 1b1d7301..28806fcb 100644
--- a/lite/trt/cv/trt_yolov5.cpp
+++ b/lite/trt/cv/trt_yolov5.cpp
@@ -60,23 +60,6 @@ cv::Mat TRTYoloV5::normalized(const cv::Mat input_image) {
 }
 
 
-void writeFloatArrayToFile(const float* input, size_t length, const std::string& filename) {
-    // 打开文件进行写入
-    std::ofstream outFile(filename);
-
-    // 检查文件是否成功打开
-    if (!outFile.is_open()) {
-        std::cerr << "Error: Could not open the file for writing." << std::endl;
-        return;
-    }
-    // 写入浮点数到文件中，每个浮点数占一行
-    for (size_t i = 0; i < length; ++i) {
-        outFile << input[i] << std::endl;
-    }
-    // 关闭文件
-    outFile.close();
-}
-
 void TRTYoloV5::generate_bboxes(const trtcv::TRTYoloV5::YoloV5ScaleParams &scale_params,
                                 std::vector<types::Boxf> &bbox_collection, float* output, float score_threshold,
                                 int img_height, int img_width) {
@@ -180,19 +163,16 @@ void TRTYoloV5::detect(const cv::Mat &mat, std::vector<types::Boxf> &detected_bo
     // get the first output dim
     auto pred_dims = output_node_dims[0];
 
-
     float* output = new float[pred_dims[0] * pred_dims[1] * pred_dims[2]];
 
     cudaMemcpyAsync(output, buffers[1], pred_dims[0] * pred_dims[1] * pred_dims[2] * sizeof(float),
                     cudaMemcpyDeviceToHost, stream);
     cudaStreamSynchronize(stream);
-    writeFloatArrayToFile(output,pred_dims[0] * pred_dims[1] * pred_dims[2],"/home/wangzijian/lite.ai.toolkit/output-test-0720.txt");
 
     //3. generate the boxes
     std::vector<types::Boxf> bbox_collection;
     generate_bboxes(scale_params, bbox_collection, output, score_threshold, img_height, img_width);
     nms(bbox_collection, detected_boxes, iou_threshold, topk, nms_type);
-
 }
 
 

From 2407c103ae365df9a062a6ee3b36f860dc0fb506 Mon Sep 17 00:00:00 2001
From: wangzijian <2087291150@qq.com>
Date: Sun, 21 Jul 2024 12:12:26 +0000
Subject: [PATCH 09/15] delete the useless code

---
 lite/trt/core/trt_handler.cpp | 27 ---------------------------
 lite/trt/core/trt_handler.h   |  5 -----
 2 files changed, 32 deletions(-)

diff --git a/lite/trt/core/trt_handler.cpp b/lite/trt/core/trt_handler.cpp
index 88293ef1..f8655d3f 100644
--- a/lite/trt/core/trt_handler.cpp
+++ b/lite/trt/core/trt_handler.cpp
@@ -55,33 +55,6 @@ void BasicTRTHandler::initialize_handler() {
     int num_io_tensors = trt_engine->getNbIOTensors(); // get the input and output's num
     buffers.resize(num_io_tensors);
 
-
-//    auto input_name = trt_engine->getIOTensorName(0);
-//    auto output_name = trt_engine->getIOTensorName(1);
-//
-//
-//    nvinfer1::Dims input_dims = trt_engine->getTensorShape(input_name);
-//    nvinfer1::Dims output_dims = trt_engine->getTensorShape(output_name);
-//
-//    input_tensor_size = 1;
-//    for (int i = 0; i < input_dims.nbDims; ++i) {
-//        input_node_dims.push_back(input_dims.d[i]);
-//        input_tensor_size *= input_dims.d[i];
-//    }
-//
-//    output_tensor_size = 1;
-//    for (int i = 0; i < output_dims.nbDims; ++i) {
-//        output_node_dims.push_back(output_dims.d[i]);
-//        output_tensor_size *= output_dims.d[i];
-//    }
-//
-//    cudaMalloc(&buffers[0], input_tensor_size * sizeof(float));
-//    cudaMalloc(&buffers[1], output_tensor_size * sizeof(float));
-//
-//    trt_context->setTensorAddress(input_name, buffers[0]);
-//    trt_context->setTensorAddress(output_name, buffers[1]);
-
-
     for (int i = 0; i < num_io_tensors; ++i) {
         auto tensor_name = trt_engine->getIOTensorName(i);
         nvinfer1::Dims tensor_dims = trt_engine->getTensorShape(tensor_name);
diff --git a/lite/trt/core/trt_handler.h b/lite/trt/core/trt_handler.h
index 486e7eb0..85b639b9 100644
--- a/lite/trt/core/trt_handler.h
+++ b/lite/trt/core/trt_handler.h
@@ -16,19 +16,14 @@ namespace trtcore{
         std::unique_ptr<nvinfer1::IExecutionContext> trt_context;
 
         Logger trt_logger;
-        // single input and single output
-//        void* buffers[2];
         std::vector<void*> buffers;
         cudaStream_t stream;
 
         std::vector<int64_t> input_node_dims;
         std::vector<std::vector<int64_t>> output_node_dims;
-//        std::vector<int64_t> output_node_dims;
         std::size_t input_tensor_size = 1;
         std::size_t output_tensor_size = 0;
 
-
-
         const char* trt_model_path = nullptr;
         const char* log_id = nullptr;
         const unsigned int num_threads;

From fe2a96a1d78998dbf41c1d1fc50af42747dc146f Mon Sep 17 00:00:00 2001
From: wangzijian <2087291150@qq.com>
Date: Mon, 22 Jul 2024 03:34:10 +0000
Subject: [PATCH 10/15] update save image name

---
 examples/lite/cv/test_lite_yolov5.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/lite/cv/test_lite_yolov5.cpp b/examples/lite/cv/test_lite_yolov5.cpp
index 63367b4c..9e017cf4 100644
--- a/examples/lite/cv/test_lite_yolov5.cpp
+++ b/examples/lite/cv/test_lite_yolov5.cpp
@@ -8,7 +8,7 @@ static void test_default()
 {
   std::string onnx_path = "../../../examples/hub/onnx/cv/yolov5s.onnx";
   std::string test_img_path = "../../../examples/lite/resources/test_lite_yolov5_1.jpg";
-  std::string save_img_path = "../../../examples/logs/test_lite_yolov5_1647_onnx.jpg";
+  std::string save_img_path = "../../../examples/logs/test_lite_yolov5_1.jpg";
 
   // 1. Test Default Engine ONNXRuntime
   lite::cv::detection::YoloV5 *yolov5 = new lite::cv::detection::YoloV5(onnx_path); // default
@@ -135,7 +135,7 @@ static void test_tensorrt()
 #ifdef ENABLE_TENSORRT
     std::string engine_path = "../../../examples/hub/trt/yolov5s_fp32.engine";
     std::string test_img_path = "../../../examples/lite/resources/test_lite_yolov5_1.jpg";
-    std::string save_img_path = "../../../examples/logs/test_lite_yolov5_1647.jpg";
+    std::string save_img_path = "../../../examples/logs/test_lite_yolov5_1_trt.jpg";
 
     // 1. Test TensorRT Engine
     lite::trt::cv::detection::YOLOV5  *yolov5 = new lite::trt::cv::detection::YOLOV5(engine_path);

From e290ac5644a8eadf675d91275b0931d51e59c207 Mon Sep 17 00:00:00 2001
From: wangzijian <2087291150@qq.com>
Date: Mon, 22 Jul 2024 03:34:37 +0000
Subject: [PATCH 11/15] modify reference website

---
 lite/trt/core/trt_core.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lite/trt/core/trt_core.h b/lite/trt/core/trt_core.h
index d12e76a5..24c2fefe 100644
--- a/lite/trt/core/trt_core.h
+++ b/lite/trt/core/trt_core.h
@@ -11,7 +11,7 @@
 
 namespace trtcv{
     class LITE_EXPORTS TRTYoloFaceV8; // [1] * reference: https://github.com/derronqi/yolov8-face
-    class LITE_EXPORTS TRTYoloV5;     // [2] * reference: https://github.com/derronqi/yolov8-face
+    class LITE_EXPORTS TRTYoloV5;     // [2] * reference: https://github.com/ultralytics/yolov5
 }
 
 namespace trtcv{

From a4a1abf924207ded7b496126b89c0bc1af30e85c Mon Sep 17 00:00:00 2001
From: wangzijian <2087291150@qq.com>
Date: Mon, 22 Jul 2024 03:35:12 +0000
Subject: [PATCH 12/15] free input and output buffers in TRTYoloFaceV8::detect

---
 lite/trt/cv/trt_yolofacev8.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/lite/trt/cv/trt_yolofacev8.cpp b/lite/trt/cv/trt_yolofacev8.cpp
index 59e8c2e8..26d21d94 100644
--- a/lite/trt/cv/trt_yolofacev8.cpp
+++ b/lite/trt/cv/trt_yolofacev8.cpp
@@ -147,6 +147,9 @@ void TRTYoloFaceV8::detect(const cv::Mat &mat, std::vector<lite::types::Boxf> &b
                     cudaMemcpyHostToDevice, stream);
     bool status = trt_context->enqueueV3(stream);
 
+    delete[] input;
+    input = nullptr;
+
     if (!status){
         std::cerr << "Failed to infer by TensorRT." << std::endl;
         return;
@@ -159,4 +162,8 @@ void TRTYoloFaceV8::detect(const cv::Mat &mat, std::vector<lite::types::Boxf> &b
     // 4. generate box
     generate_box(output,boxes,0.45f,0.5f);
 
+    // free pointer
+    delete[] output;
+    output = nullptr;
+
 }

From a6a439dbdc7446dd252578c16c89f84205eef121 Mon Sep 17 00:00:00 2001
From: wangzijian <2087291150@qq.com>
Date: Mon, 22 Jul 2024 03:35:31 +0000
Subject: [PATCH 13/15] update committer name

---
 lite/trt/cv/trt_yolov5.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lite/trt/cv/trt_yolov5.cpp b/lite/trt/cv/trt_yolov5.cpp
index 28806fcb..5f621220 100644
--- a/lite/trt/cv/trt_yolov5.cpp
+++ b/lite/trt/cv/trt_yolov5.cpp
@@ -1,5 +1,5 @@
 //
-// Created by root on 7/20/24.
+// Created by wangzijian on 7/20/24.
 //
 
 #include "trt_yolov5.h"

From 763b0b62f545d7113918ac7ab252eb553a91be18 Mon Sep 17 00:00:00 2001
From: DefTruth <31974251+DefTruth@users.noreply.github.com>
Date: Mon, 22 Jul 2024 12:13:42 +0800
Subject: [PATCH 14/15] Update test_lite_yolov5.cpp

---
 examples/lite/cv/test_lite_yolov5.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/lite/cv/test_lite_yolov5.cpp b/examples/lite/cv/test_lite_yolov5.cpp
index 9e017cf4..699c8e6c 100644
--- a/examples/lite/cv/test_lite_yolov5.cpp
+++ b/examples/lite/cv/test_lite_yolov5.cpp
@@ -156,12 +156,12 @@ static void test_tensorrt()
 
 static void test_lite()
 {
-  test_tensorrt();
   test_default();
   test_onnxruntime();
   test_mnn();
   test_ncnn();
   test_tnn();
+  test_tensorrt();
 }
 
 int main(__unused int argc, __unused char *argv[])

From d3aeddf55ff72b14a36b950d42e0e2c83e92bd8e Mon Sep 17 00:00:00 2001
From: wangzijian <2087291150@qq.com>
Date: Mon, 22 Jul 2024 04:53:31 +0000
Subject: [PATCH 15/15] free input and output buffers in TRTYoloV5::detect

---
 lite/trt/cv/trt_yolov5.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/lite/trt/cv/trt_yolov5.cpp b/lite/trt/cv/trt_yolov5.cpp
index 5f621220..87438466 100644
--- a/lite/trt/cv/trt_yolov5.cpp
+++ b/lite/trt/cv/trt_yolov5.cpp
@@ -145,11 +145,12 @@ void TRTYoloV5::detect(const cv::Mat &mat, std::vector<types::Boxf> &detected_bo
     //1. make the input
     auto input = trtcv::utils::transform::create_tensor(normalized_image,input_node_dims,trtcv::utils::transform::CHW);
 
-
     //2. infer
     cudaMemcpyAsync(buffers[0], input, input_node_dims[0] * input_node_dims[1] * input_node_dims[2] * input_node_dims[3] * sizeof(float),
                     cudaMemcpyHostToDevice, stream);
     cudaStreamSynchronize(stream);
+    delete[] input;
+    input = nullptr;
 
     bool status = trt_context->enqueueV3(stream);
     cudaStreamSynchronize(stream);
@@ -173,6 +174,8 @@ void TRTYoloV5::detect(const cv::Mat &mat, std::vector<types::Boxf> &detected_bo
     std::vector<types::Boxf> bbox_collection;
     generate_bboxes(scale_params, bbox_collection, output, score_threshold, img_height, img_width);
     nms(bbox_collection, detected_boxes, iou_threshold, topk, nms_type);
+    delete[] output;
+    output = nullptr;
 }