fpthink
diff --git a/‎LICENSE
+21 b/‎LICENSE
+21
diff --git a/‎README.md
+120 b/‎README.md
+120
diff --git a/‎V2B_main/__pycache__/test_tracking.cpython-37.pyc
5.68 KB b/‎V2B_main/__pycache__/test_tracking.cpython-37.pyc
5.68 KB
diff --git a/‎V2B_main/__pycache__/test_tracking.cpython-38.pyc
249 Bytes b/‎V2B_main/__pycache__/test_tracking.cpython-38.pyc
249 Bytes
diff --git a/‎V2B_main/__pycache__/train_tracking.cpython-37.pyc
2.52 KB b/‎V2B_main/__pycache__/train_tracking.cpython-37.pyc
2.52 KB
diff --git a/‎V2B_main/__pycache__/train_tracking.cpython-38.pyc
2.71 KB b/‎V2B_main/__pycache__/train_tracking.cpython-38.pyc
2.71 KB
diff --git a/‎V2B_main/datasets/__init__.py b/‎V2B_main/datasets/__init__.py
diff --git a/‎V2B_main/datasets/__pycache__/__init__.cpython-37.pyc
144 Bytes b/‎V2B_main/datasets/__pycache__/__init__.cpython-37.pyc
144 Bytes
diff --git a/‎V2B_main/datasets/__pycache__/__init__.cpython-38.pyc
147 Bytes b/‎V2B_main/datasets/__pycache__/__init__.cpython-38.pyc
147 Bytes
diff --git a/‎V2B_main/datasets/__pycache__/base_dataset.cpython-37.pyc
5.93 KB b/‎V2B_main/datasets/__pycache__/base_dataset.cpython-37.pyc
5.93 KB
diff --git a/‎V2B_main/datasets/__pycache__/get_dataset_v2b.cpython-38.pyc
892 Bytes b/‎V2B_main/datasets/__pycache__/get_dataset_v2b.cpython-38.pyc
892 Bytes
diff --git a/‎V2B_main/datasets/__pycache__/get_v2b_db.cpython-37.pyc
945 Bytes b/‎V2B_main/datasets/__pycache__/get_v2b_db.cpython-37.pyc
945 Bytes
diff --git a/‎V2B_main/datasets/__pycache__/get_v2b_db.cpython-38.pyc
887 Bytes b/‎V2B_main/datasets/__pycache__/get_v2b_db.cpython-38.pyc
887 Bytes
diff --git a/‎V2B_main/datasets/__pycache__/v2b_dataset.cpython-37.pyc
10.6 KB b/‎V2B_main/datasets/__pycache__/v2b_dataset.cpython-37.pyc
10.6 KB
diff --git a/‎V2B_main/datasets/base_dataset.py
+193 b/‎V2B_main/datasets/base_dataset.py
+193
diff --git a/‎V2B_main/datasets/get_v2b_db.py
+25 b/‎V2B_main/datasets/get_v2b_db.py
+25
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2021 fpthink
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -0,0 +1,120 @@
+# 3D Siamese Voxel-to-BEV Tracker for Sparse Point Clouds
+
+## Introduction
+
+This repository is released for V2B in our [NeurIPS 2021 paper (poster)](https://arxiv.org/pdf/2111.04426.pdf). 
+
+**Note**: In order to make the code structure clearer and more reasonable, we refactored the entire project. If you are more familiar with [P2B](https://github.com/HaozheQi/P2B) and the code of our previously published version, you can continue to refer to the code of the [first version](https://drive.google.com/file/d/1yPjC3hES0pU4pzbQsUv9hbCNYu0p4XUo/view?usp=sharing).
+
+## Environment settings
+* Create an environment for v2b
+```
+conda create -n V2B python=3.7
+conda activate V2B
+```
+
+* Install pytorch and torchvision
+```
+conda install pytorch==1.4.0 torchvision==0.5.0 cudatoolkit=10.0
+```
+
+* Install dependencies.
+```
+pip install -r requirements.txt
+```
+
+* Build `_ext` module.
+```
+cd V2B_main/lib/pointops
+python setup.py install
+cd ../../
+```
+
+## Data preparation
+### [KITTI dataset](https://projet.liris.cnrs.fr/imagine/pub/proceedings/CVPR2012/data/papers/424_O3C-04.pdf)
+* Download the [velodyne](http://www.cvlibs.net/download.php?file=data_tracking_velodyne.zip), [calib](http://www.cvlibs.net/download.php?file=data_tracking_calib.zip) and [label_02](http://www.cvlibs.net/download.php?file=data_tracking_label_2.zip) from [KITTI Tracking](http://www.cvlibs.net/datasets/kitti/eval_tracking.php). Unzip the downloaded files and place them under the same parent folder.
+
+### [nuScenes dataset](https://openaccess.thecvf.com/content_CVPR_2020/papers/Caesar_nuScenes_A_Multimodal_Dataset_for_Autonomous_Driving_CVPR_2020_paper.pdf)
+* Download the Full dataset (v1.0) from [nuScenes](https://www.nuscenes.org/).
+  
+    Note that we modified the official [nuscenes-devkit](https://github.com/nutonomy/nuscenes-devkit) code and use it to convert the nuScenes format to the KITTI format. The conversion requires metadata from nuScenes-lidarseg, so you should replace *category.json* and *lidarseg.json* in the Full dataset (v1.0). We provide these two json files in the nuscenes_json folder.
+
+    Execute the following commands to convert the nuScenes format to the KITTI format:
+    ```
+    cd nuscenes-devkit-master/python-sdk/nuscenes/scripts
+    python export_kitti.py --nusc_dir=<nuScenes dataset path> --nusc_kitti_dir=<output dir> --split=<dataset split>
+    ```
+
+    Note that the "split" parameter should be "train_track" or "val". In our paper, we use the model trained on the KITTI dataset to evaluate the generalization of the model on the nuScenes dataset.
+	
+### [Waymo open dataset](https://openaccess.thecvf.com/content_CVPR_2020/papers/Sun_Scalability_in_Perception_for_Autonomous_Driving_Waymo_Open_Dataset_CVPR_2020_paper.pdf)
+* We follow the benchmark created by [LiDAR-SOT](https://github.com/TuSimple/LiDAR_SOT) based on the waymo open dataset. You can download and process the waymo dataset as guided by [their code](https://github.com/TuSimple/LiDAR_SOT), and use our code to test model performance on this benchmark.
+* The benchmark they built contains many things that we don't use, but the following processing results are necessary:
+```
+[waymo_sot]
+    [benchmark]
+        [validation]
+            [vehicle]
+                bench_list.json
+                easy.json
+                medium.json
+                hard.json
+            [pedestrian]
+                bench_list.json
+                easy.json
+                medium.json
+                hard.json
+    [pc]
+        [raw_pc]
+            Here are some segment.npz files containing raw point cloud data
+    [gt_info]
+        Here are some segment.npz files containing tracklet and bbox data
+```
+
+**Note**: After you get the dataset, please modify the dataset path variables ```data_dir``` and ```val_data_dir``` in the configuration file ```V2B_main/utils/options```.
+
+## Evaluation
+
+Train a new model:
+```
+python main.py --which_dataset KITTI/NUSCENES --category_name category_name
+```
+
+Test a model:
+```
+python main.py --which_dataset KITTI/NUSCENES/WAYMO --category_name category_name --train_test test
+```
+For more preset parameters or command debugging parameters, please refer to the relevant code and change it according to your needs.
+
+**Recommendations**: 
+- We have provided some pre-trained models under ```V2B_main/results``` folder, you can use and test them directly.  
+- KITTI and Waymo were both collected with a 64-line LiDAR, whereas nuScenes uses a 32-line LiDAR. We therefore recommend that you train your model on KITTI and verify its generalization ability on Waymo. For nuScenes, either train on it directly or simply skip this dataset; we do not recommend verifying the generalization ability of your model on nuScenes.
+
+## Todo
+
+```
+1. Provide visualization codes.
+2. Provide test results on waymo open dataset so that you can use the results directly in your paper.
+```
+
+## Citation
+
+If you find the code or trained models useful, please consider citing:
+
+```
+@inproceedings{hui2021v2b,
+  title={3D Siamese Voxel-to-BEV Tracker for Sparse Point Clouds},
+  author={Hui, Le and Wang, Lingpeng and Cheng, Mingmei and Xie, Jin and Yang, Jian},
+  booktitle={NeurIPS},
+  year={2021}
+}
+```
+
+## Acknowledgements
+
+- Thanks to Qi for his implementation of [P2B](https://github.com/HaozheQi/P2B).
+- Thanks to Pang for the [3D-SOT benchmark](https://arxiv.org/pdf/2103.06028.pdf) based on the Waymo open dataset.
+
+## License
+This repository is released under MIT License.
+
@@ -0,0 +1,193 @@
+import os
+import pickle
+
+import numpy as np
+import pandas as pd
+from pyquaternion import Quaternion
+
+from torch.utils.data import Dataset
+
+from utils.data_classes import PointCloud, BoundingBox
+
+class kittiDataset():
+    def __init__(self, path, which_dataset):
+        self.which_dataset = which_dataset
+        self.KITTI_Folder = path
+        self.KITTI_velo = os.path.join(self.KITTI_Folder, "velodyne")
+        self.KITTI_label = os.path.join(self.KITTI_Folder, "label_02")
+        self.KITTI_calib = os.path.join(self.KITTI_Folder, "calib")
+
+    def getSceneID(self, split):
+        if self.which_dataset.upper() == 'NUSCENES':
+            if "TRAIN" in split.upper():  
+                # Training SET
+                if "TINY" in split.upper():
+                    sceneID = [0]
+                else:
+                    sceneID = list(range(0, 350))
+            elif "VALID" in split.upper():  
+                # Validation Set
+                if "TINY" in split.upper():
+                    sceneID = [0]
+                else:
+                    sceneID = list(range(0, 10))
+            elif "TEST" in split.upper():  
+                # Testing Set
+                if "TINY" in split.upper():
+                    sceneID = [0]
+                else:
+                    sceneID = list(range(0, 150))
+        else:
+            # KITTI dataset
+            if "TRAIN" in split.upper():  
+                # Training SET
+                if "TINY" in split.upper():
+                    sceneID = list(range(0 ,1))
+                else:
+                    sceneID = list(range(0, 17))
+            elif "VALID" in split.upper():  
+                # Validation Set
+                if "TINY" in split.upper():
+                    sceneID = list(range(0, 1))
+                else:
+                    sceneID = list(range(17, 19))
+            elif "TEST" in split.upper():  
+                # Testing Set
+                if "TINY" in split.upper():
+                    sceneID = list(range(0, 1))
+                else:
+                    sceneID = list(range(19, 21))
+            else:  
+                # Full Dataset
+                sceneID = list(range(21))
+        return sceneID
+
+    def getListOfAnno(self, sceneID, category_name="Car"):
+        list_of_scene = [
+            path for path in os.listdir(self.KITTI_velo)
+            if os.path.isdir(os.path.join(self.KITTI_velo, path)) and
+            int(path) in sceneID
+        ]
+        
+        list_of_tracklet_anno = []
+        for scene in list_of_scene:
+            # read the label file
+            label_file = os.path.join(self.KITTI_label, scene + ".txt")
+            if self.which_dataset.upper()=='NUSCENES':
+                df = pd.read_csv(
+                    label_file,
+                    sep=' ',
+                    names=[
+                        "frame", "track_id", "type", "truncated", "occluded",
+                        "alpha", "bbox_left", "bbox_top", "bbox_right",
+                        "bbox_bottom", "height", "width", "length", "x", "y", "z",
+                        "rotation_y","score",'num_lidar_pts','is_key_frame'
+                    ])
+            else:
+                df = pd.read_csv(
+                    label_file,
+                    sep=' ',
+                    names=[
+                        "frame", "track_id", "type", "truncated", "occluded",
+                        "alpha", "bbox_left", "bbox_top", "bbox_right",
+                        "bbox_bottom", "height", "width", "length", "x", "y", "z",
+                        "rotation_y", "score"
+                    ])
+                
+            df = df[df["type"] == category_name]
+            # insert the scene dim
+            df.insert(loc=0, column="scene", value=scene)
+            for track_id in df.track_id.unique():
+                df_tracklet = df[df["track_id"] == track_id]
+                df_tracklet = df_tracklet.reset_index(drop=True)    
+                tracklet_anno = [anno for index, anno in df_tracklet.iterrows()]
+                list_of_tracklet_anno.append(tracklet_anno)
+
+        return list_of_tracklet_anno
+
+    def read_calib_file(self, filepath):
+        """Read in a calibration file and parse into a dictionary."""
+        data = {}
+        with open(filepath, 'r') as f:
+            for line in f.readlines():
+                values = line.split()
+                
+                try:
+                    ind = values[0].find(':')
+                    if ind != -1:
+                        data[values[0][:ind]] = np.array(
+                            [float(x) for x in values[1:]]).reshape(3, 4)
+                    else:
+                        data[values[0]] = np.array(
+                            [float(x) for x in values[1:]]).reshape(3, 4)
+                except ValueError:
+                    data[values[0]] = np.array(
+                        [float(x) for x in values[1:]]).reshape(3, 3)
+        return data
+
+    def getBBandPC(self, anno):
+        calib_path = os.path.join(self.KITTI_calib, anno['scene'] + ".txt")
+        calib = self.read_calib_file(calib_path)
+        # get the Tr_velo_cam matrix, which transforms the point cloud from the velo coordinate system to the cam coordinate system
+        transf_mat = np.vstack((calib["Tr_velo_cam"], np.array([0, 0, 0, 1])))  # 3*4 --> 4*4
+        PC, bbox = self.getPCandBBfromPandas(anno, transf_mat)
+        return PC, bbox
+
+    def getPCandBBfromPandas(self, box, calib):
+        center = [box["x"], box["y"] - box["height"] / 2, box["z"]] 
+        size = [box["width"], box["length"], box["height"]]
+        orientation = Quaternion(axis=[0, 1, 0], radians=box["rotation_y"]) * Quaternion(axis=[1, 0, 0], radians=np.pi / 2)
+        BB = BoundingBox(center, size, orientation)
+
+        try:
+            # VELODYNE PointCloud
+            velodyne_path = os.path.join(self.KITTI_velo, box["scene"], f'{box["frame"]:06}.bin')
+            PC = PointCloud(np.fromfile(velodyne_path, dtype=np.float32).reshape(-1, 4).T)
+            # use calib(Tr_velo_cam matrix) rotate from the velo coordinate system to the cam coordinate system
+            PC.transform(calib) 
+        except :
+            PC = PointCloud(np.array([[0, 0, 0]]).T)
+
+        return PC, BB
+
+    def load_data(self, path):
+        file = open(path, "rb")
+        data = pickle.load(file)
+        file.close()
+        return data
+
+    def save_data(self, path, data):
+        file = open(path, "wb")
+        pickle.dump(data, file)
+        file.close()
+
+class BaseDataset(Dataset):
+    def __init__(self, which_dataset, path, split, category_name="Car", offset_BB=np.zeros(1), scale_BB=np.ones(1)):
+
+        self.dataset = kittiDataset(path=path, which_dataset=which_dataset)
+
+        self.split = split
+        self.category_name = category_name
+
+        self.getBBandPC = self.dataset.getBBandPC
+
+        self.sceneID = self.dataset.getSceneID(split=split)
+        
+        '''every anno include:
+        "sceneID", "frame", "track_id", "type", 
+        "truncated", "occluded", "alpha", 
+        "bbox_left", "bbox_top", "bbox_right", "bbox_bottom", 
+        "height", "width", "length", "x", "y", "z", "rotation_y"
+        '''
+        # list, every object is a tracklet anno
+        self.list_of_tracklet_anno = self.dataset.getListOfAnno(self.sceneID, category_name)
+        self.list_of_anno = [
+            anno for tracklet_anno in self.list_of_tracklet_anno
+            for anno in tracklet_anno
+        ]
+
+    def isTiny(self):
+        return ("TINY" in self.split.upper())
+
+    def __getitem__(self, index):
+        return self.getitem(index)
@@ -0,0 +1,25 @@
+from torch.utils.data import DataLoader
+from datasets.v2b_dataset import TrainDataset, TestDataset, TestDataset_WOD
+
+def get_dataset(opts, partition, shuffle=False):
+    loader, db = None, None
+    
+    if opts.use_tiny:
+        split = "Tiny_" + partition
+    else:
+        split = "Full_" + partition
+    
+    
+    if partition in ["Train", "Valid"]:
+        db = TrainDataset(opts, split)
+        loader = DataLoader(db, batch_size=opts.batch_size, shuffle=shuffle, num_workers=opts.n_workers, pin_memory=True)
+    else:
+        # Test dataset
+        if opts.which_dataset.upper() in ['KITTI', 'NUSCENES']:
+            db = TestDataset(opts, split)
+            loader = DataLoader(db, batch_size=1, shuffle=shuffle, num_workers=opts.n_workers, pin_memory=True, collate_fn=lambda x: x)
+        else:
+            # waymo test
+            db = TestDataset_WOD(opts, pc_type='raw_pc')
+
+    return loader, db