From c5f00a8d7091b46b0ecd175f722d98ea8bea485e Mon Sep 17 00:00:00 2001
From: HaodongDuan <duanhaodong@sensetime.com>
Date: Fri, 16 Oct 2020 17:35:47 +0800
Subject: [PATCH 1/3] resolve comments

---
 tools/data/hvu/generate_sub_file_list.py | 49 ++++++++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100644 tools/data/hvu/generate_sub_file_list.py

diff --git a/tools/data/hvu/generate_sub_file_list.py b/tools/data/hvu/generate_sub_file_list.py
new file mode 100644
index 0000000000..77c7bed651
--- /dev/null
+++ b/tools/data/hvu/generate_sub_file_list.py
@@ -0,0 +1,49 @@
+import argparse
+import os.path as osp
+
+import mmcv
+
+
+def main(annotation_file, category):
+    """Generate a file list that only keeps tags of one HVU category.
+
+    Args:
+        annotation_file (str): Annotation file holding tags of all
+            categories. The output is written to the same directory, with
+            'hvu' in the file name replaced by 'hvu_{category}'.
+        category (str): The tag category to keep.
+    """
+    assert category in [
+        'action', 'attribute', 'concept', 'event', 'object', 'scene'
+    ]
+
+    data = mmcv.load(annotation_file)
+    basename = osp.basename(annotation_file)
+    dirname = osp.dirname(annotation_file)
+    basename = basename.replace('hvu', f'hvu_{category}')
+    target_file = osp.join(dirname, basename)
+
+    result = []
+    for item in data:
+        label = item['label']
+        if category in label:
+            item['label'] = label[category]
+            result.append(item)
+
+    # Write only the kept items, not the unfiltered `data`.
+    mmcv.dump(result, target_file)
+
+
+if __name__ == '__main__':
+    # Both CLI arguments are positional and forwarded to main() by name.
+    parser = argparse.ArgumentParser(
+        description='Helper script for generating HVU per-category file list.')
+    parser.add_argument(
+        'annotation_file', type=str,
+        help='The annotation file which contains tags of all categories.')
+    categories = ['action', 'attribute', 'concept', 'event', 'object', 'scene']
+    parser.add_argument(
+        'category', type=str, choices=categories,
+        help='The tag category that you want to generate file list for.')
+    args = parser.parse_args()
+    main(annotation_file=args.annotation_file, category=args.category)

From 05575c18bf7dbba7e6e9f9a0f1c4f973668d2c44 Mon Sep 17 00:00:00 2001
From: HaodongDuan <duanhaodong@sensetime.com>
Date: Fri, 16 Oct 2020 17:37:19 +0800
Subject: [PATCH 2/3] update changelog

---
 docs/changelog.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/changelog.md b/docs/changelog.md
index d0fe20a249..3cff48664d 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -4,6 +4,7 @@
 
 **Improvements**
 - Set default values of 'average_clips' in each config file so that there is no need to set it explicitly during testing in most cases ([#232](https://github.com/open-mmlab/mmaction2/pull/232))
+- Extend HVU datatools to generate an individual file list for each tag category ([#258](https://github.com/open-mmlab/mmaction2/pull/258))
 
 **Bug Fixes**
 - Fix the potential bug for default value in dataset_setting ([#245](https://github.com/open-mmlab/mmaction2/pull/245))

From d5218d48d35374fee76d2368d38c7c234cc40738 Mon Sep 17 00:00:00 2001
From: Kenny <dhd.efz@gmail.com>
Date: Thu, 18 Mar 2021 20:18:25 +0800
Subject: [PATCH 3/3] update

---
 tools/data/hvu/README_zh_CN.md | 110 +++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)
 create mode 100644 tools/data/hvu/README_zh_CN.md

diff --git a/tools/data/hvu/README_zh_CN.md b/tools/data/hvu/README_zh_CN.md
new file mode 100644
index 0000000000..14ba22b3d2
--- /dev/null
+++ b/tools/data/hvu/README_zh_CN.md
@@ -0,0 +1,110 @@
+# 准备 HVU 数据集
+
+## 简介
+
+[DATASET]
+
+```BibTeX
+@article{Diba2019LargeSH,
+  title={Large Scale Holistic Video Understanding},
+  author={Ali Diba and M. Fayyaz and Vivek Sharma and Manohar Paluri and Jurgen Gall and R. Stiefelhagen and L. Gool},
+  journal={arXiv: Computer Vision and Pattern Recognition},
+  year={2019}
+}
+```
+
+请参照 [官方项目](https://github.com/holistic-video-understanding/HVU-Dataset/) 及 [原论文](https://arxiv.org/abs/1904.11451) 以获取数据集基本信息。
+在开始之前，用户需确保当前目录为 `$MMACTION2/tools/data/hvu/`。
+
+## 1. 准备标注文件
+
+首先，用户可以使用如下脚本下载标注文件并进行预处理：
+
+```shell
+bash download_annotations.sh
+```
+
+此外，用户可使用如下命令解析 HVU 的标签列表：
+
+```shell
+python parse_tag_list.py
+```
+
+## 2. 准备视频
+
+用户可以使用以下脚本准备视频，视频准备代码修改自 [ActivityNet 爬虫](https://github.com/activitynet/ActivityNet/tree/master/Crawler/Kinetics)。
+注意这一步骤将花费较长时间。
+
+```shell
+bash download_videos.sh
+```
+
+## 3. 提取 RGB 帧和光流
+
+如果用户仅使用 video loader，则可以跳过本步。
+
+在提取之前，请参考 [安装教程](/docs_zh_CN/install.md) 安装 [denseflow](https://github.com/open-mmlab/denseflow)。
+
+用户可使用如下脚本同时抽取 RGB 帧和光流：
+
+```shell
+bash extract_frames.sh
+```
+
+该脚本默认生成短边长度为 256 的帧，可参考 [数据准备](/docs_zh_CN/data_preparation.md) 获得更多细节。
+
+## 4. 生成文件列表
+
+用户可以使用以下两个脚本分别为视频和帧文件夹生成文件列表：
+
+```shell
+bash generate_videos_filelist.sh
+# 为帧文件夹生成文件列表
+bash generate_rawframes_filelist.sh
+```
+
+## 5. 为每个 tag 种类生成文件列表
+
+若用户需要为 HVU 数据集的每个 tag 种类训练识别模型，则需要进行此步骤。
+
+步骤 4 中生成的文件列表包含不同类型的标签，仅支持使用 HVUDataset 进行涉及多个标签种类的多任务学习。加载数据的过程中需要使用 `LoadHVULabel` 类进行多类别标签的加载，训练过程中使用 `HVULoss` 作为损失函数。
+
+如果用户仅需训练某一特定类别的标签，例如训练一识别模型用于识别 HVU 中 `action` 类别的标签，则建议使用如下脚本为特定标签种类生成文件列表。新生成的列表将只含有特定类别的标签，因此可使用 `VideoDataset` 或 `RawframeDataset` 进行加载。训练过程中使用 `BCELossWithLogits` 作为损失函数。
+
+以下脚本为类别为 ${category} 的标签生成文件列表，注意仅支持 HVU 数据集包含的 6 种标签类别: action, attribute, concept, event, object, scene。
+
+```shell
+python generate_sub_file_list.py path/to/filelist.json ${category}
+```
+
+对于类别 ${category}，生成的标签列表文件名中将使用 `hvu_${category}` 替代 `hvu`。例如，若原指定文件名为 `hvu_train.json`，则对于类别 action，生成的文件列表名为 `hvu_action_train.json`。
+
+## 6. 目录结构
+
+在完成 HVU 的数据处理后，将得到帧文件夹（RGB 帧和光流帧），视频以及标注文件。
+
+在整个项目目录下（仅针对 HVU），完整目录结构如下所示：
+
+```
+mmaction2
+├── mmaction
+├── tools
+├── configs
+├── data
+│   ├── hvu
+│   │   ├── hvu_train_video.json
+│   │   ├── hvu_val_video.json
+│   │   ├── hvu_train.json
+│   │   ├── hvu_val.json
+│   │   ├── annotations
+│   │   ├── videos_train
+│   │   │   ├── OLpWTpTC4P8_000570_000670.mp4
+│   │   │   ├── xsPKW4tZZBc_002330_002430.mp4
+│   │   │   ├── ...
+│   │   ├── videos_val
+│   │   ├── rawframes_train
+│   │   ├── rawframes_val
+
+```
+
+关于 HVU 数据集上的训练与测试，请参照 [基础教程](/docs_zh_CN/getting_started.md)。