From f6b9f88c8b37252bd45f186c6dbe071d359f3e79 Mon Sep 17 00:00:00 2001
From: HaodongDuan
Date: Sun, 26 Jul 2020 11:10:41 +0800
Subject: [PATCH 1/7] add training & testing config

---
 .../tsn/data_benchmark/tsn_fix_multi_train.py | 119 ++++++++++++++++++
 .../data_benchmark/tsn_fix_resize_train.py    | 110 ++++++++++++++++
 .../tsn/data_benchmark/tsn_fix_test_10c.py    |  53 ++++++++
 .../tsn/data_benchmark/tsn_fix_test_3c.py     |  49 ++++++++
 .../tsn/data_benchmark/tsn_se_multi_train.py  | 115 +++++++++++++++++
 .../tsn/data_benchmark/tsn_se_resize_train.py | 114 +++++++++++++++++
 .../tsn/data_benchmark/tsn_se_test_10c.py     |  53 ++++++++
 .../tsn/data_benchmark/tsn_se_test_3c.py      |  53 ++++++++
 8 files changed, 666 insertions(+)
 create mode 100644 configs/recognition/tsn/data_benchmark/tsn_fix_multi_train.py
 create mode 100644 configs/recognition/tsn/data_benchmark/tsn_fix_resize_train.py
 create mode 100644 configs/recognition/tsn/data_benchmark/tsn_fix_test_10c.py
 create mode 100644 configs/recognition/tsn/data_benchmark/tsn_fix_test_3c.py
 create mode 100644 configs/recognition/tsn/data_benchmark/tsn_se_multi_train.py
 create mode 100644 configs/recognition/tsn/data_benchmark/tsn_se_resize_train.py
 create mode 100644 configs/recognition/tsn/data_benchmark/tsn_se_test_10c.py
 create mode 100644 configs/recognition/tsn/data_benchmark/tsn_se_test_3c.py

diff --git a/configs/recognition/tsn/data_benchmark/tsn_fix_multi_train.py b/configs/recognition/tsn/data_benchmark/tsn_fix_multi_train.py
new file mode 100644
index 0000000000..e5e73a998c
--- /dev/null
+++ b/configs/recognition/tsn/data_benchmark/tsn_fix_multi_train.py
@@ -0,0 +1,119 @@
+# model settings
+model = dict(
+    type='Recognizer2D',
+    backbone=dict(
+        type='ResNet',
+        pretrained='torchvision://resnet50',
+        depth=50,
+        norm_eval=False),
+    cls_head=dict(
+        type='TSNHead',
+        num_classes=400,
+        in_channels=2048,
+        spatial_type='avg',
+        consensus=dict(type='AvgConsensus', dim=1),
+        dropout_ratio=0.4,
+        init_std=0.01))
+# model training and testing settings
+train_cfg = None
+test_cfg = dict(average_clips=None)
+# dataset settings
+dataset_type = 'RawframeDataset'
+data_root = 'data/kinetics400/rawframes_train'
+data_root_val = 'data/kinetics400/rawframes_val'
+ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes.txt'
+ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
+ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
+train_pipeline = [
+    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=3),
+    dict(type='FrameSelector'),
+    dict(type='Resize', scale=(-1, 256)),
+    dict(
+        type='MultiScaleCrop',
+        input_size=224,
+        scales=(1, 0.875, 0.75, 0.66),
+        random_crop=False,
+        max_wh_scale_gap=1),
+    dict(type='Resize', scale=(224, 224), keep_ratio=False),
+    dict(type='Flip', flip_ratio=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='FormatShape', input_format='NCHW'),
+    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
+    dict(type='ToTensor', keys=['imgs', 'label'])
+]
+val_pipeline = [
+    dict(
+        type='SampleFrames',
+        clip_len=1,
+        frame_interval=1,
+        num_clips=3,
+        test_mode=True),
+    dict(type='FrameSelector'),
+    dict(type='Resize', scale=(-1, 256)),
+    dict(type='CenterCrop', crop_size=224),
+    dict(type='Flip', flip_ratio=0),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='FormatShape', input_format='NCHW'),
+    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
+    dict(type='ToTensor', keys=['imgs'])
+]
+test_pipeline = [
+    dict(
+        type='SampleFrames',
+        clip_len=1,
+        frame_interval=1,
+        num_clips=25,
+        test_mode=True),
+    dict(type='FrameSelector'),
+    dict(type='Resize', scale=(-1, 256)),
+    dict(type='TenCrop', crop_size=224),
+    dict(type='Flip', flip_ratio=0),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='FormatShape', input_format='NCHW'),
+    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
+    dict(type='ToTensor', keys=['imgs'])
+]
+data = dict(
+    videos_per_gpu=32,
+    workers_per_gpu=4,
+    train=dict(
+        type=dataset_type,
+        ann_file=ann_file_train,
+        data_prefix=data_root,
+        pipeline=train_pipeline),
+    val=dict(
+        type=dataset_type,
+        ann_file=ann_file_val,
+        data_prefix=data_root_val,
+        pipeline=val_pipeline),
+    test=dict(
+        type=dataset_type,
+        ann_file=ann_file_test,
+        data_prefix=data_root_val,
+        pipeline=test_pipeline))
+# optimizer
+optimizer = dict(
+    type='SGD', lr=0.01, momentum=0.9,
+    weight_decay=0.0001)  # this lr is used for 8 gpus
+optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
+# learning policy
+lr_config = dict(policy='step', step=[40, 80])
+total_epochs = 100
+checkpoint_config = dict(interval=5)
+evaluation = dict(
+    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'], topk=(1, 5))
+log_config = dict(
+    interval=20,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook'),
+    ])
+# runtime settings
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/tsn_fix_multi_train/'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]

diff --git a/configs/recognition/tsn/data_benchmark/tsn_fix_resize_train.py b/configs/recognition/tsn/data_benchmark/tsn_fix_resize_train.py
new file mode 100644
index 0000000000..2f43aee758
--- /dev/null
+++ b/configs/recognition/tsn/data_benchmark/tsn_fix_resize_train.py
@@ -0,0 +1,110 @@
+# model settings
+model = dict(
+    type='Recognizer2D',
+    backbone=dict(
+        type='ResNet',
+        pretrained='torchvision://resnet50',
+        depth=50,
+        norm_eval=False),
+    cls_head=dict(
+        type='TSNHead',
+        num_classes=400,
+        in_channels=2048,
+        spatial_type='avg',
+        consensus=dict(type='AvgConsensus', dim=1),
+        dropout_ratio=0.4,
+        init_std=0.01))
+# model training and testing settings
+train_cfg = None
+test_cfg = dict(average_clips=None)
+# dataset settings
+dataset_type = 'RawframeDataset'
+data_root = 'data/kinetics400/rawframes_train'
+data_root_val = 'data/kinetics400/rawframes_val'
+ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes.txt'
+ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
+ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
+train_pipeline = [
+    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=3),
+    dict(type='FrameSelector'),
+    dict(type='Resize', scale=(-1, 256)),
+    dict(type='RandomResizedCrop'),
+    dict(type='Resize', scale=(224, 224), keep_ratio=False),
+    dict(type='Flip', flip_ratio=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='FormatShape', input_format='NCHW'),
+    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
+    dict(type='ToTensor', keys=['imgs', 'label'])
+]
+val_pipeline = [
+    dict(
+        type='SampleFrames',
+        clip_len=1,
+        frame_interval=1,
+        num_clips=3,
+        test_mode=True),
+    dict(type='FrameSelector'),
+    dict(type='Resize', scale=(-1, 256)),
+    dict(type='CenterCrop', crop_size=224),
+    dict(type='Flip', flip_ratio=0),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='FormatShape', input_format='NCHW'),
+    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
+    dict(type='ToTensor', keys=['imgs'])
+]
+test_pipeline = [
+    dict(
+        type='SampleFrames',
+        clip_len=1,
+        frame_interval=1,
+        num_clips=25,
+        test_mode=True),
+    dict(type='FrameSelector'),
+    dict(type='Resize', scale=(-1, 256)),
+    dict(type='TenCrop', crop_size=224),
+    dict(type='Flip', flip_ratio=0),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='FormatShape', input_format='NCHW'),
+    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
+    dict(type='ToTensor', keys=['imgs'])
+]
+data = dict(
+    videos_per_gpu=32,
+    workers_per_gpu=4,
+    train=dict(
+        type=dataset_type,
+        ann_file=ann_file_train,
+        data_prefix=data_root,
+        pipeline=train_pipeline),
+    val=dict(
+        type=dataset_type,
+        ann_file=ann_file_val,
+        data_prefix=data_root_val,
+        pipeline=val_pipeline),
+    test=dict(
+        type=dataset_type,
+        ann_file=ann_file_test,
+        data_prefix=data_root_val,
+        pipeline=test_pipeline))
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
+# learning policy
+lr_config = dict(policy='step', step=[40, 80])
+total_epochs = 100
+checkpoint_config = dict(interval=5)
+evaluation = dict(
+    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'], topk=(1, 5))
+log_config = dict(
+    interval=20, hooks=[
+        dict(type='TextLoggerHook'),
+    ])
+# runtime settings
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/tsn_fix_resize_train'
+load_from = None
+resume_from = None
+workflow = [('train', 5)]

diff --git a/configs/recognition/tsn/data_benchmark/tsn_fix_test_10c.py b/configs/recognition/tsn/data_benchmark/tsn_fix_test_10c.py
new file mode 100644
index 0000000000..84660c44af
--- /dev/null
+++ b/configs/recognition/tsn/data_benchmark/tsn_fix_test_10c.py
@@ -0,0 +1,53 @@
+# model settings
+model = dict(
+    type='Recognizer2D',
+    backbone=dict(
+        type='ResNet',
+        pretrained='torchvision://resnet50',
+        depth=50,
+        norm_eval=False),
+    cls_head=dict(
+        type='TSNHead',
+        num_classes=400,
+        in_channels=2048,
+        spatial_type='avg',
+        consensus=dict(type='AvgConsensus', dim=1),
+        dropout_ratio=0.4,
+        init_std=0.01))
+# model training and testing settings
+train_cfg = None
+test_cfg = dict(average_clips=None)
+# dataset settings
+dataset_type = 'RawframeDataset'
+data_root_val = 'data/kinetics400/rawframes_val'
+ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
+mc_cfg = dict(
+    server_list_cfg='/mnt/lustre/share/memcached_client/server_list.conf',
+    client_cfg='/mnt/lustre/share/memcached_client/client.conf',
+    sys_path='/mnt/lustre/share/pymc/py3')
+test_pipeline = [
+    dict(
+        type='SampleFrames',
+        clip_len=1,
+        frame_interval=1,
+        num_clips=25,
+        test_mode=True),
+    dict(type='FrameSelector'),
+    dict(type='Resize', scale=(-1, 256)),
+    dict(type='TenCrop', crop_size=224),
+    dict(type='Flip', flip_ratio=0),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='FormatShape', input_format='NCHW'),
+    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
+    dict(type='ToTensor', keys=['imgs'])
+]
+data = dict(
+    workers_per_gpu=4,
+    test=dict(
+        type=dataset_type,
+        ann_file=ann_file_test,
+        data_prefix=data_root_val,
+        pipeline=test_pipeline))
+dist_params = dict(backend='nccl')

diff --git a/configs/recognition/tsn/data_benchmark/tsn_fix_test_3c.py b/configs/recognition/tsn/data_benchmark/tsn_fix_test_3c.py
new file mode 100644
index 0000000000..16f5f97673
--- /dev/null
+++ b/configs/recognition/tsn/data_benchmark/tsn_fix_test_3c.py
@@ -0,0 +1,49 @@
+# model settings
+model = dict(
+    type='Recognizer2D',
+    backbone=dict(
+        type='ResNet',
+        pretrained='torchvision://resnet50',
+        depth=50,
+        norm_eval=False),
+    cls_head=dict(
+        type='TSNHead',
+        num_classes=400,
+        in_channels=2048,
+        spatial_type='avg',
+        consensus=dict(type='AvgConsensus', dim=1),
+        dropout_ratio=0.4,
+        init_std=0.01))
+# model training and testing settings
+train_cfg = None
+test_cfg = dict(average_clips=None)
+# dataset settings
+dataset_type = 'RawframeDataset'
+data_root_val = 'data/kinetics400/rawframes_val'
+ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
+test_pipeline = [
+    dict(
+        type='SampleFrames',
+        clip_len=1,
+        frame_interval=1,
+        num_clips=25,
+        test_mode=True),
+    dict(type='FrameSelector'),
+    dict(type='Resize', scale=(-1, 256)),
+    dict(type='ThreeCrop', crop_size=256),
+    dict(type='Flip', flip_ratio=0),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='FormatShape', input_format='NCHW'),
+    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
+    dict(type='ToTensor', keys=['imgs'])
+]
+data = dict(
+    workers_per_gpu=4,
+    test=dict(
+        type=dataset_type,
+        ann_file=ann_file_test,
+        data_prefix=data_root_val,
+        pipeline=test_pipeline))
+dist_params = dict(backend='nccl')

diff --git a/configs/recognition/tsn/data_benchmark/tsn_se_multi_train.py b/configs/recognition/tsn/data_benchmark/tsn_se_multi_train.py
new file mode 100644
index 0000000000..b6bc86f647
--- /dev/null
+++ b/configs/recognition/tsn/data_benchmark/tsn_se_multi_train.py
@@ -0,0 +1,115 @@
+# model settings
+model = dict(
+    type='Recognizer2D',
+    backbone=dict(
+        type='ResNet',
+        pretrained='torchvision://resnet50',
+        depth=50,
+        norm_eval=False),
+    cls_head=dict(
+        type='TSNHead',
+        num_classes=400,
+        in_channels=2048,
+        spatial_type='avg',
+        consensus=dict(type='AvgConsensus', dim=1),
+        dropout_ratio=0.4,
+        init_std=0.01))
+# model training and testing settings
+train_cfg = None
+test_cfg = dict(average_clips=None)
+# dataset settings
+dataset_type = 'RawframeDataset'
+data_root = 'data/kinetics400/rawframes_train_320p'
+data_root_val = 'data/kinetics400/rawframes_val_320p'
+ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes_320p.txt'
+ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes_320p.txt'
+ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes_320p.txt'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
+train_pipeline = [
+    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=3),
+    dict(type='FrameSelector'),
+    dict(type='Resize', scale=(-1, 256)),
+    dict(
+        type='MultiScaleCrop',
+        input_size=224,
+        scales=(1, 0.875, 0.75, 0.66),
+        random_crop=False,
+        max_wh_scale_gap=1),
+    dict(type='Resize', scale=(224, 224), keep_ratio=False),
+    dict(type='Flip', flip_ratio=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='FormatShape', input_format='NCHW'),
+    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
+    dict(type='ToTensor', keys=['imgs', 'label'])
+]
+val_pipeline = [
+    dict(
+        type='SampleFrames',
+        clip_len=1,
+        frame_interval=1,
+        num_clips=3,
+        test_mode=True),
+    dict(type='FrameSelector'),
+    dict(type='Resize', scale=(-1, 256)),
+    dict(type='CenterCrop', crop_size=224),
+    dict(type='Flip', flip_ratio=0),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='FormatShape', input_format='NCHW'),
+    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
+    dict(type='ToTensor', keys=['imgs'])
+]
+test_pipeline = [
+    dict(
+        type='SampleFrames',
+        clip_len=1,
+        frame_interval=1,
+        num_clips=25,
+        test_mode=True),
+    dict(type='FrameSelector'),
+    dict(type='Resize', scale=(-1, 256)),
+    dict(type='TenCrop', crop_size=224),
+    dict(type='Flip', flip_ratio=0),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='FormatShape', input_format='NCHW'),
+    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
+    dict(type='ToTensor', keys=['imgs'])
+]
+data = dict(
+    videos_per_gpu=32,
+    workers_per_gpu=4,
+    train=dict(
+        type=dataset_type,
+        ann_file=ann_file_train,
+        data_prefix=data_root,
+        pipeline=train_pipeline),
+    val=dict(
+        type=dataset_type,
+        ann_file=ann_file_val,
+        data_prefix=data_root_val,
+        pipeline=val_pipeline),
+    test=dict(
+        type=dataset_type,
+        ann_file=ann_file_test,
+        data_prefix=data_root_val,
+        pipeline=test_pipeline))
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
+# learning policy
+lr_config = dict(policy='step', step=[40, 80])
+total_epochs = 100
+checkpoint_config = dict(interval=5)
+evaluation = dict(
+    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'], topk=(1, 5))
+log_config = dict(
+    interval=20, hooks=[
+        dict(type='TextLoggerHook'),
+    ])
+# runtime settings
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/tsn_se_multi_train/'
+load_from = None
+resume_from = None
+workflow = [('train', 5)]

diff --git a/configs/recognition/tsn/data_benchmark/tsn_se_resize_train.py b/configs/recognition/tsn/data_benchmark/tsn_se_resize_train.py
new file mode 100644
index 0000000000..5d13536613
--- /dev/null
+++ b/configs/recognition/tsn/data_benchmark/tsn_se_resize_train.py
@@ -0,0 +1,114 @@
+# model settings
+model = dict(
+    type='Recognizer2D',
+    backbone=dict(
+        type='ResNet',
+        pretrained='torchvision://resnet50',
+        depth=50,
+        norm_eval=False),
+    cls_head=dict(
+        type='TSNHead',
+        num_classes=400,
+        in_channels=2048,
+        spatial_type='avg',
+        consensus=dict(type='AvgConsensus', dim=1),
+        dropout_ratio=0.4,
+        init_std=0.01))
+# model training and testing settings
+train_cfg = None
+test_cfg = dict(average_clips=None)
+# dataset settings
+dataset_type = 'RawframeDataset'
+data_root = 'data/kinetics400/rawframes_train_320p'
+data_root_val = 'data/kinetics400/rawframes_val_320p'
+ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes_320p.txt'
+ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes_320p.txt'
+ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes_320p.txt'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
+train_pipeline = [
+    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=3),
+    dict(type='FrameSelector'),
+    dict(type='Resize', scale=(-1, 256)),
+    dict(type='RandomResizedCrop'),
+    dict(type='Resize', scale=(224, 224), keep_ratio=False),
+    dict(type='Flip', flip_ratio=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='FormatShape', input_format='NCHW'),
+    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
+    dict(type='ToTensor', keys=['imgs', 'label'])
+]
+val_pipeline = [
+    dict(
+        type='SampleFrames',
+        clip_len=1,
+        frame_interval=1,
+        num_clips=3,
+        test_mode=True),
+    dict(type='FrameSelector'),
+    dict(type='Resize', scale=(-1, 256)),
+    dict(type='CenterCrop', crop_size=256),
+    dict(type='Flip', flip_ratio=0),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='FormatShape', input_format='NCHW'),
+    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
+    dict(type='ToTensor', keys=['imgs'])
+]
+test_pipeline = [
+    dict(
+        type='SampleFrames',
+        clip_len=1,
+        frame_interval=1,
+        num_clips=25,
+        test_mode=True),
+    dict(type='FrameSelector'),
+    dict(type='Resize', scale=(-1, 256)),
+    dict(type='ThreeCrop', crop_size=256),
+    dict(type='Flip', flip_ratio=0),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='FormatShape', input_format='NCHW'),
+    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
+    dict(type='ToTensor', keys=['imgs'])
+]
+data = dict(
+    videos_per_gpu=32,
+    workers_per_gpu=4,
+    train=dict(
+        type=dataset_type,
+        ann_file=ann_file_train,
+        data_prefix=data_root,
+        pipeline=train_pipeline),
+    val=dict(
+        type=dataset_type,
+        ann_file=ann_file_val,
+        data_prefix=data_root_val,
+        pipeline=val_pipeline),
+    test=dict(
+        type=dataset_type,
+        ann_file=ann_file_test,
+        data_prefix=data_root_val,
+        pipeline=test_pipeline))
+# optimizer
+optimizer = dict(
+    type='SGD', lr=0.01, momentum=0.9,
+    weight_decay=0.0001)  # this lr is used for 8 gpus
+optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
+# learning policy
+lr_config = dict(policy='step', step=[40, 80])
+total_epochs = 100
+checkpoint_config = dict(interval=5)
+evaluation = dict(
+    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'], topk=(1, 5))
+log_config = dict(
+    interval=20,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook'),
+    ])
+# runtime settings
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/tsn_se_resize_train/'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]

diff --git a/configs/recognition/tsn/data_benchmark/tsn_se_test_10c.py b/configs/recognition/tsn/data_benchmark/tsn_se_test_10c.py
new file mode 100644
index 0000000000..7364bef41a
--- /dev/null
+++ b/configs/recognition/tsn/data_benchmark/tsn_se_test_10c.py
@@ -0,0 +1,53 @@
+# model settings
+model = dict(
+    type='Recognizer2D',
+    backbone=dict(
+        type='ResNet',
+        pretrained='torchvision://resnet50',
+        depth=50,
+        norm_eval=False),
+    cls_head=dict(
+        type='TSNHead',
+        num_classes=400,
+        in_channels=2048,
+        spatial_type='avg',
+        consensus=dict(type='AvgConsensus', dim=1),
+        dropout_ratio=0.4,
+        init_std=0.01))
+# model training and testing settings
+train_cfg = None
+test_cfg = dict(average_clips=None)
+# dataset settings
+dataset_type = 'RawframeDataset'
+data_root_val = 'data/kinetics400/rawframes_val_320p'
+ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes_320p.txt'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
+mc_cfg = dict(
+    server_list_cfg='/mnt/lustre/share/memcached_client/server_list.conf',
+    client_cfg='/mnt/lustre/share/memcached_client/client.conf',
+    sys_path='/mnt/lustre/share/pymc/py3')
+test_pipeline = [
+    dict(
+        type='SampleFrames',
+        clip_len=1,
+        frame_interval=1,
+        num_clips=25,
+        test_mode=True),
+    dict(type='FrameSelector'),
+    dict(type='Resize', scale=(-1, 256)),
+    dict(type='TenCrop', crop_size=224),
+    dict(type='Flip', flip_ratio=0),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='FormatShape', input_format='NCHW'),
+    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
+    dict(type='ToTensor', keys=['imgs'])
+]
+data = dict(
+    workers_per_gpu=4,
+    test=dict(
+        type=dataset_type,
+        ann_file=ann_file_test,
+        data_prefix=data_root_val,
+        pipeline=test_pipeline))
+dist_params = dict(backend='nccl')

diff --git a/configs/recognition/tsn/data_benchmark/tsn_se_test_3c.py b/configs/recognition/tsn/data_benchmark/tsn_se_test_3c.py
new file mode 100644
index 0000000000..8b29e28e27
--- /dev/null
+++ b/configs/recognition/tsn/data_benchmark/tsn_se_test_3c.py
@@ -0,0 +1,53 @@
+# model settings
+model = dict(
+    type='Recognizer2D',
+    backbone=dict(
+        type='ResNet',
+        pretrained='torchvision://resnet50',
+        depth=50,
+        norm_eval=False),
+    cls_head=dict(
+        type='TSNHead',
+        num_classes=400,
+        in_channels=2048,
+        spatial_type='avg',
+        consensus=dict(type='AvgConsensus', dim=1),
+        dropout_ratio=0.4,
+        init_std=0.01))
+# model training and testing settings
+train_cfg = None
+test_cfg = dict(average_clips=None)
+# dataset settings
+dataset_type = 'RawframeDataset'
+data_root_val = 'data/kinetics400/rawframes_val_320p'
+ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes_320p.txt'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
+mc_cfg = dict(
+    server_list_cfg='/mnt/lustre/share/memcached_client/server_list.conf',
+    client_cfg='/mnt/lustre/share/memcached_client/client.conf',
+    sys_path='/mnt/lustre/share/pymc/py3')
+test_pipeline = [
+    dict(
+        type='SampleFrames',
+        clip_len=1,
+        frame_interval=1,
+        num_clips=25,
+        test_mode=True),
+    dict(type='FrameSelector'),
+    dict(type='Resize', scale=(-1, 256)),
+    dict(type='ThreeCrop', crop_size=256),
+    dict(type='Flip', flip_ratio=0),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='FormatShape', input_format='NCHW'),
+    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
+    dict(type='ToTensor', keys=['imgs'])
+]
+data = dict(
+    workers_per_gpu=4,
+    test=dict(
+        type=dataset_type,
+        ann_file=ann_file_test,
+        data_prefix=data_root_val,
+        pipeline=test_pipeline))
+dist_params = dict(backend='nccl')

From 14500602082f018774b916b46536b4be3de44d03 Mon Sep 17 00:00:00 2001
From: HaodongDuan
Date: Sun, 26 Jul 2020 12:09:20 +0800
Subject: [PATCH 2/7] update readme

---
 configs/recognition/tsn/README.md | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/configs/recognition/tsn/README.md b/configs/recognition/tsn/README.md
index 5aea479900..00afc2e196 100644
--- a/configs/recognition/tsn/README.md
+++ b/configs/recognition/tsn/README.md
@@ -26,6 +26,21 @@
 Here, We use [1: 1] to indicate that we combine rgb and flow score with coefficients 1: 1 to get the two-stream prediction (without applying softmax).
+
+### Kinetics-400 Data Benchmark (8-gpus, ResNet50, ImageNet pretrain; 3 segments)
+
+| config | resolution | training augmentation | testing protocol | top1 acc | top5 acc | ckpt | log | json |
+| :----------------------------------------------------------: | :------------: | :-------------------: | :--------------: | :------: | :------: | :------: | :-----: | :------: |
+| [tsn_fix_multi_train](/configs/recognition/tsn/data_benchmark/tsn_fix_multi_train.py) | 340x256 | MultiScaleCrop | 25x10 frames | 70.60 | 89.26 | [ckpt]() | [log]() | [json]() |
+| x | 340x256 | MultiScaleCrop | 25x3 frames | 70.52 | 89.39 | x | x | x |
+| [tsn_fix_resize_train](/configs/recognition/tsn/data_benchmark/tsn_fix_resize_train.py) | 340x256 | RandomResizedCrop | 25x10 frames | 70.11 | 89.01 | [ckpt]() | [log]() | [json]() |
+| x | 340x256 | RandomResizedCrop | 25x3 frames | 69.95 | 89.02 | x | x | x |
+| [tsn_se_multi_train](/configs/recognition/tsn/data_benchmark/tsn_se_multi_train.py) | short-side 320 | MultiScaleCrop | 25x10 frames | 70.32 | 89.25 | [ckpt]() | [log]() | [json]() |
+| x | short-side 320 | MultiScaleCrop | 25x3 frames | 70.54 | 89.39 | x | x | x |
+| [tsn_se_resize_train](/configs/recognition/tsn/data_benchmark/tsn_se_resize_train.py) | short-side 320 | RandomResizedCrop | 25x10 frames | 70.44 | 89.23 | [ckpt]() | [log]() | [json]() |
+| x | short-side 320 | RandomResizedCrop | 25x3 frames | 70.91 | 89.51 | x | x | x |
+
+
 ### Something-Something V1
 |config|resolution | gpus| backbone |pretrain| top1 acc| top5 acc | reference top1 acc | reference top5 acc | gpu_mem(M) | ckpt | log| json|

From d00d0cb1cb81f5b9b835ad03869f9aaea6c3fac8 Mon Sep 17 00:00:00 2001
From: HaodongDuan
Date: Sun, 26 Jul 2020 13:25:20 +0800
Subject: [PATCH 3/7] update readme, add description to data benchmark

---
 configs/recognition/tsn/README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/configs/recognition/tsn/README.md b/configs/recognition/tsn/README.md
index 00afc2e196..cd8ebe962a 100644
--- a/configs/recognition/tsn/README.md
+++ b/configs/recognition/tsn/README.md
@@ -39,7 +39,9 @@ Here, We use [1: 1] to indicate that we combine rgb and flow score with coeffici
 | [tsn_se_resize_train](/configs/recognition/tsn/data_benchmark/tsn_se_resize_train.py) | short-side 320 | RandomResizedCrop | 25x10 frames | 70.44 | 89.23 | [ckpt]() | [log]() | [json]() |
 | x | short-side 320 | RandomResizedCrop | 25x3 frames | 70.91 | 89.51 | x | x | x |
+Notes:
+
+1. In data benchmark, we compare: 1. Different data preprocessing methods: (1) Resize video to 340x256, (2) Resize the short edge of video to 320px; 2. Different data augmentation methods: (1) MultiScaleCrop, (2) RandomResizedCrop; 3. Different testing protocols: (1) 25 clips x 10 crops, (2) 25 clips x 3 crops
 
 ### Something-Something V1

From 0d2562ae535b28cb1d2a0fb93d5d779bfd2e4e5e Mon Sep 17 00:00:00 2001
From: HaodongDuan
Date: Mon, 3 Aug 2020 10:49:27 +0800
Subject: [PATCH 4/7] update README

---
 configs/recognition/tsn/README.md | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/configs/recognition/tsn/README.md b/configs/recognition/tsn/README.md
index cd8ebe962a..f8ab7d0074 100644
--- a/configs/recognition/tsn/README.md
+++ b/configs/recognition/tsn/README.md
@@ -28,6 +28,8 @@
 ### Kinetics-400 Data Benchmark (8-gpus, ResNet50, ImageNet pretrain; 3 segments)
+In data benchmark, we compare: 1. Different data preprocessing methods: (1) Resize video to 340x256, (2) Resize the short edge of video to 320px; 2. Different data augmentation methods: (1) MultiScaleCrop, (2) RandomResizedCrop; 3. Different testing protocols: (1) 25 clips x 10 crops, (2) 25 clips x 3 crops. In config names, **fix**(fix_ratio) and **se**(shortedge) denotes 340x256 data and short-edge 320px data respectively.
+
 | config | resolution | training augmentation | testing protocol | top1 acc | top5 acc | ckpt | log | json |
 | :----------------------------------------------------------: | :------------: | :-------------------: | :--------------: | :------: | :------: | :------: | :-----: | :------: |
 | [tsn_fix_multi_train](/configs/recognition/tsn/data_benchmark/tsn_fix_multi_train.py) | 340x256 | MultiScaleCrop | 25x10 frames | 70.60 | 89.26 | [ckpt]() | [log]() | [json]() |
 | x | 340x256 | MultiScaleCrop | 25x3 frames | 70.52 | 89.39 | x | x | x |
 | [tsn_fix_resize_train](/configs/recognition/tsn/data_benchmark/tsn_fix_resize_train.py) | 340x256 | RandomResizedCrop | 25x10 frames | 70.11 | 89.01 | [ckpt]() | [log]() | [json]() |
 | x | 340x256 | RandomResizedCrop | 25x3 frames | 69.95 | 89.02 | x | x | x |
 | [tsn_se_multi_train](/configs/recognition/tsn/data_benchmark/tsn_se_multi_train.py) | short-side 320 | MultiScaleCrop | 25x10 frames | 70.32 | 89.25 | [ckpt]() | [log]() | [json]() |
 | x | short-side 320 | MultiScaleCrop | 25x3 frames | 70.54 | 89.39 | x | x | x |
 | [tsn_se_resize_train](/configs/recognition/tsn/data_benchmark/tsn_se_resize_train.py) | short-side 320 | RandomResizedCrop | 25x10 frames | 70.44 | 89.23 | [ckpt]() | [log]() | [json]() |
 | x | short-side 320 | RandomResizedCrop | 25x3 frames | 70.91 | 89.51 | x | x | x |
-Notes:
-
-1. In data benchmark, we compare: 1. Different data preprocessing methods: (1) Resize video to 340x256, (2) Resize the short edge of video to 320px; 2. Different data augmentation methods: (1) MultiScaleCrop, (2) RandomResizedCrop; 3. Different testing protocols: (1) 25 clips x 10 crops, (2) 25 clips x 3 crops
 
 ### Something-Something V1

From 23c4afb227343bb1f312a7a9b41357c967ff0681 Mon Sep 17 00:00:00 2001
From: HaodongDuan
Date: Mon, 3 Aug 2020 11:31:13 +0800
Subject: [PATCH 5/7] Update names

---
 configs/recognition/tsn/README.md             | 10 +++++-----
 ...multi_train.py => tsn_320p_multiscalecrop_train.py} |  2 +-
 ...ze_train.py => tsn_320p_randomresizedcrop_train.py} |  2 +-
 ..._se_test_10c.py => tsn_320p_test_25frame_10crop.py} |  0
 ...sn_se_test_3c.py => tsn_320p_test_25frame_3crop.py} |  0
 ...ti_train.py => tsn_340x256_multiscalecrop_train.py} |  2 +-
 ...train.py => tsn_340x256_randomresizedcrop_train.py} |  2 +-
 ..._test_10c.py => tsn_340x256_test_25frame_10crop.py} |  0
 ...ix_test_3c.py => tsn_340x256_test_25frame_3crop.py} |  0
 9 files changed, 9 insertions(+), 9 deletions(-)
 rename configs/recognition/tsn/data_benchmark/{tsn_se_multi_train.py => tsn_320p_multiscalecrop_train.py} (98%)
 rename configs/recognition/tsn/data_benchmark/{tsn_se_resize_train.py => tsn_320p_randomresizedcrop_train.py} (98%)
 rename configs/recognition/tsn/data_benchmark/{tsn_se_test_10c.py => tsn_320p_test_25frame_10crop.py} (100%)
 rename configs/recognition/tsn/data_benchmark/{tsn_se_test_3c.py => tsn_320p_test_25frame_3crop.py} (100%)
 rename configs/recognition/tsn/data_benchmark/{tsn_fix_multi_train.py => tsn_340x256_multiscalecrop_train.py} (98%)
 rename configs/recognition/tsn/data_benchmark/{tsn_fix_resize_train.py => tsn_340x256_randomresizedcrop_train.py} (98%)
 rename configs/recognition/tsn/data_benchmark/{tsn_fix_test_10c.py => tsn_340x256_test_25frame_10crop.py} (100%)
 rename configs/recognition/tsn/data_benchmark/{tsn_fix_test_3c.py => tsn_340x256_test_25frame_3crop.py} (100%)

diff --git a/configs/recognition/tsn/README.md b/configs/recognition/tsn/README.md
index f8ab7d0074..75892f157c 100644
--- a/configs/recognition/tsn/README.md
+++ b/configs/recognition/tsn/README.md
@@ -28,17 +28,17 @@ Here, We use [1: 1] to indicate that we combine rgb and flow score with coeffici
 ### Kinetics-400 Data Benchmark (8-gpus, ResNet50, ImageNet pretrain; 3 segments)
 
-In data benchmark, we compare: 1. Different data preprocessing methods: (1) Resize video to 340x256, (2) Resize the short edge of video to 320px; 2. Different data augmentation methods: (1) MultiScaleCrop, (2) RandomResizedCrop; 3. Different testing protocols: (1) 25 clips x 10 crops, (2) 25 clips x 3 crops. In config names, **fix**(fix_ratio) and **se**(shortedge) denotes 340x256 data and short-edge 320px data respectively.
+In data benchmark, we compare: 1. Different data preprocessing methods: (1) Resize video to 340x256, (2) Resize the short edge of video to 320px; 2. Different data augmentation methods: (1) MultiScaleCrop, (2) RandomResizedCrop; 3. Different testing protocols: (1) 25 frames x 10 crops, (2) 25 frames x 3 crops.
 
 | config | resolution | training augmentation | testing protocol | top1 acc | top5 acc | ckpt | log | json |
 | :----------------------------------------------------------: | :------------: | :-------------------: | :--------------: | :------: | :------: | :------: | :-----: | :------: |
-| [tsn_fix_multi_train](/configs/recognition/tsn/data_benchmark/tsn_fix_multi_train.py) | 340x256 | MultiScaleCrop | 25x10 frames | 70.60 | 89.26 | [ckpt]() | [log]() | [json]() |
+| [tsn_340x256_multiscalecrop_train](data_benchmark/tsn_340x256_multiscalecrop_train.py) | 340x256 | MultiScaleCrop | 25x10 frames | 70.60 | 89.26 | [ckpt]() | [log]() | [json]() |
 | x | 340x256 | MultiScaleCrop | 25x3 frames | 70.52 | 89.39 | x | x | x |
-| [tsn_fix_resize_train](/configs/recognition/tsn/data_benchmark/tsn_fix_resize_train.py) | 340x256 | RandomResizedCrop | 25x10 frames | 70.11 | 89.01 | [ckpt]() | [log]() | [json]() |
+| [tsn_340x256_randomresizedcrop_train](data_benchmark/tsn_340x256_randomresizedcrop_train.py) | 340x256 | RandomResizedCrop | 25x10 frames | 70.11 | 89.01 | [ckpt]() | [log]() | [json]() |
 | x | 340x256 | RandomResizedCrop | 25x3 frames | 69.95 | 89.02 | x | x | x |
-| [tsn_se_multi_train](/configs/recognition/tsn/data_benchmark/tsn_se_multi_train.py) | short-side 320 | MultiScaleCrop | 25x10 frames | 70.32 | 89.25 | [ckpt]() | [log]() | [json]() |
+| [tsn_320p_multiscalecrop_train](data_benchmark/tsn_320p_multiscalecrop_train.py) | short-side 320 | MultiScaleCrop | 25x10 frames | 70.32 | 89.25 | [ckpt]() | [log]() | [json]() |
 | x | short-side 320 | MultiScaleCrop | 25x3 frames | 70.54 | 89.39 | x | x | x |
-| [tsn_se_resize_train](/configs/recognition/tsn/data_benchmark/tsn_se_resize_train.py) | short-side 320 | RandomResizedCrop | 25x10 frames | 70.44 | 89.23 | [ckpt]() | [log]() | [json]() |
+| [tsn_320p_randomresizedcrop_train](data_benchmark/tsn_320p_randomresizedcrop_train.py) | short-side 320 | RandomResizedCrop | 25x10 frames | 70.44 | 89.23 | [ckpt]() | [log]() | [json]() |
 | x | short-side 320 | RandomResizedCrop | 25x3 frames | 70.91 | 89.51 | x | x | x |
 
 ### Something-Something V1

diff --git a/configs/recognition/tsn/data_benchmark/tsn_se_multi_train.py b/configs/recognition/tsn/data_benchmark/tsn_320p_multiscalecrop_train.py
similarity index 98%
rename from configs/recognition/tsn/data_benchmark/tsn_se_multi_train.py
rename to configs/recognition/tsn/data_benchmark/tsn_320p_multiscalecrop_train.py
index b6bc86f647..367870d3de 100644
--- a/configs/recognition/tsn/data_benchmark/tsn_se_multi_train.py
+++ b/configs/recognition/tsn/data_benchmark/tsn_320p_multiscalecrop_train.py
@@ -109,7 +109,7 @@
 # runtime settings
 dist_params = dict(backend='nccl')
 log_level = 'INFO'
-work_dir = './work_dirs/tsn_se_multi_train/'
+work_dir = './work_dirs/tsn_320p_multiscalecrop_train/'
 load_from = None
 resume_from = None
 workflow = [('train', 5)]

diff --git a/configs/recognition/tsn/data_benchmark/tsn_se_resize_train.py b/configs/recognition/tsn/data_benchmark/tsn_320p_randomresizedcrop_train.py
similarity index 98%
rename from configs/recognition/tsn/data_benchmark/tsn_se_resize_train.py
rename to configs/recognition/tsn/data_benchmark/tsn_320p_randomresizedcrop_train.py
index 5d13536613..748665aa0d 100644
--- a/configs/recognition/tsn/data_benchmark/tsn_se_resize_train.py
+++ b/configs/recognition/tsn/data_benchmark/tsn_320p_randomresizedcrop_train.py
@@ -108,7 +108,7 @@
 # runtime settings
 dist_params = dict(backend='nccl')
 log_level = 'INFO'
-work_dir = './work_dirs/tsn_se_resize_train/'
+work_dir = './work_dirs/tsn_320p_randomresizedcrop_train/'
 load_from = None
 resume_from = None
 workflow = [('train', 1)]

diff --git a/configs/recognition/tsn/data_benchmark/tsn_se_test_10c.py b/configs/recognition/tsn/data_benchmark/tsn_320p_test_25frame_10crop.py
similarity index 100%
rename from configs/recognition/tsn/data_benchmark/tsn_se_test_10c.py
rename to configs/recognition/tsn/data_benchmark/tsn_320p_test_25frame_10crop.py

diff --git a/configs/recognition/tsn/data_benchmark/tsn_se_test_3c.py b/configs/recognition/tsn/data_benchmark/tsn_320p_test_25frame_3crop.py
similarity index 100%
rename from configs/recognition/tsn/data_benchmark/tsn_se_test_3c.py
rename to configs/recognition/tsn/data_benchmark/tsn_320p_test_25frame_3crop.py

diff --git a/configs/recognition/tsn/data_benchmark/tsn_fix_multi_train.py b/configs/recognition/tsn/data_benchmark/tsn_340x256_multiscalecrop_train.py
similarity index 98%
rename from configs/recognition/tsn/data_benchmark/tsn_fix_multi_train.py
rename to configs/recognition/tsn/data_benchmark/tsn_340x256_multiscalecrop_train.py
index e5e73a998c..377e3766ea 100644
--- a/configs/recognition/tsn/data_benchmark/tsn_fix_multi_train.py
+++ b/configs/recognition/tsn/data_benchmark/tsn_340x256_multiscalecrop_train.py
@@ -113,7 +113,7 @@
 # runtime settings
 dist_params = dict(backend='nccl')
 log_level = 'INFO'
-work_dir = './work_dirs/tsn_fix_multi_train/'
+work_dir = './work_dirs/tsn_340x256_multiscalecrop_train/'
 load_from = None
 resume_from = None
 workflow = [('train', 1)]

diff --git a/configs/recognition/tsn/data_benchmark/tsn_fix_resize_train.py b/configs/recognition/tsn/data_benchmark/tsn_340x256_randomresizedcrop_train.py
similarity index 98%
rename from configs/recognition/tsn/data_benchmark/tsn_fix_resize_train.py
rename to configs/recognition/tsn/data_benchmark/tsn_340x256_randomresizedcrop_train.py
index 2f43aee758..bcc9d19807 100644
--- a/configs/recognition/tsn/data_benchmark/tsn_fix_resize_train.py
+++ b/configs/recognition/tsn/data_benchmark/tsn_340x256_randomresizedcrop_train.py
@@ -104,7 +104,7 @@
 # runtime settings
 dist_params = dict(backend='nccl')
 log_level = 'INFO'
-work_dir = './work_dirs/tsn_fix_resize_train'
+work_dir = './work_dirs/tsn_340x256_randomresizedcrop_train'
 load_from = None
 resume_from = None
 workflow = [('train', 5)]

diff --git a/configs/recognition/tsn/data_benchmark/tsn_fix_test_10c.py b/configs/recognition/tsn/data_benchmark/tsn_340x256_test_25frame_10crop.py
similarity index 100%
rename from configs/recognition/tsn/data_benchmark/tsn_fix_test_10c.py
rename to configs/recognition/tsn/data_benchmark/tsn_340x256_test_25frame_10crop.py

diff --git a/configs/recognition/tsn/data_benchmark/tsn_fix_test_3c.py b/configs/recognition/tsn/data_benchmark/tsn_340x256_test_25frame_3crop.py
similarity index 100%
rename from configs/recognition/tsn/data_benchmark/tsn_fix_test_3c.py
rename to configs/recognition/tsn/data_benchmark/tsn_340x256_test_25frame_3crop.py

From fc852d1355cb9f4921f8d396921ee466dbef3e5e Mon Sep 17 00:00:00 2001
From: HaodongDuan
Date: Mon, 3 Aug 2020 12:40:55 +0800
Subject: [PATCH 6/7] update names

---
 configs/recognition/tsn/README.md             |  8 ++++----
 ...r50_multiscalecrop_320p_1x1x3_100e_kinetics400_rgb.py} |  3 ++-
 ..._multiscalecrop_340x256_1x1x3_100e_kinetics400_rgb.py} |  3 ++-
 ..._randomresizedcrop_320p_1x1x3_100e_kinetics400_rgb.py} |  3 ++-
 ...ndomresizedcrop_340x256_1x1x3_100e_kinetics400_rgb.py} |  3 ++-
 ...n_r50_test_320p_1x1x25_10crop_100e_kinetics400_rgb.py} |  0
 ...sn_r50_test_320p_1x1x25_3crop_100e_kinetics400_rgb.py} |  0
 ...50_test_340x256_1x1x25_10crop_100e_kinetics400_rgb.py} |  0
 ...r50_test_340x256_1x1x25_3crop_100e_kinetics400_rgb.py} |  0
 9 files changed, 12 insertions(+), 8 deletions(-)
 rename configs/recognition/tsn/data_benchmark/{tsn_320p_multiscalecrop_train.py => tsn_r50_multiscalecrop_320p_1x1x3_100e_kinetics400_rgb.py} (97%)
 rename configs/recognition/tsn/data_benchmark/{tsn_340x256_multiscalecrop_train.py => tsn_r50_multiscalecrop_340x256_1x1x3_100e_kinetics400_rgb.py} (97%)
 rename configs/recognition/tsn/data_benchmark/{tsn_320p_randomresizedcrop_train.py => tsn_r50_randomresizedcrop_320p_1x1x3_100e_kinetics400_rgb.py} (97%)
 rename configs/recognition/tsn/data_benchmark/{tsn_340x256_randomresizedcrop_train.py => tsn_r50_randomresizedcrop_340x256_1x1x3_100e_kinetics400_rgb.py} (97%)
 rename configs/recognition/tsn/data_benchmark/{tsn_320p_test_25frame_10crop.py => tsn_r50_test_320p_1x1x25_10crop_100e_kinetics400_rgb.py} (100%)
 rename configs/recognition/tsn/data_benchmark/{tsn_320p_test_25frame_3crop.py => tsn_r50_test_320p_1x1x25_3crop_100e_kinetics400_rgb.py} (100%)
 rename configs/recognition/tsn/data_benchmark/{tsn_340x256_test_25frame_10crop.py => tsn_r50_test_340x256_1x1x25_10crop_100e_kinetics400_rgb.py} (100%)
 rename configs/recognition/tsn/data_benchmark/{tsn_340x256_test_25frame_3crop.py => tsn_r50_test_340x256_1x1x25_3crop_100e_kinetics400_rgb.py} (100%)

diff --git a/configs/recognition/tsn/README.md b/configs/recognition/tsn/README.md
index 75892f157c..74c88eddba 100644
--- a/configs/recognition/tsn/README.md
+++ b/configs/recognition/tsn/README.md
@@ -32,13 +32,13 @@ In data benchmark, we compare: 1. Different data preprocessing methods: (1) Resi
 | config | resolution | training augmentation | testing protocol | top1 acc | top5 acc | ckpt | log | json |
 | :----------------------------------------------------------: | :------------: | :-------------------: | :--------------: | :------: | :------: | :------: | :-----: | :------: |
-| [tsn_340x256_multiscalecrop_train](data_benchmark/tsn_340x256_multiscalecrop_train.py) | 340x256 | MultiScaleCrop | 25x10 frames | 70.60 | 89.26 | [ckpt]() | [log]() | [json]() |
+| [tsn_r50_multiscalecrop_340x256_1x1x3_100e_kinetics400_rgb](data_benchmark/tsn_r50_multiscalecrop_340x256_1x1x3_100e_kinetics400_rgb.py) | 340x256 | MultiScaleCrop | 25x10 frames | 70.60 | 89.26 | [ckpt]() | [log]() | [json]() |
 | x | 340x256 | MultiScaleCrop | 25x3 frames | 70.52 | 89.39 | x | x | x |
-| [tsn_340x256_randomresizedcrop_train](data_benchmark/tsn_340x256_randomresizedcrop_train.py) | 340x256 | RandomResizedCrop | 25x10 frames | 70.11 | 89.01 | [ckpt]() | [log]() | [json]() |
+| [tsn_r50_randomresizedcrop_340x256_1x1x3_100e_kinetics400_rgb](data_benchmark/tsn_r50_randomresizedcrop_340x256_1x1x3_100e_kinetics400_rgb.py) | 340x256 | RandomResizedCrop | 25x10 frames | 70.11 | 89.01 | [ckpt]() | [log]() | [json]() |
 | x | 340x256 | RandomResizedCrop | 25x3 frames | 69.95 | 89.02 | x | x | x |
-| [tsn_320p_multiscalecrop_train](data_benchmark/tsn_320p_multiscalecrop_train.py) | short-side 320 | MultiScaleCrop | 25x10 frames | 70.32 | 89.25 | [ckpt]() | [log]() | [json]() |
+| [tsn_r50_multiscalecrop_320p_1x1x3_100e_kinetics400_rgb](data_benchmark/tsn_r50_multiscalecrop_320p_1x1x3_100e_kinetics400_rgb.py) | short-side 320 | MultiScaleCrop | 25x10 frames | 70.32 | 89.25 | [ckpt]() | [log]() | [json]() |
 | x | short-side 320 | MultiScaleCrop | 25x3 frames | 70.54 | 89.39 | x | x | x |
-| [tsn_320p_randomresizedcrop_train](data_benchmark/tsn_320p_randomresizedcrop_train.py) | short-side 320 | RandomResizedCrop | 25x10 frames | 70.44 | 89.23 | [ckpt]() | [log]() | [json]() |
+| [tsn_r50_randomresizedcrop_320p_1x1x3_100e_kinetics400_rgb](data_benchmark/tsn_r50_randomresizedcrop_320p_1x1x3_100e_kinetics400_rgb.py) | short-side 320 | RandomResizedCrop | 25x10 frames | 70.44 | 89.23 | [ckpt]() | [log]() | [json]() |
 | x | short-side 320 | RandomResizedCrop | 25x3 frames | 70.91 | 89.51 | x | x | x |
 
 ### Something-Something V1

diff --git a/configs/recognition/tsn/data_benchmark/tsn_320p_multiscalecrop_train.py b/configs/recognition/tsn/data_benchmark/tsn_r50_multiscalecrop_320p_1x1x3_100e_kinetics400_rgb.py
similarity index 97%
rename from configs/recognition/tsn/data_benchmark/tsn_320p_multiscalecrop_train.py
rename to configs/recognition/tsn/data_benchmark/tsn_r50_multiscalecrop_320p_1x1x3_100e_kinetics400_rgb.py
index 367870d3de..120f21fa8d 100644
--- a/configs/recognition/tsn/data_benchmark/tsn_320p_multiscalecrop_train.py
+++ b/configs/recognition/tsn/data_benchmark/tsn_r50_multiscalecrop_320p_1x1x3_100e_kinetics400_rgb.py
@@ -109,7 +109,8 @@
 # runtime settings
 dist_params = dict(backend='nccl')
 log_level = 'INFO'
-work_dir = './work_dirs/tsn_320p_multiscalecrop_train/'
+work_dir = ('./work_dirs/tsn_r50_multiscalecrop_320p_1x1x3'
+            '_100e_kinetics400_rgb/')
 load_from = None
 resume_from = None
 workflow = [('train', 5)]

diff --git a/configs/recognition/tsn/data_benchmark/tsn_340x256_multiscalecrop_train.py b/configs/recognition/tsn/data_benchmark/tsn_r50_multiscalecrop_340x256_1x1x3_100e_kinetics400_rgb.py
similarity index 97%
rename from configs/recognition/tsn/data_benchmark/tsn_340x256_multiscalecrop_train.py
rename to configs/recognition/tsn/data_benchmark/tsn_r50_multiscalecrop_340x256_1x1x3_100e_kinetics400_rgb.py
index 377e3766ea..6a96b3292d 100644
--- a/configs/recognition/tsn/data_benchmark/tsn_340x256_multiscalecrop_train.py
+++ b/configs/recognition/tsn/data_benchmark/tsn_r50_multiscalecrop_340x256_1x1x3_100e_kinetics400_rgb.py
@@ -113,7 +113,8 @@
 # runtime settings
 dist_params = dict(backend='nccl')
 log_level = 'INFO'
-work_dir = './work_dirs/tsn_340x256_multiscalecrop_train/'
+work_dir = ('./work_dirs/tsn_r50_multiscalecrop_340x256_1x1x3'
+            '_100e_kinetics400_rgb/')
 load_from = None
 resume_from = None
 workflow = [('train', 1)]

diff --git a/configs/recognition/tsn/data_benchmark/tsn_320p_randomresizedcrop_train.py b/configs/recognition/tsn/data_benchmark/tsn_r50_randomresizedcrop_320p_1x1x3_100e_kinetics400_rgb.py
similarity index 97%
rename from configs/recognition/tsn/data_benchmark/tsn_320p_randomresizedcrop_train.py
rename to configs/recognition/tsn/data_benchmark/tsn_r50_randomresizedcrop_320p_1x1x3_100e_kinetics400_rgb.py
index 748665aa0d..e5d048471c 100644
--- a/configs/recognition/tsn/data_benchmark/tsn_320p_randomresizedcrop_train.py
+++ b/configs/recognition/tsn/data_benchmark/tsn_r50_randomresizedcrop_320p_1x1x3_100e_kinetics400_rgb.py
@@ -108,7 +108,8 @@
 # runtime settings
 dist_params = dict(backend='nccl')
 log_level = 'INFO'
-work_dir = './work_dirs/tsn_320p_randomresizedcrop_train/'
+work_dir = ('./work_dirs/tsn_r50_randomresizedcrop_320p_1x1x3'
+            '_100e_kinetics400_rgb/')
 load_from = None
 resume_from = None
 workflow = [('train', 1)]

diff --git a/configs/recognition/tsn/data_benchmark/tsn_340x256_randomresizedcrop_train.py b/configs/recognition/tsn/data_benchmark/tsn_r50_randomresizedcrop_340x256_1x1x3_100e_kinetics400_rgb.py
similarity index 97%
rename from configs/recognition/tsn/data_benchmark/tsn_340x256_randomresizedcrop_train.py
rename to configs/recognition/tsn/data_benchmark/tsn_r50_randomresizedcrop_340x256_1x1x3_100e_kinetics400_rgb.py
index bcc9d19807..99d2afa57f 100644
--- a/configs/recognition/tsn/data_benchmark/tsn_340x256_randomresizedcrop_train.py
+++ b/configs/recognition/tsn/data_benchmark/tsn_r50_randomresizedcrop_340x256_1x1x3_100e_kinetics400_rgb.py
@@ -104,7 +104,8 @@
 # runtime settings
 dist_params = dict(backend='nccl')
 log_level = 'INFO'
-work_dir = './work_dirs/tsn_340x256_randomresizedcrop_train'
+work_dir = ('./work_dirs/tsn_r50_randomresizedcrop_340x256_1x1x3'
+            '_100e_kinetics400_rgb')
 load_from = None
 resume_from = None
 workflow = [('train', 5)]

diff --git a/configs/recognition/tsn/data_benchmark/tsn_320p_test_25frame_10crop.py b/configs/recognition/tsn/data_benchmark/tsn_r50_test_320p_1x1x25_10crop_100e_kinetics400_rgb.py
similarity index 100%
rename from configs/recognition/tsn/data_benchmark/tsn_320p_test_25frame_10crop.py
rename to configs/recognition/tsn/data_benchmark/tsn_r50_test_320p_1x1x25_10crop_100e_kinetics400_rgb.py

diff --git a/configs/recognition/tsn/data_benchmark/tsn_320p_test_25frame_3crop.py b/configs/recognition/tsn/data_benchmark/tsn_r50_test_320p_1x1x25_3crop_100e_kinetics400_rgb.py
similarity index 100%
rename from configs/recognition/tsn/data_benchmark/tsn_320p_test_25frame_3crop.py
rename to configs/recognition/tsn/data_benchmark/tsn_r50_test_320p_1x1x25_3crop_100e_kinetics400_rgb.py
diff --git a/configs/recognition/tsn/data_benchmark/tsn_340x256_test_25frame_10crop.py b/configs/recognition/tsn/data_benchmark/tsn_r50_test_340x256_1x1x25_10crop_100e_kinetics400_rgb.py
similarity index 100%
rename from configs/recognition/tsn/data_benchmark/tsn_340x256_test_25frame_10crop.py
rename to configs/recognition/tsn/data_benchmark/tsn_r50_test_340x256_1x1x25_10crop_100e_kinetics400_rgb.py

diff --git a/configs/recognition/tsn/data_benchmark/tsn_340x256_test_25frame_3crop.py b/configs/recognition/tsn/data_benchmark/tsn_r50_test_340x256_1x1x25_3crop_100e_kinetics400_rgb.py
similarity index 100%
rename from configs/recognition/tsn/data_benchmark/tsn_340x256_test_25frame_3crop.py
rename to configs/recognition/tsn/data_benchmark/tsn_r50_test_340x256_1x1x25_3crop_100e_kinetics400_rgb.py

From cb420fbfb04a86675a33303e8bb2062d4348237c Mon Sep 17 00:00:00 2001
From: HaodongDuan
Date: Mon, 3 Aug 2020 21:16:06 +0800
Subject: [PATCH 7/7] update superlink

---
 configs/recognition/tsn/README.md | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/configs/recognition/tsn/README.md b/configs/recognition/tsn/README.md
index 74c88eddba..90f92fbb44 100644
--- a/configs/recognition/tsn/README.md
+++ b/configs/recognition/tsn/README.md
@@ -30,16 +30,16 @@
 In data benchmark, we compare: 1. Different data preprocessing methods: (1) Resize video to 340x256, (2) Resize the short edge of video to 320px; 2. Different data augmentation methods: (1) MultiScaleCrop, (2) RandomResizedCrop; 3. Different testing protocols: (1) 25 frames x 10 crops, (2) 25 frames x 3 crops.
 
-| config | resolution | training augmentation | testing protocol | top1 acc | top5 acc | ckpt | log | json |
-| :----------------------------------------------------------: | :------------: | :-------------------: | :--------------: | :------: | :------: | :------: | :-----: | :------: |
-| [tsn_r50_multiscalecrop_340x256_1x1x3_100e_kinetics400_rgb](data_benchmark/tsn_r50_multiscalecrop_340x256_1x1x3_100e_kinetics400_rgb.py) | 340x256 | MultiScaleCrop | 25x10 frames | 70.60 | 89.26 | [ckpt]() | [log]() | [json]() |
-| x | 340x256 | MultiScaleCrop | 25x3 frames | 70.52 | 89.39 | x | x | x |
-| [tsn_r50_randomresizedcrop_340x256_1x1x3_100e_kinetics400_rgb](data_benchmark/tsn_r50_randomresizedcrop_340x256_1x1x3_100e_kinetics400_rgb.py) | 340x256 | RandomResizedCrop | 25x10 frames | 70.11 | 89.01 | [ckpt]() | [log]() | [json]() |
-| x | 340x256 | RandomResizedCrop | 25x3 frames | 69.95 | 89.02 | x | x | x |
-| [tsn_r50_multiscalecrop_320p_1x1x3_100e_kinetics400_rgb](data_benchmark/tsn_r50_multiscalecrop_320p_1x1x3_100e_kinetics400_rgb.py) | short-side 320 | MultiScaleCrop | 25x10 frames | 70.32 | 89.25 | [ckpt]() | [log]() | [json]() |
-| x | short-side 320 | MultiScaleCrop | 25x3 frames | 70.54 | 89.39 | x | x | x |
-| [tsn_r50_randomresizedcrop_320p_1x1x3_100e_kinetics400_rgb](data_benchmark/tsn_r50_randomresizedcrop_320p_1x1x3_100e_kinetics400_rgb.py) | short-side 320 | RandomResizedCrop | 25x10 frames | 70.44 | 89.23 | [ckpt]() | [log]() | [json]() |
-| x | short-side 320 | RandomResizedCrop | 25x3 frames | 70.91 | 89.51 | x | x | x |
+| config | resolution | training augmentation | testing protocol | top1 acc | top5 acc | ckpt | log | json |
+| :----------------------------------------------------------: | :------------: | :-------------------: | :--------------: | :------: | :------: | :----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: |
+| [tsn_r50_multiscalecrop_340x256_1x1x3_100e_kinetics400_rgb](data_benchmark/tsn_r50_multiscalecrop_340x256_1x1x3_100e_kinetics400_rgb.py) | 340x256 | MultiScaleCrop | 25x10 frames | 70.60 | 89.26 | [ckpt](https://openmmlab.oss-accelerate.aliyuncs.com/mmaction/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmaction/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb/20200614_063526.log) | [json](https://openmmlab.oss-accelerate.aliyuncs.com/mmaction/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb/20200614_063526.log.json) |
+| x | 340x256 | MultiScaleCrop | 25x3 frames | 70.52 | 89.39 | x | x | x |
+| [tsn_r50_randomresizedcrop_340x256_1x1x3_100e_kinetics400_rgb](data_benchmark/tsn_r50_randomresizedcrop_340x256_1x1x3_100e_kinetics400_rgb.py) | 340x256 | RandomResizedCrop | 25x10 frames | 70.11 | 89.01 | [ckpt](https://openmmlab.oss-accelerate.aliyuncs.com/mmaction/recognition/tsn/data_benchmark/tsn_r50_randomresizedcrop_340x256_1x1x3_100e_kinetics400_rgb/tsn_r50_randomresizedcrop_340x256_1x1x3_100e_kinetics400_rgb_20200725-88cb325a.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmaction/recognition/tsn/data_benchmark/tsn_r50_randomresizedcrop_340x256_1x1x3_100e_kinetics400_rgb/tsn_r50_randomresizedcrop_340x256_1x1x3_100e_kinetics400_rgb_20200725.log) | [json](https://openmmlab.oss-accelerate.aliyuncs.com/mmaction/recognition/tsn/data_benchmark/tsn_r50_randomresizedcrop_340x256_1x1x3_100e_kinetics400_rgb/tsn_r50_randomresizedcrop_340x256_1x1x3_100e_kinetics400_rgb_20200725.json) |
+| x | 340x256 | RandomResizedCrop | 25x3 frames | 69.95 | 89.02 | x | x | x |
+| [tsn_r50_multiscalecrop_320p_1x1x3_100e_kinetics400_rgb](data_benchmark/tsn_r50_multiscalecrop_320p_1x1x3_100e_kinetics400_rgb.py) | short-side 320 | MultiScaleCrop | 25x10 frames | 70.32 | 89.25 | [ckpt](https://openmmlab.oss-accelerate.aliyuncs.com/mmaction/recognition/tsn/data_benchmark/tsn_r50_multiscalecrop_320p_1x1x3_100e_kinetics400_rgb/tsn_r50_multiscalecrop_320p_1x1x3_100e_kinetics400_rgb_20200725-9922802f.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmaction/recognition/tsn/data_benchmark/tsn_r50_multiscalecrop_320p_1x1x3_100e_kinetics400_rgb/tsn_r50_multiscalecrop_320p_1x1x3_100e_kinetics400_rgb_20200725.log) | [json](https://openmmlab.oss-accelerate.aliyuncs.com/mmaction/recognition/tsn/data_benchmark/tsn_r50_multiscalecrop_320p_1x1x3_100e_kinetics400_rgb/tsn_r50_multiscalecrop_320p_1x1x3_100e_kinetics400_rgb_20200725.json) |
+| x | short-side 320 | MultiScaleCrop | 25x3 frames | 70.54 | 89.39 | x | x | x |
+| [tsn_r50_randomresizedcrop_320p_1x1x3_100e_kinetics400_rgb](data_benchmark/tsn_r50_randomresizedcrop_320p_1x1x3_100e_kinetics400_rgb.py) | short-side 320 | RandomResizedCrop | 25x10 frames | 70.44 | 89.23 | [ckpt](https://openmmlab.oss-accelerate.aliyuncs.com/mmaction/recognition/tsn/tsn_r50_320p_1x1x3_100e_kinetics400_rgb/tsn_r50_320p_1x1x3_100e_kinetics400_rgb_20200702-cc665e2a.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmaction/recognition/tsn/tsn_r50_320p_1x1x3_100e_kinetics400_rgb/tsn_r50_f3_kinetics400_shortedge_70.9_89.5.log) | [json](https://openmmlab.oss-accelerate.aliyuncs.com/mmaction/recognition/tsn/tsn_r50_320p_1x1x3_100e_kinetics400_rgb/tsn_r50_f3_kinetics400_shortedge_70.9_89.5.log.json) |
+| x | short-side 320 | RandomResizedCrop | 25x3 frames | 70.91 | 89.51 | x | x | x |
 
 ### Something-Something V1
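
Usage sketch (not an authoritative part of the patches above; it assumes an mmaction2 working tree with mmcv installed and is run from the repo root — `Config.fromfile` is mmcv's config loader, and the config path uses the final file names introduced in PATCH 7/7):

    from mmcv import Config

    # Load one of the data-benchmark configs and print the pieces the README
    # table summarizes: the training augmentation chain, the test protocol,
    # and the batch/learning-rate settings (the lr is tuned for 8 GPUs).
    cfg = Config.fromfile(
        'configs/recognition/tsn/data_benchmark/'
        'tsn_r50_randomresizedcrop_320p_1x1x3_100e_kinetics400_rgb.py')
    print([step['type'] for step in cfg.train_pipeline])  # includes RandomResizedCrop
    print([step['type'] for step in cfg.test_pipeline])   # 25 clips, TenCrop testing
    print(cfg.data.videos_per_gpu, cfg.optimizer['lr'])   # 32 clips per GPU, lr=0.01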