Skip to content

Commit de51763

Browse files
apsdehal authored and facebook-github-bot committed
[enhancement,refactor,fix] M4C Captioner, TextCaps related changes (#113)
Summary:
- Fix test reporter according to the new API and distributed sampler
- MultiDataset distributed sampler will be loaded now
- Refactor M4C TextCaps to TextCaps
- Remove the confusion between COCO and TextCaps by introducing an `annotation_style` config parameter that allows loading datasets other than the default dataset. In the long term, we want datasets to be able to specify the dataset class that will be used to build the dataset
- Allow `zoo_requirements` in the model zoo as well
- Upload detectron weights as well so that they can be included as a requirement
- Update all models to use zoo-based detectron weights
- Add dataset zoo for textcaps, coco-m4c version and coco-ocr_en version

Pull Request resolved: fairinternal/mmf-internal#113

Test Plan: Same as the previous PR; you can use my data dir to test it out. I have tested all of the variations that come with m4c_textcaps, so we should be good to go.

Reviewed By: vedanuj

Differential Revision: D21421244

Pulled By: apsdehal

fbshipit-source-id: 5a89ee516f7e4f4cac68f7a977b893ace9af4a92
1 parent 5de3499 commit de51763

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+541
-356
lines changed

mmf/common/test_reporter.py

+14-18
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,13 @@
99
from mmf.common.batch_collator import BatchCollator
1010
from mmf.common.registry import registry
1111
from mmf.utils.configuration import get_mmf_env
12-
from mmf.utils.distributed import gather_tensor, get_world_size, is_master
12+
from mmf.utils.distributed import gather_tensor, is_master
1313
from mmf.utils.file_io import PathManager
14-
from mmf.utils.general import ckpt_name_from_core_args, foldername_from_config_override
14+
from mmf.utils.general import (
15+
ckpt_name_from_core_args,
16+
foldername_from_config_override,
17+
get_batch_size,
18+
)
1519
from mmf.utils.timer import Timer
1620

1721

@@ -103,24 +107,15 @@ def get_dataloader(self):
103107
def _add_extra_args_for_dataloader(self, other_args=None):
104108
if other_args is None:
105109
other_args = {}
106-
training = self.config.training
107110

108-
if training.local_rank is not None and training.distributed:
109-
other_args["sampler"] = DistributedSampler(self.current_dataset)
110-
else:
111-
other_args["shuffle"] = True
112-
113-
batch_size = training.batch_size
114-
115-
world_size = get_world_size()
116-
117-
if batch_size % world_size != 0:
118-
raise RuntimeError(
119-
"Batch size {} must be divisible by number "
120-
"of GPUs {} used.".format(batch_size, world_size)
111+
if torch.distributed.is_initialized():
112+
other_args["sampler"] = DistributedSampler(
113+
self.current_dataset, shuffle=False
121114
)
115+
else:
116+
other_args["shuffle"] = False
122117

123-
other_args["batch_size"] = batch_size // world_size
118+
other_args["batch_size"] = get_batch_size()
124119

125120
return other_args
126121

@@ -143,7 +138,8 @@ def add_to_report(self, report):
143138
report.scores = gather_tensor(report.scores).view(
144139
-1, report.scores.size(-1)
145140
)
146-
report.question_id = gather_tensor(report.question_id).view(-1)
141+
if "question_id" in report:
142+
report.question_id = gather_tensor(report.question_id).view(-1)
147143
if "image_id" in report:
148144
_, enc_size = report.image_id.size()
149145
report.image_id = gather_tensor(report.image_id)

mmf/configs/datasets/coco/defaults.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ dataset_config:
55
fast_read: false
66
use_images: false
77
use_features: true
8+
annotation_style: coco
89
features:
910
train:
1011
- coco/detectron_fix_100/fc6/train_val_2014

mmf/configs/datasets/coco/ocr_en.yaml

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
dataset_config:
2+
coco:
3+
data_dir: ${env.data_dir}/datasets
4+
use_images: false
5+
use_features: true
6+
use_ocr: true
7+
use_ocr_info: true
8+
use_order_vectors: true
9+
return_features_info: true
10+
zoo_requirements:
11+
- coco.m4c_captioner
12+
- coco.ocr_en
13+
annotation_style: textcaps
14+
features:
15+
train:
16+
- coco/m4c_captioner/features/detectron.lmdb,coco/ocr_en/features/ocr_en_frcn_features.lmdb
17+
val:
18+
- coco/m4c_captioner/features/detectron.lmdb,coco/ocr_en/features/ocr_en_frcn_features.lmdb
19+
test:
20+
- coco/m4c_captioner/features/detectron.lmdb,coco/ocr_en/features/ocr_en_frcn_features.lmdb
21+
annotations:
22+
train:
23+
- coco/m4c_captioner/annotations/imdb_karpathy_train.npy
24+
val:
25+
- coco/m4c_captioner/annotations/imdb_karpathy_val_filtered_by_image_id.npy # only one sample per image_id
26+
test:
27+
- coco/m4c_captioner/annotations/imdb_karpathy_test_filtered_by_image_id.npy # only one sample per image_id
28+
processors:
29+
text_processor:
30+
type: bert_tokenizer
31+
params:
32+
tokenizer_config:
33+
type: bert-base-uncased
34+
params:
35+
do_lower_case: true
36+
max_seq_length: 3
37+
answer_processor:
38+
type: m4c_caption
39+
params:
40+
vocab_file: coco/m4c_captioner/extras/vocabs/vocab_joint_textcaps_coco_threshold_10.txt
41+
preprocessor:
42+
type: simple_word
43+
params: {}
44+
context_preprocessor:
45+
type: simple_word
46+
params: {}
47+
max_length: 50
48+
max_copy_steps: 30
49+
num_answers: 1
50+
caption_processor: null
51+
context_processor:
52+
type: fasttext
53+
params:
54+
max_length: 50
55+
model_file: wiki.en.bin
56+
copy_processor:
57+
type: copy
58+
params:
59+
max_length: 100
60+
phoc_processor:
61+
type: phoc
62+
params:
63+
max_length: 50

mmf/configs/datasets/m4c_textcaps/defaults.yaml → mmf/configs/datasets/textcaps/defaults.yaml

+17-12
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,43 @@
11
dataset_config:
2-
m4c_textcaps:
3-
data_dir: ${env.data_dir}
2+
textcaps:
3+
data_dir: ${env.data_dir}/datasets
44
depth_first: false
55
fast_read: false
6+
zoo_requirements:
7+
- textcaps.defaults
8+
- textvqa.defaults
69
max_features: 100
710
use_images: false
811
use_features: true
12+
use_order_vectors: true
13+
annotation_style: textcaps
914
features:
1015
train:
11-
- open_images/detectron_fix_100/fc6/train,m4c_textvqa_ocr_en_frcn_features/train_images
16+
- textvqa/defaults/features/open_images/detectron.lmdb
1217
val:
13-
- open_images/detectron_fix_100/fc6/train,m4c_textvqa_ocr_en_frcn_features/train_images
18+
- textvqa/defaults/features/open_images/detectron.lmdb
1419
test:
15-
- open_images/detectron_fix_100/fc6/test,m4c_textvqa_ocr_en_frcn_features/test_images
20+
- textvqa/defaults/features/open_images/detectron.lmdb
1621
annotations:
1722
train:
18-
- imdb/m4c_textcaps/imdb_train.npy
23+
- textcaps/defaults/annotations/imdb_train.npy
1924
val:
20-
- imdb/m4c_textcaps/imdb_val_filtered_by_image_id.npy # only one sample per image_id
25+
- textcaps/defaults/annotations/imdb_val_filtered_by_image_id.npy # only one sample per image_id
2126
test:
22-
- imdb/m4c_textcaps/imdb_test_filtered_by_image_id.npy # only one sample per image_id
27+
- textcaps/defaults/annotations/imdb_test_filtered_by_image_id.npy # only one sample per image_id
2328
processors:
2429
text_processor:
25-
type: m4c_bert_tokenizer
30+
type: bert_tokenizer
2631
params:
2732
tokenizer_config:
2833
type: bert-base-uncased
2934
params:
3035
do_lower_case: true
31-
max_length: 1
36+
max_seq_length: 3
3237
answer_processor:
3338
type: m4c_caption
3439
params:
35-
vocab_file: m4c_captioner_vocabs/textcaps/vocab_textcap_threshold_10.txt
40+
vocab_file: textcaps/defaults/extras/vocabs/vocab_textcaps_threshold_10.txt
3641
preprocessor:
3742
type: simple_word
3843
params: {}
@@ -67,5 +72,5 @@ dataset_config:
6772
use_ocr_info: true
6873

6974
training:
70-
monitored_metric: m4c_textcaps/textcaps_bleu4
75+
monitored_metric: textcaps/textcaps_bleu4
7176
metric_minimize: false

mmf/configs/defaults.yaml

+2-1
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,9 @@ training:
9494
# Iteration until which warmup should be done
9595
warmup_iterations: 1000
9696

97-
# Local rank of the GPU device
97+
# Device on which the model will be trained. Set 'cpu' to train/infer on CPU
9898
device: cuda
99+
# Local rank of the GPU device
99100
local_rank: null
100101

101102
# Use to load specific modules from checkpoint to your model,

mmf/configs/models/butd/defaults.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ model_config:
2929
image_feature_encodings:
3030
- type: finetune_faster_rcnn_fpn_fc7
3131
params:
32-
bias_file: detectron/fc6/fc7_b.pkl
33-
weights_file: detectron/fc6/fc7_w.pkl
32+
bias_file: models/detectron.defaults/fc7_b.pkl
33+
weights_file: models/detectron.defaults/fc7_w.pkl
3434
model_data_dir: ${model_config.butd.model_data_dir}
3535
inference:
3636
type: greedy

mmf/configs/models/lorra/defaults.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ model_config:
3939
image_feature_encodings:
4040
- type: finetune_faster_rcnn_fpn_fc7
4141
params:
42-
bias_file: detectron/fc6/fc7_b.pkl
43-
weights_file: detectron/fc6/fc7_w.pkl
42+
bias_file: models/detectron.defaults/fc7_b.pkl
43+
weights_file: models/detectron.defaults/fc7_w.pkl
4444
model_data_dir: ${model_config.lorra.model_data_dir}
4545
- type: default
4646
params:

mmf/configs/models/mmbt/with_features.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,6 @@ model_config:
66
type: finetune_faster_rcnn_fpn_fc7
77
params:
88
in_dim: 2048
9-
bias_file: detectron/fc6/fc7_b.pkl
10-
weights_file: detectron/fc6/fc7_w.pkl
9+
bias_file: models/detectron.defaults/fc7_b.pkl
10+
weights_file: models/detectron.defaults/fc7_w.pkl
1111
model_data_dir: ${model_config.mmbt.model_data_dir}

mmf/configs/models/pythia/defaults.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ model_config:
2525
image_feature_encodings:
2626
- type: finetune_faster_rcnn_fpn_fc7
2727
params:
28-
bias_file: detectron/fc6/fc7_b.pkl
29-
weights_file: detectron/fc6/fc7_w.pkl
28+
bias_file: models/detectron.defaults/fc7_b.pkl
29+
weights_file: models/detectron.defaults/fc7_w.pkl
3030
model_data_dir: ${model_config.pythia.model_data_dir}
3131
- type: default
3232
params:

mmf/configs/models/unimodal/with_features.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,6 @@ model_config:
66
type: finetune_faster_rcnn_fpn_fc7
77
params:
88
in_dim: 2048
9-
bias_file: detectron/fc6/fc7_b.pkl
10-
weights_file: detectron/fc6/fc7_w.pkl
9+
bias_file: models/detectron.defaults/fc7_b.pkl
10+
weights_file: models/detectron.defaults/fc7_w.pkl
1111
model_data_dir: ${model_config.unimodal_image.model_data_dir}

mmf/configs/zoo/datasets.yaml

+43
Original file line numberDiff line numberDiff line change
@@ -134,3 +134,46 @@ ocrvqa:
134134
- url: mmf://datasets/ocrvqa/ocr_en/features/features.tar.gz
135135
file_name: features.tar.gz
136136
hashcode: 1c9eb9df544d431f438d340afe493a4d3db759eedd485033f88fc639106edeb8
137+
138+
139+
textcaps:
140+
defaults:
141+
version: 1.0_2020_05_04
142+
resources:
143+
annotations:
144+
- url: mmf://datasets/textcaps/defaults/annotations/annotations.tar.gz
145+
file_name: annotations.tar.gz
146+
hashcode: cfbe34a0653d18155e5b7de37724888e51c6b0e573fe436bc22ec074338d5456
147+
extras:
148+
- url: mmf://datasets/textcaps/defaults/extras.tar.gz
149+
file_name: extras.tar.gz
150+
hashcode: 6a92426b646b61eefef8d42ca38bd852375d13c7edd3cf511201633bc16aa14c
151+
152+
coco:
153+
# TODO: Fill defaults for COCO later
154+
defaults:
155+
version: 1.0_2020_05_04
156+
resources: []
157+
m4c_captioner:
158+
version: 1.0_2020_05_04
159+
resources:
160+
features:
161+
- url: mmf://datasets/coco/m4c_captioner/features/features.tar.gz
162+
file_name: features.tar.gz
163+
hashcode: b36428b7aac2c1b46d4fa75155ea7cd0f9a94444481563c87a5af64dd05e315f
164+
annotations:
165+
- url: mmf://datasets/coco/m4c_captioner/annotations/annotations.tar.gz
166+
file_name: annotations.tar.gz
167+
hashcode: 828a7a4310a6eddf2d13b9a6b11ca6877cb988680f2fbc838b600f6d8e4dcbfc
168+
extras:
169+
- url: mmf://datasets/coco/m4c_captioner/extras.tar.gz
170+
file_name: extras.tar.gz
171+
hashcode: a859c41693abb40362b5d0a2d844e612713103ff4fef1159e0351c34e5e6fb46
172+
173+
ocr_en:
174+
version: 1.0_2020_05_04
175+
resources:
176+
features:
177+
- url: mmf://datasets/coco/ocr_en/features/features.tar.gz
178+
file_name: features.tar.gz
179+
hashcode: 8d4d67e878208568934c2c3fb1c304f5073b5a89a25a59938d182e360e23473f

mmf/configs/zoo/models.yaml

+26
Original file line numberDiff line numberDiff line change
@@ -8,22 +8,34 @@ visual_bert:
88
hashcode: 9af0b8101579d7587d70d7315940310c7fc5ef7269cba497780922e65e4e000d
99
defaults: ${visual_bert.pretrained.coco}
1010

11+
detectron:
12+
vmb_weights:
13+
version: 1.0_2020_05_03
14+
resources:
15+
- url: mmf://models/detectron/vmb_weights.tar.gz
16+
file_name: vmb_weights.tar.gz
17+
hashcode: 582d85748b2df60e3c6e045b8213c0fd7084054483a3388c55aed5b25bbc3c93
18+
defaults: ${detectron.vmb_weights}
19+
1120
m4c:
1221
defaults: ${m4c.textvqa.defaults}
1322
textvqa:
1423
with_stvqa:
24+
zoo_requirements: detectron.vmb_weights
1525
version: 1.0_2020_04_29
1626
resources:
1727
- url: mmf://models/m4c/m4c.textvqa.with_stvqa.tar.gz
1828
file_name: m4c.textvqa.with_stvqa.tar.gz
1929
hashcode: 2dfd024cb144bb70843033d11dec064bc84937f22fd6561653ce1acd7610285e
2030
ocr_ml:
31+
zoo_requirements: detectron.vmb_weights
2132
version: 1.0_2020_04_29
2233
resources:
2334
- url: mmf://models/m4c/m4c.textvqa.ocr_ml.tar.gz
2435
file_name: m4c.textvqa.ocr_ml.tar.gz
2536
hashcode: 4734dbf1816ffe378f08e69bd85b8cc0e4b2abe9564626505ff93e1ea7aea741
2637
alone:
38+
zoo_requirements: detectron.vmb_weights
2739
version: 1.0_2020_04_29
2840
resources:
2941
- url: mmf://models/m4c/m4c.textvqa.alone.tar.gz
@@ -32,15 +44,29 @@ m4c:
3244
defaults: ${m4c.textvqa.with_stvqa}
3345
stvqa:
3446
defaults:
47+
zoo_requirements: detectron.vmb_weights
3548
version: 1.0_2020_04_29
3649
resources:
3750
- url: mmf://models/m4c/m4c.stvqa.tar.gz
3851
file_name: m4c.stvqa.tar.gz
3952
hashcode: ea4af8737dc04753727b596cefb262afaa1ebdd8f4b9476c4a53d06088cedfb8
4053
ocrvqa:
4154
defaults:
55+
zoo_requirements: detectron.vmb_weights
4256
version: 1.0_2020_04_29
4357
resources:
4458
- url: mmf://models/m4c/m4c.ocrvqa.tar.gz
4559
file_name: m4c.ocrvqa.tar.gz
4660
hashcode: 062d8ca7f47f942c47756574aa5944f3136cef7226173d59fcbd1e00f1a3c42d
61+
62+
63+
m4c_captioner:
64+
defaults: ${m4c_captioner.textcaps.defaults}
65+
textcaps:
66+
defaults:
67+
zoo_requirements: detectron.vmb_weights
68+
version: 1.0_2020_05_03
69+
resources:
70+
- url: mmf://models/m4c_captioner/m4c_captioner.textcaps.tar.gz
71+
file_name: m4c_captioner.textcaps.tar.gz
72+
hashcode: 69c8220750933a0472bfdfb95b83d718dc02f7f41f43ce569c02e903896b2cf4

0 commit comments

Comments
 (0)