facebookresearch
diff --git a/mmf/common/test_reporter.py
+14-18 b/mmf/common/test_reporter.py
+14-18
diff --git a/mmf/configs/datasets/coco/defaults.yaml
+1 b/mmf/configs/datasets/coco/defaults.yaml
+1
diff --git a/mmf/configs/datasets/coco/ocr_en.yaml
+63 b/mmf/configs/datasets/coco/ocr_en.yaml
+63
diff --git a/mmf/configs/datasets/m4c_textcaps/defaults.yaml mmf/configs/datasets/textcaps/defaults.yaml
+17-12 b/mmf/configs/datasets/m4c_textcaps/defaults.yaml mmf/configs/datasets/textcaps/defaults.yaml
+17-12
diff --git a/mmf/configs/defaults.yaml
+2-1 b/mmf/configs/defaults.yaml
+2-1
diff --git a/mmf/configs/models/butd/defaults.yaml
+2-2 b/mmf/configs/models/butd/defaults.yaml
+2-2
diff --git a/mmf/configs/models/lorra/defaults.yaml
+2-2 b/mmf/configs/models/lorra/defaults.yaml
+2-2
diff --git a/mmf/configs/models/mmbt/with_features.yaml
+2-2 b/mmf/configs/models/mmbt/with_features.yaml
+2-2
diff --git a/mmf/configs/models/pythia/defaults.yaml
+2-2 b/mmf/configs/models/pythia/defaults.yaml
+2-2
diff --git a/mmf/configs/models/unimodal/with_features.yaml
+2-2 b/mmf/configs/models/unimodal/with_features.yaml
+2-2
diff --git a/mmf/configs/zoo/datasets.yaml
+43 b/mmf/configs/zoo/datasets.yaml
+43
diff --git a/mmf/configs/zoo/models.yaml
+26 b/mmf/configs/zoo/models.yaml
+26
@@ -9,9 +9,13 @@
 from mmf.common.batch_collator import BatchCollator
 from mmf.common.registry import registry
 from mmf.utils.configuration import get_mmf_env
-from mmf.utils.distributed import gather_tensor, get_world_size, is_master
+from mmf.utils.distributed import gather_tensor, is_master
 from mmf.utils.file_io import PathManager
-from mmf.utils.general import ckpt_name_from_core_args, foldername_from_config_override
+from mmf.utils.general import (
+    ckpt_name_from_core_args,
+    foldername_from_config_override,
+    get_batch_size,
+)
 from mmf.utils.timer import Timer
 
 
@@ -103,24 +107,15 @@ def get_dataloader(self):
     def _add_extra_args_for_dataloader(self, other_args=None):
         if other_args is None:
             other_args = {}
-        training = self.config.training
 
-        if training.local_rank is not None and training.distributed:
-            other_args["sampler"] = DistributedSampler(self.current_dataset)
-        else:
-            other_args["shuffle"] = True
-
-        batch_size = training.batch_size
-
-        world_size = get_world_size()
-
-        if batch_size % world_size != 0:
-            raise RuntimeError(
-                "Batch size {} must be divisible by number "
-                "of GPUs {} used.".format(batch_size, world_size)
+        if torch.distributed.is_initialized():
+            other_args["sampler"] = DistributedSampler(
+                self.current_dataset, shuffle=False
             )
+        else:
+            other_args["shuffle"] = False
 
-        other_args["batch_size"] = batch_size // world_size
+        other_args["batch_size"] = get_batch_size()
 
         return other_args
 
@@ -143,7 +138,8 @@ def add_to_report(self, report):
             report.scores = gather_tensor(report.scores).view(
                 -1, report.scores.size(-1)
             )
-            report.question_id = gather_tensor(report.question_id).view(-1)
+            if "question_id" in report:
+                report.question_id = gather_tensor(report.question_id).view(-1)
             if "image_id" in report:
                 _, enc_size = report.image_id.size()
                 report.image_id = gather_tensor(report.image_id)
 
@@ -5,6 +5,7 @@ dataset_config:
     fast_read: false
     use_images: false
     use_features: true
+    annotation_style: coco
     features:
         train:
         - coco/detectron_fix_100/fc6/train_val_2014
 
@@ -0,0 +1,63 @@
+dataset_config:
+  coco:
+    data_dir: ${env.data_dir}/datasets
+    use_images: false
+    use_features: true
+    use_ocr: true
+    use_ocr_info: true
+    use_order_vectors: true
+    return_features_info: true
+    zoo_requirements:
+    - coco.m4c_captioner
+    - coco.ocr_en
+    annotation_style: textcaps
+    features:
+      train:
+      - coco/m4c_captioner/features/detectron.lmdb,coco/ocr_en/features/ocr_en_frcn_features.lmdb
+      val:
+      - coco/m4c_captioner/features/detectron.lmdb,coco/ocr_en/features/ocr_en_frcn_features.lmdb
+      test:
+      - coco/m4c_captioner/features/detectron.lmdb,coco/ocr_en/features/ocr_en_frcn_features.lmdb
+    annotations:
+      train:
+      - coco/m4c_captioner/annotations/imdb_karpathy_train.npy
+      val:
+      - coco/m4c_captioner/annotations/imdb_karpathy_val_filtered_by_image_id.npy  # only one sample per image_id
+      test:
+      - coco/m4c_captioner/annotations/imdb_karpathy_test_filtered_by_image_id.npy  # only one sample per image_id
+    processors:
+      text_processor:
+        type: bert_tokenizer
+        params:
+          tokenizer_config:
+            type: bert-base-uncased
+            params:
+              do_lower_case: true
+          max_seq_length: 3
+      answer_processor:
+        type: m4c_caption
+        params:
+          vocab_file: coco/m4c_captioner/extras/vocabs/vocab_joint_textcaps_coco_threshold_10.txt
+          preprocessor:
+            type: simple_word
+            params: {}
+          context_preprocessor:
+            type: simple_word
+            params: {}
+          max_length: 50
+          max_copy_steps: 30
+          num_answers: 1
+      caption_processor: null
+      context_processor:
+        type: fasttext
+        params:
+          max_length: 50
+          model_file: wiki.en.bin
+      copy_processor:
+        type: copy
+        params:
+          max_length: 100
+      phoc_processor:
+        type: phoc
+        params:
+          max_length: 50
@@ -1,38 +1,43 @@
 dataset_config:
-  m4c_textcaps:
-    data_dir: ${env.data_dir}
+  textcaps:
+    data_dir: ${env.data_dir}/datasets
     depth_first: false
     fast_read: false
+    zoo_requirements:
+    - textcaps.defaults
+    - textvqa.defaults
     max_features: 100
     use_images: false
     use_features: true
+    use_order_vectors: true
+    annotation_style: textcaps
     features:
       train:
-      - open_images/detectron_fix_100/fc6/train,m4c_textvqa_ocr_en_frcn_features/train_images
+      - textvqa/defaults/features/open_images/detectron.lmdb
       val:
-      - open_images/detectron_fix_100/fc6/train,m4c_textvqa_ocr_en_frcn_features/train_images
+      - textvqa/defaults/features/open_images/detectron.lmdb
       test:
-      - open_images/detectron_fix_100/fc6/test,m4c_textvqa_ocr_en_frcn_features/test_images
+      - textvqa/defaults/features/open_images/detectron.lmdb
     annotations:
       train:
-      - imdb/m4c_textcaps/imdb_train.npy
+      - textcaps/defaults/annotations/imdb_train.npy
       val:
-      - imdb/m4c_textcaps/imdb_val_filtered_by_image_id.npy  # only one sample per image_id
+      - textcaps/defaults/annotations/imdb_val_filtered_by_image_id.npy  # only one sample per image_id
       test:
-      - imdb/m4c_textcaps/imdb_test_filtered_by_image_id.npy  # only one sample per image_id
+      - textcaps/defaults/annotations/imdb_test_filtered_by_image_id.npy  # only one sample per image_id
     processors:
       text_processor:
-        type: m4c_bert_tokenizer
+        type: bert_tokenizer
         params:
           tokenizer_config:
             type: bert-base-uncased
             params:
               do_lower_case: true
-          max_length: 1
+          max_seq_length: 3
       answer_processor:
         type: m4c_caption
         params:
-          vocab_file: m4c_captioner_vocabs/textcaps/vocab_textcap_threshold_10.txt
+          vocab_file: textcaps/defaults/extras/vocabs/vocab_textcaps_threshold_10.txt
           preprocessor:
             type: simple_word
             params: {}
@@ -67,5 +72,5 @@ dataset_config:
     use_ocr_info: true
 
 training:
-    monitored_metric: m4c_textcaps/textcaps_bleu4
+    monitored_metric: textcaps/textcaps_bleu4
     metric_minimize: false
@@ -94,8 +94,9 @@ training:
     # Iteration until which warmup should be done
     warmup_iterations: 1000
 
-    # Local rank of the GPU device
+    # Device on which the model will be trained. Set 'cpu' to train/infer on CPU
     device: cuda
+    # Local rank of the GPU device
     local_rank: null
 
     # Use to load specific modules from checkpoint to your model,
 
@@ -29,8 +29,8 @@ model_config:
     image_feature_encodings:
     - type: finetune_faster_rcnn_fpn_fc7
       params:
-        bias_file: detectron/fc6/fc7_b.pkl
-        weights_file: detectron/fc6/fc7_w.pkl
+        bias_file: models/detectron.defaults/fc7_b.pkl
+        weights_file: models/detectron.defaults/fc7_w.pkl
         model_data_dir: ${model_config.butd.model_data_dir}
     inference:
       type: greedy
@@ -39,8 +39,8 @@ model_config:
     image_feature_encodings:
     - type: finetune_faster_rcnn_fpn_fc7
       params:
-        bias_file: detectron/fc6/fc7_b.pkl
-        weights_file: detectron/fc6/fc7_w.pkl
+        bias_file: models/detectron.defaults/fc7_b.pkl
+        weights_file: models/detectron.defaults/fc7_w.pkl
         model_data_dir: ${model_config.lorra.model_data_dir}
     - type: default
       params:
 
@@ -6,6 +6,6 @@ model_config:
       type: finetune_faster_rcnn_fpn_fc7
       params:
         in_dim: 2048
-        bias_file: detectron/fc6/fc7_b.pkl
-        weights_file: detectron/fc6/fc7_w.pkl
+        bias_file: models/detectron.defaults/fc7_b.pkl
+        weights_file: models/detectron.defaults/fc7_w.pkl
         model_data_dir: ${model_config.mmbt.model_data_dir}
@@ -25,8 +25,8 @@ model_config:
     image_feature_encodings:
     - type: finetune_faster_rcnn_fpn_fc7
       params:
-        bias_file: detectron/fc6/fc7_b.pkl
-        weights_file: detectron/fc6/fc7_w.pkl
+        bias_file: models/detectron.defaults/fc7_b.pkl
+        weights_file: models/detectron.defaults/fc7_w.pkl
         model_data_dir: ${model_config.pythia.model_data_dir}
     - type: default
       params:
 
@@ -6,6 +6,6 @@ model_config:
       type: finetune_faster_rcnn_fpn_fc7
       params:
         in_dim: 2048
-        bias_file: detectron/fc6/fc7_b.pkl
-        weights_file: detectron/fc6/fc7_w.pkl
+        bias_file: models/detectron.defaults/fc7_b.pkl
+        weights_file: models/detectron.defaults/fc7_w.pkl
         model_data_dir: ${model_config.unimodal_image.model_data_dir}
@@ -134,3 +134,46 @@ ocrvqa:
       - url: mmf://datasets/ocrvqa/ocr_en/features/features.tar.gz
         file_name: features.tar.gz
         hashcode: 1c9eb9df544d431f438d340afe493a4d3db759eedd485033f88fc639106edeb8
+
+
+textcaps:
+  defaults:
+    version: 1.0_2020_05_04
+    resources:
+      annotations:
+      - url: mmf://datasets/textcaps/defaults/annotations/annotations.tar.gz
+        file_name: annotations.tar.gz
+        hashcode: cfbe34a0653d18155e5b7de37724888e51c6b0e573fe436bc22ec074338d5456
+      extras:
+      - url: mmf://datasets/textcaps/defaults/extras.tar.gz
+        file_name: extras.tar.gz
+        hashcode: 6a92426b646b61eefef8d42ca38bd852375d13c7edd3cf511201633bc16aa14c
+
+coco:
+  # TODO: Fill defaults for COCO later
+  defaults:
+    version: 1.0_2020_05_04
+    resources: []
+  m4c_captioner:
+    version: 1.0_2020_05_04
+    resources:
+      features:
+      - url: mmf://datasets/coco/m4c_captioner/features/features.tar.gz
+        file_name: features.tar.gz
+        hashcode: b36428b7aac2c1b46d4fa75155ea7cd0f9a94444481563c87a5af64dd05e315f
+      annotations:
+      - url: mmf://datasets/coco/m4c_captioner/annotations/annotations.tar.gz
+        file_name: annotations.tar.gz
+        hashcode: 828a7a4310a6eddf2d13b9a6b11ca6877cb988680f2fbc838b600f6d8e4dcbfc
+      extras:
+      - url: mmf://datasets/coco/m4c_captioner/extras.tar.gz
+        file_name: extras.tar.gz
+        hashcode: a859c41693abb40362b5d0a2d844e612713103ff4fef1159e0351c34e5e6fb46
+
+  ocr_en:
+    version: 1.0_2020_05_04
+    resources:
+      features:
+      - url: mmf://datasets/coco/ocr_en/features/features.tar.gz
+        file_name: features.tar.gz
+        hashcode: 8d4d67e878208568934c2c3fb1c304f5073b5a89a25a59938d182e360e23473f
@@ -8,22 +8,34 @@ visual_bert:
         hashcode: 9af0b8101579d7587d70d7315940310c7fc5ef7269cba497780922e65e4e000d
     defaults: ${visual_bert.pretrained.coco}
 
+detectron:
+  vmb_weights:
+    version: 1.0_2020_05_03
+    resources:
+    - url: mmf://models/detectron/vmb_weights.tar.gz
+      file_name: vmb_weights.tar.gz
+      hashcode: 582d85748b2df60e3c6e045b8213c0fd7084054483a3388c55aed5b25bbc3c93
+  defaults: ${detectron.vmb_weights}
+
 m4c:
   defaults: ${m4c.textvqa.defaults}
   textvqa:
     with_stvqa:
+      zoo_requirements: detectron.vmb_weights
       version: 1.0_2020_04_29
       resources:
       - url: mmf://models/m4c/m4c.textvqa.with_stvqa.tar.gz
         file_name: m4c.textvqa.with_stvqa.tar.gz
         hashcode: 2dfd024cb144bb70843033d11dec064bc84937f22fd6561653ce1acd7610285e
     ocr_ml:
+      zoo_requirements: detectron.vmb_weights
       version: 1.0_2020_04_29
       resources:
       - url: mmf://models/m4c/m4c.textvqa.ocr_ml.tar.gz
         file_name: m4c.textvqa.ocr_ml.tar.gz
         hashcode: 4734dbf1816ffe378f08e69bd85b8cc0e4b2abe9564626505ff93e1ea7aea741
     alone:
+      zoo_requirements: detectron.vmb_weights
       version: 1.0_2020_04_29
       resources:
       - url: mmf://models/m4c/m4c.textvqa.alone.tar.gz
@@ -32,15 +44,29 @@ m4c:
     defaults: ${m4c.textvqa.with_stvqa}
   stvqa:
     defaults:
+      zoo_requirements: detectron.vmb_weights
       version: 1.0_2020_04_29
       resources:
       - url: mmf://models/m4c/m4c.stvqa.tar.gz
         file_name: m4c.stvqa.tar.gz
         hashcode: ea4af8737dc04753727b596cefb262afaa1ebdd8f4b9476c4a53d06088cedfb8
   ocrvqa:
     defaults:
+      zoo_requirements: detectron.vmb_weights
       version: 1.0_2020_04_29
       resources:
       - url: mmf://models/m4c/m4c.ocrvqa.tar.gz
         file_name: m4c.ocrvqa.tar.gz
         hashcode: 062d8ca7f47f942c47756574aa5944f3136cef7226173d59fcbd1e00f1a3c42d
+
+
+m4c_captioner:
+  defaults: ${m4c_captioner.textcaps.defaults}
+  textcaps:
+    defaults:
+      zoo_requirements: detectron.vmb_weights
+      version: 1.0_2020_05_03
+      resources:
+      - url: mmf://models/m4c_captioner/m4c_captioner.textcaps.tar.gz
+        file_name: m4c_captioner.textcaps.tar.gz
+        hashcode: 69c8220750933a0472bfdfb95b83d718dc02f7f41f43ce569c02e903896b2cf4