[Improvement] Add font color to args in long_video_demo #947

Merged
6 changes: 4 additions & 2 deletions demo/README.md
@@ -309,6 +309,7 @@ Optional arguments:
- `DEVICE_TYPE`: Type of device to run the demo on. Allowed values are CUDA devices like `cuda:0`, or `cpu`. If not specified, it will be set to `cuda:0`.
- `THRESHOLD`: Threshold of the prediction score for action recognition. Only labels with a score higher than the threshold will be shown. If not specified, it will be set to 0.01.
- `STRIDE`: By default, the demo generates a prediction for every single frame, which can be slow. To speed up, you can set the argument `STRIDE`, and the demo will then generate a prediction every `STRIDE x sample_length` frames (`sample_length` is the size of the temporal window from which frames are sampled, which equals `clip_len x frame_interval`). For example, if `sample_length` is 64 frames and you set `STRIDE` to 0.5, predictions will be generated every 32 frames. If set to 0, a prediction is generated for each frame. The recommended range for `STRIDE` is (0, 1]; values greater than 1 also work, but the generated predictions will be sparse. Default: 0.
- `FONT_COLOR`: Font color of the labels, in BGR order. Default: white, i.e. `(255, 255, 255)`.

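A quick sketch of the stride arithmetic described above; the clip settings here (`clip_len=32`, `frame_interval=2`) are assumed values chosen to reproduce the 64-frame example:

```python
# How STRIDE maps to a prediction interval (assumed clip settings).
clip_len = 32        # frames sampled per clip (assumption for illustration)
frame_interval = 2   # gap between sampled frames (assumption for illustration)
stride = 0.5

sample_length = clip_len * frame_interval       # 64-frame temporal window
prediction_every = int(stride * sample_length)  # predict every 32 frames

print(sample_length, prediction_every)  # → 64 32
```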
Examples:

@@ -343,11 +344,12 @@ or use checkpoint url from `configs/` to directly load corresponding checkpoint,
demo/label_map_k400.txt PATH_TO_SAVED_VIDEO --input-step 3 --device cpu --threshold 0.2
```

-4. Predict different labels in a long video by using a I3D model on gpu, with input_step=1 and threshold=0.01 as default.
+4. Predict different labels in a long video by using an I3D model on GPU, with `input_step=1` and `threshold=0.01` as defaults, and print the labels in cyan.

```shell
python demo/long_video_demo.py configs/recognition/i3d/i3d_r50_video_inference_32x2x1_100e_kinetics400_rgb.py \
-    checkpoints/i3d_r50_256p_32x2x1_100e_kinetics400_rgb_20200801-7d9f44de.pth PATH_TO_LONG_VIDEO demo/label_map_k400.txt PATH_TO_SAVED_VIDEO
+    checkpoints/i3d_r50_256p_32x2x1_100e_kinetics400_rgb_20200801-7d9f44de.pth PATH_TO_LONG_VIDEO demo/label_map_k400.txt PATH_TO_SAVED_VIDEO \
+    --font-color 255 255 0
```

5. Predict different labels in a long video by using an I3D model on GPU and save the results as a `json` file
30 changes: 17 additions & 13 deletions demo/long_video_demo.py
@@ -16,8 +16,6 @@

FONTFACE = cv2.FONT_HERSHEY_COMPLEX_SMALL
FONTSCALE = 1
-FONTCOLOR = (255, 255, 255)  # BGR, white
-MSGCOLOR = (128, 128, 128)  # BGR, gray
THICKNESS = 1
LINETYPE = 1

@@ -64,11 +62,17 @@ def parse_args():
help='override some settings in the used config, the key-value pair '
'in xxx=yyy format will be merged into config file. For example, '
"'--cfg-options model.backbone.depth=18 model.backbone.with_cp=True'")
+    parser.add_argument(
+        '--font-color',
+        nargs='+',
+        type=int,
+        default=(255, 255, 255),
+        help='font color (B, G, R) of the labels in output video')
args = parser.parse_args()
return args

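A minimal standalone sketch of what the `nargs='+'` / `type=int` combination above does: the three tokens after `--font-color` are each converted to `int` and collected into a list. The parser here is a throwaway mirror of that one argument, not the demo's real parser:

```python
import argparse

# Throwaway parser mirroring only the --font-color argument above.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--font-color',
    nargs='+',            # accept one or more values after the flag
    type=int,             # convert each value to int
    default=(255, 255, 255))

args = parser.parse_args(['--font-color', '255', '255', '0'])
print(args.font_color)  # → [255, 255, 0]  (cyan in BGR)
```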

-def show_results_video(result_queue, text_info, thr, msg, frame, video_writer):
+def show_results_video(result_queue, text_info, msg, thr, clr, fr, v_writer):
if len(result_queue) != 0:
text_info = {}
results = result_queue.popleft()
@@ -79,20 +83,20 @@ def show_results_video(result_queue, text_info, thr, msg, frame, video_writer):
location = (0, 40 + i * 20)
text = selected_label + ': ' + str(round(score, 2))
text_info[location] = text
-            cv2.putText(frame, text, location, FONTFACE, FONTSCALE, FONTCOLOR,
+            cv2.putText(fr, text, location, FONTFACE, FONTSCALE, clr,
THICKNESS, LINETYPE)
elif len(text_info):
for location, text in text_info.items():
-            cv2.putText(frame, text, location, FONTFACE, FONTSCALE, FONTCOLOR,
+            cv2.putText(fr, text, location, FONTFACE, FONTSCALE, clr,
> **Collaborator:** it is not good to use abbreviation as variable name, which is hard to understand
THICKNESS, LINETYPE)
    else:
-        cv2.putText(frame, msg, (0, 40), FONTFACE, FONTSCALE, MSGCOLOR,
-                    THICKNESS, LINETYPE)
-    video_writer.write(frame)
+        cv2.putText(fr, msg, (0, 40), FONTFACE, FONTSCALE, clr, THICKNESS,
+                    LINETYPE)
+    v_writer.write(fr)
return text_info
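The queue-and-cache behavior in `show_results_video` above (pop a fresh result when one is ready, otherwise keep re-drawing the cached labels) can be sketched without OpenCV. `result_queue` is a `collections.deque` of `(label, score)` lists; the threshold filtering here follows the same shape as the demo's, but this helper and its names are illustrative, not the demo's actual code:

```python
from collections import deque

def cache_labels(result_queue, text_info, thr):
    """Return the label cache to draw on the current frame.

    Rebuilds the cache when a fresh result is queued; otherwise the
    previous cache is reused, mirroring show_results_video's branches.
    """
    if len(result_queue) != 0:
        text_info = {}
        results = result_queue.popleft()
        for i, (label, score) in enumerate(results):
            if score < thr:
                break
            location = (0, 40 + i * 20)          # one text row per label
            text_info[location] = f'{label}: {round(score, 2)}'
    return text_info

queue = deque([[('dancing', 0.9), ('jumping', 0.4)]])
info = cache_labels(queue, {}, thr=0.5)    # fresh result: cache rebuilt
info = cache_labels(queue, info, thr=0.5)  # queue empty: cache reused
print(info)  # → {(0, 40): 'dancing: 0.9'}
```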


-def get_results_json(result_queue, text_info, thr, msg, ind, out_json):
+def get_results_json(result_queue, text_info, msg, thr, ind, out_json):
> **Collaborator:** It is not good to change the argument position

> **Contributor (author):** I updated accordingly. But why isn't this one good btw?

if len(result_queue) != 0:
text_info = {}
results = result_queue.popleft()
@@ -163,12 +167,12 @@ def show_results(model, data, label, args):

    if args.out_file.endswith('.json'):
        text_info, out_json = get_results_json(result_queue, text_info,
-                                               args.threshold, msg, ind,
+                                               msg, args.threshold, ind,
                                               out_json)
    else:
-        text_info = show_results_video(result_queue, text_info,
-                                       args.threshold, msg, frame,
-                                       video_writer)
+        text_info = show_results_video(result_queue, text_info, msg,
+                                       args.threshold, args.font_color,
+                                       frame, video_writer)

cap.release()
cv2.destroyAllWindows()