[Improvement] Add font color to args in long_video_demo (#947)

* [Improvement] Add font color to args in long_video_demo * [Improvement] Add font color to args in long_video_demo * [Improvement] Add font color to args in long_video_demo * polish * polish Co-authored-by: dreamerlin <528557675@qq.com>
open-mmlab · Jun 22, 2021 · 830f2a4 · 830f2a4
1 parent 0a6fde1
commit 830f2a4
Show file tree

Hide file tree

Showing 2 changed files with 32 additions and 11 deletions.
diff --git a/demo/README.md b/demo/README.md
@@ -309,6 +309,8 @@ Optional arguments:
 - `DEVICE_TYPE`: Type of device to run the demo. Allowed values are cuda device like `cuda:0` or `cpu`. If not specified, it will be set to `cuda:0`.
 - `THRESHOLD`: Threshold of prediction score for action recognition. Only label with score higher than the threshold will be shown. If not specified, it will be set to 0.01.
 - `STRIDE`: By default, the demo generates a prediction for each single frame, which might cost lots of time. To speed up, you can set the argument `STRIDE` and then the demo will generate a prediction every `STRIDE x sample_length` frames (`sample_length` indicates the size of temporal window from which you sample frames, which equals to `clip_len x frame_interval`). For example, if the sample_length is 64 frames and you set `STRIDE` to 0.5, predictions will be generated every 32 frames. If set as 0, predictions will be generated for each frame. The desired value of `STRIDE` is (0, 1], while it also works for `STRIDE > 1` (the generated predictions will be too sparse). Default: 0.
+- `LABEL_COLOR`: Font color of the labels in (B, G, R). Default is white, that is (255, 255, 255).
+- `MSG_COLOR`: Font color of the messages in (B, G, R). Default is gray, that is (128, 128, 128).
 
 Examples:
 
@@ -343,11 +345,12 @@ or use checkpoint url from `configs/` to directly load corresponding checkpoint,
       demo/label_map_k400.txt PATH_TO_SAVED_VIDEO --input-step 3 --device cpu --threshold 0.2
     ```
 
-4. Predict different labels in a long video by using a I3D model on gpu, with input_step=1 and threshold=0.01 as default.
+4. Predict different labels in a long video by using an I3D model on gpu, with input_step=1 and threshold=0.01 as default, and print the labels in cyan.
 
     ```shell
     python demo/long_video_demo.py configs/recognition/i3d/i3d_r50_video_inference_32x2x1_100e_kinetics400_rgb.py \
-      checkpoints/i3d_r50_256p_32x2x1_100e_kinetics400_rgb_20200801-7d9f44de.pth PATH_TO_LONG_VIDEO demo/label_map_k400.txt PATH_TO_SAVED_VIDEO
+      checkpoints/i3d_r50_256p_32x2x1_100e_kinetics400_rgb_20200801-7d9f44de.pth PATH_TO_LONG_VIDEO demo/label_map_k400.txt PATH_TO_SAVED_VIDEO \
+      --label-color 255 255 0
     ```
 
 5. Predict different labels in a long video by using an I3D model on gpu and save the results as a `json` file.

diff --git a/demo/long_video_demo.py b/demo/long_video_demo.py
@@ -16,8 +16,6 @@
 
 FONTFACE = cv2.FONT_HERSHEY_COMPLEX_SMALL
 FONTSCALE = 1
-FONTCOLOR = (255, 255, 255)  # BGR, white
-MSGCOLOR = (128, 128, 128)  # BGR, gray
 THICKNESS = 1
 LINETYPE = 1
 
@@ -64,11 +62,30 @@ def parse_args():
         help='override some settings in the used config, the key-value pair '
         'in xxx=yyy format will be merged into config file. For example, '
         "'--cfg-options model.backbone.depth=18 model.backbone.with_cp=True'")
+    parser.add_argument(
+        '--label-color',
+        nargs='+',
+        type=int,
+        default=(255, 255, 255),
+        help='font color (B, G, R) of the labels in output video')
+    parser.add_argument(
+        '--msg-color',
+        nargs='+',
+        type=int,
+        default=(128, 128, 128),
+        help='font color (B, G, R) of the messages in output video')
     args = parser.parse_args()
     return args
 
 
-def show_results_video(result_queue, text_info, thr, msg, frame, video_writer):
+def show_results_video(result_queue,
+                       text_info,
+                       thr,
+                       msg,
+                       frame,
+                       video_writer,
+                       label_color=(255, 255, 255),
+                       msg_color=(128, 128, 128)):
     if len(result_queue) != 0:
         text_info = {}
         results = result_queue.popleft()
@@ -79,14 +96,14 @@ def show_results_video(result_queue, text_info, thr, msg, frame, video_writer):
             location = (0, 40 + i * 20)
             text = selected_label + ': ' + str(round(score, 2))
             text_info[location] = text
-            cv2.putText(frame, text, location, FONTFACE, FONTSCALE, FONTCOLOR,
-                        THICKNESS, LINETYPE)
+            cv2.putText(frame, text, location, FONTFACE, FONTSCALE,
+                        label_color, THICKNESS, LINETYPE)
     elif len(text_info):
         for location, text in text_info.items():
-            cv2.putText(frame, text, location, FONTFACE, FONTSCALE, FONTCOLOR,
-                        THICKNESS, LINETYPE)
+            cv2.putText(frame, text, location, FONTFACE, FONTSCALE,
+                        label_color, THICKNESS, LINETYPE)
     else:
-        cv2.putText(frame, msg, (0, 40), FONTFACE, FONTSCALE, MSGCOLOR,
+        cv2.putText(frame, msg, (0, 40), FONTFACE, FONTSCALE, msg_color,
                     THICKNESS, LINETYPE)
     video_writer.write(frame)
     return text_info
@@ -168,7 +185,8 @@ def show_results(model, data, label, args):
         else:
             text_info = show_results_video(result_queue, text_info,
                                            args.threshold, msg, frame,
-                                           video_writer)
+                                           video_writer, args.label_color,
+                                           args.msg_color)
 
     cap.release()
     cv2.destroyAllWindows()