Skip to content

Commit

Permalink
Merge f92ca92 into a56c949
Browse files Browse the repository at this point in the history
  • Loading branch information
su authored Nov 30, 2020
2 parents a56c949 + f92ca92 commit e80b90e
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 4 deletions.
1 change: 1 addition & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
- Support GYM99 data preparation ([#331](https://github.com/open-mmlab/mmaction2/pull/331))
- Add GradCAM utils for recognizer ([#324](https://github.com/open-mmlab/mmaction2/pull/324))
- Add print config script ([#345](https://github.com/open-mmlab/mmaction2/pull/345))
- Add online motion vector decoder ([#291](https://github.com/open-mmlab/mmaction2/pull/291))

**Improvements**
- Support PyTorch 1.7 in CI ([#312](https://github.com/open-mmlab/mmaction2/pull/312))
Expand Down
7 changes: 4 additions & 3 deletions mmaction/datasets/pipelines/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@
GenerateLocalizationLabels, ImageDecode,
LoadAudioFeature, LoadHVULabel, LoadLocalizationFeature,
LoadProposals, OpenCVDecode, OpenCVInit, PyAVDecode,
PyAVInit, RawFrameDecode, SampleAVAFrames, SampleFrames,
SampleProposalFrames, UntrimmedSampleFrames)
PyAVDecodeMotionVector, PyAVInit, RawFrameDecode,
SampleAVAFrames, SampleFrames, SampleProposalFrames,
UntrimmedSampleFrames)

__all__ = [
'SampleFrames', 'PyAVDecode', 'DecordDecode', 'DenseSampleFrames',
Expand All @@ -31,5 +32,5 @@
'FormatAudioShape', 'LoadAudioFeature', 'AudioFeatureSelector',
'AudioDecodeInit', 'EntityBoxPad', 'EntityBoxFlip', 'EntityBoxCrop',
'EntityBoxRescale', 'EntityBoxClip', 'RandomScale', 'ImageDecode',
'BuildPseudoClip', 'RandomRescale'
'BuildPseudoClip', 'RandomRescale', 'PyAVDecodeMotionVector'
]
82 changes: 82 additions & 0 deletions mmaction/datasets/pipelines/loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -814,6 +814,88 @@ def __repr__(self):
return repr_str


@PIPELINES.register_module()
class PyAVDecodeMotionVector(PyAVDecode):
    """Using pyav to decode the motion vectors from video.

    Reference: https://github.com/PyAV-Org/PyAV/
        blob/main/tests/test_decode.py

    Required keys are "video_reader" and "frame_inds",
    added or modified keys are "motion_vectors", "frame_inds".
    "video_reader" is reset to ``None`` once decoding is done.

    Args:
        multi_thread (bool): If set to True, it will apply multi
            thread processing. Default: False.
    """

    @staticmethod
    def _parse_vectors(mv, vectors, height, width):
        """Paint the motion vectors of one frame onto a dense map.

        Each entry of ``vectors`` describes a rectangular block of size
        ``w`` x ``h`` centred on ``(dst_x, dst_y)``. Every pixel of a block
        that lies completely inside the frame is set to the block's
        displacement ``(dst_x - src_x, dst_y - src_y)``.

        Args:
            mv (np.ndarray): Array indexed as ``mv[y, x]`` that receives
                the ``(dx, dy)`` pair of each block. Modified in place.
            vectors (np.ndarray): Structured array with the fields
                'w', 'h', 'src_x', 'src_y', 'dst_x' and 'dst_y'.
            height (int): Frame height in pixels.
            width (int): Frame width in pixels.

        Returns:
            np.ndarray: The same ``mv`` array that was passed in.
        """
        block_w, block_h = vectors['w'], vectors['h']
        dst_x, dst_y = vectors['dst_x'], vectors['dst_y']
        val_x = dst_x - vectors['src_x']
        val_y = dst_y - vectors['src_y']
        # (dst_x, dst_y) is the block centre; shift to its top-left corner.
        start_x = dst_x - block_w // 2
        start_y = dst_y - block_h // 2
        end_x = start_x + block_w
        end_y = start_y + block_h
        for sx, ex, sy, ey, vx, vy in zip(start_x, end_x, start_y, end_y,
                                          val_x, val_y):
            # ``ex`` / ``ey`` are exclusive slice ends, so a block touching
            # the right or bottom frame edge (ex == width, ey == height) is
            # still fully inside the frame and must be kept.
            if sx >= 0 and ex <= width and sy >= 0 and ey <= height:
                mv[sy:ey, sx:ex] = (vx, vy)

        return mv

    def __call__(self, results):
        """Perform the PyAV motion vector decoding.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.

        Returns:
            dict: ``results`` with "motion_vectors" added, "frame_inds"
                squeezed when it was not 1-dim, and "video_reader" set
                to ``None``.
        """
        container = results['video_reader']
        imgs = []

        if self.multi_thread:
            container.streams.video[0].thread_type = 'AUTO'
        if results['frame_inds'].ndim != 1:
            results['frame_inds'] = np.squeeze(results['frame_inds'])

        # Frames are decoded sequentially from the start, so nothing beyond
        # the largest requested index (plus one spare frame) is needed.
        num_frames = max(results['frame_inds']) + 2
        stream = container.streams.video[0]
        codec_context = stream.codec_context
        # Ask the decoder to export motion vectors as frame side data.
        codec_context.options = {'flags2': '+export_mvs'}
        for packet in container.demux(stream):
            # Stop demuxing as soon as enough frames are collected, rather
            # than walking through every remaining packet of the video.
            if len(imgs) >= num_frames:
                break
            for frame in packet.decode():
                if len(imgs) >= num_frames:
                    break
                height = frame.height
                width = frame.width
                # NOTE(review): int8 presumably wraps displacements larger
                # than 127 pixels; kept to preserve the output dtype.
                mv = np.zeros((height, width, 2), dtype=np.int8)
                vectors = frame.side_data.get('MOTION_VECTORS')
                if frame.key_frame:
                    # Key frames don't have motion vectors
                    assert vectors is None
                if vectors is not None and len(vectors) > 0:
                    mv = self._parse_vectors(mv, vectors.to_ndarray(),
                                             height, width)
                imgs.append(mv)

        results['video_reader'] = None
        del container

        # The number of frames actually decoded by pyav may be smaller than
        # the requested indices, so wrap the indices around to stay in range.
        results['motion_vectors'] = np.array(
            [imgs[i % len(imgs)] for i in results['frame_inds']])
        return results


@PIPELINES.register_module()
class DecordInit:
"""Using decord to initialize the video_reader.
Expand Down
29 changes: 28 additions & 1 deletion tests/test_data/test_loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
LoadAudioFeature, LoadHVULabel,
LoadLocalizationFeature,
LoadProposals, OpenCVDecode,
OpenCVInit, PyAVDecode, PyAVInit,
OpenCVInit, PyAVDecode,
PyAVDecodeMotionVector, PyAVInit,
RawFrameDecode, SampleAVAFrames,
SampleFrames, SampleProposalFrames,
UntrimmedSampleFrames)
Expand Down Expand Up @@ -1466,3 +1467,29 @@ def test_audio_feature_selector(self):
assert repr(audio_feature_selector) == (
f'{audio_feature_selector.__class__.__name__}('
f'fix_length={128})')

def test_pyav_decode_motion_vector(self):
    """Motion vector decoding must add the ``motion_vectors`` key.

    The pipeline is run twice on the same video: first with 2-dim
    ``frame_inds`` (a column vector), then with 1-dim ``frame_inds``.
    """
    target_keys = ['motion_vectors']

    for as_column in (True, False):
        frame_inds = np.arange(0, 32, 1)
        if as_column:
            # 2-dim input, expected to be handled by the decoder
            frame_inds = frame_inds[:, np.newaxis]

        results = dict(filename=self.video_path, frame_inds=frame_inds)
        results = PyAVInit()(results)
        results = PyAVDecodeMotionVector()(results)

        assert self.check_keys_contain(results.keys(), target_keys)

0 comments on commit e80b90e

Please sign in to comment.