-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathrepair_caption_file.py
64 lines (49 loc) · 1.87 KB
/
repair_caption_file.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import os
import sys
import glob
from wrapped_json import json
# Directories (relative to the current working directory) that are searched
# for per-video caption JSON files, in the order they are processed.
CAPTION_DIRS = (
    'data/remote/public/subtitles',
    'data/remote/private/caption_data/raw_captions',
    'data/remote/private/caption_data/meta_trimmed_captions',
    'data/remote/private/caption_data/captions_all_translations',
    'data/remote/private/caption_data/captions_human_translations',
)

# A rect whose element 2 is above this value is taken to be "already relative
# to 0,0" and is not shifted again.
# NOTE(review): the value 50 is a heuristic; confirm it suits all caption data.
ALREADY_ABSOLUTE_THRESHOLD = 50


def collect_video_ids(show):
    """Return the ``id`` of every episode of every season, in file order.

    Args:
        show: Parsed show description with a ``'seasons'`` list whose items
            each carry an ``'episodes'`` list of dicts with an ``'id'`` key.
    """
    return [
        episode['id']
        for season in show['seasons']
        for episode in season['episodes']
    ]


def find_caption_files(video_ids):
    """Return the caption JSON paths under ``CAPTION_DIRS`` for the given ids.

    For each video id, every directory is globbed with ``<id>*.json``.
    Results are grouped by video id first, then by directory.
    """
    files = []
    for video_id in video_ids:
        for directory in CAPTION_DIRS:
            files += glob.glob(f'{directory}/{video_id}*.json')
    return files


def offset_caption_rects(data):
    """Shift the rects of ``data['lines']`` by the caption-area offset, in place.

    The offset is ``data['caption_top'] * data['frame_size'][0]``. Element 3
    of each line is ``None``, a single rect, or a list of rects. For each
    rect, elements 2 and 3 get the offset added and are rounded to integers
    (presumably the vertical coordinates - TODO confirm).

    Lines without rects, empty or ``None`` rects, and rects whose first
    element is ``None`` are skipped.

    Raises:
        KeyError: If ``'caption_top'``, ``'frame_size'`` or ``'lines'`` is
            missing from ``data``.
    """
    y_offset = data['caption_top'] * data['frame_size'][0]
    for line in data['lines']:
        rects = line[3]
        if not rects:
            # None or an empty list: this line has nothing to shift.
            continue
        if not isinstance(rects[0], list):
            # A single rect was stored directly; handle it like a 1-item list.
            rects = [rects]
        for rect in rects:
            if not rect or rect[0] is None:
                continue
            if rect[2] > ALREADY_ABSOLUTE_THRESHOLD:
                # Already relative to 0,0. This stops the remaining rects of
                # this line only; later lines are still examined.
                break
            rect[2] = round(rect[2] + y_offset)
            rect[3] = round(rect[3] + y_offset)
            print('After rect', rect)


def repair_caption_file(filename, show_name):
    """Repair one caption file in place.

    Files with a missing or ``None`` ``'frame_size'`` are left untouched.
    Otherwise the rects are shifted (see ``offset_caption_rects``), the
    ``'version'`` key is set to 1, ``'show_name'`` is recorded, and the file
    is overwritten.

    Returns:
        True if the file was rewritten, False if it was skipped.
    """
    print('Processing', filename)
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(filename, 'r', encoding='utf-8') as f:
        data = json.loads(f.read())
    if 'frame_size' not in data or data['frame_size'] is None:
        return False
    offset_caption_rects(data)
    data['version'] = 1
    data['show_name'] = show_name
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(data, f)
    return True


def main(argv):
    """Repair every caption file belonging to the show named in ``argv[1]``.

    The show description is read from ``data/git/shows/<show_name>.json``.
    Exits with a usage message when no show name is given.
    """
    if len(argv) < 2:
        sys.exit('usage: repair_caption_file.py SHOW_NAME')
    show_name = argv[1]
    with open(f'data/git/shows/{show_name}.json', encoding='utf-8') as f:
        show = json.load(f)
    for filename in find_caption_files(collect_video_ids(show)):
        repair_caption_file(filename, show_name)


if __name__ == '__main__':
    main(sys.argv)