Skip to content

Commit 646078d

Browse files
Allow for styles to persist over lines (#123)
* Allow for styles to persist over lines
* Update to pass by reference for style attributes
1 parent f285923 commit 646078d

8 files changed

+134
-63
lines changed

srt.go

+23-27
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ func ReadFromSRT(i io.Reader) (o *Subtitles, err error) {
4242
var line string
4343
var lineNum int
4444
var s = &Item{}
45+
var sa = &StyleAttributes{}
4546
for scanner.Scan() {
4647
// Fetch line
4748
line = strings.TrimSpace(scanner.Text())
@@ -58,6 +59,9 @@ func ReadFromSRT(i io.Reader) (o *Subtitles, err error) {
5859

5960
// Line contains time boundaries
6061
if strings.Contains(line, srtTimeBoundariesSeparator) {
62+
// Reset style attributes
63+
sa = &StyleAttributes{}
64+
6165
// Remove last item of previous subtitle since it should be the index.
6266
// If the last line is empty then the item is missing an index.
6367
var index string
@@ -118,7 +122,7 @@ func ReadFromSRT(i io.Reader) (o *Subtitles, err error) {
118122
o.Items = append(o.Items, s)
119123
} else {
120124
// Add text
121-
if l := parseTextSrt(strings.TrimSpace(line)); len(l.Items) > 0 {
125+
if l := parseTextSrt(line, sa); len(l.Items) > 0 {
122126
s.Lines = append(s.Lines, l)
123127
}
124128
}
@@ -127,7 +131,7 @@ func ReadFromSRT(i io.Reader) (o *Subtitles, err error) {
127131
}
128132

129133
// parseTextSrt parses the input line to fill the Line
130-
func parseTextSrt(i string) (o Line) {
134+
func parseTextSrt(i string, sa *StyleAttributes) (o Line) {
131135
// special handling needed for empty line
132136
if strings.TrimSpace(i) == "" {
133137
o.Items = []LineItem{{Text: ""}}
@@ -138,13 +142,6 @@ func parseTextSrt(i string) (o Line) {
138142
tr := html.NewTokenizer(strings.NewReader(i))
139143

140144
// Loop
141-
var (
142-
bold bool
143-
italic bool
144-
underline bool
145-
color *string
146-
pos byte
147-
)
148145
for {
149146
// Get next tag
150147
t := tr.Next()
@@ -164,46 +161,45 @@ func parseTextSrt(i string) (o Line) {
164161
// Parse italic/bold/underline
165162
switch token.Data {
166163
case "b":
167-
bold = false
164+
sa.SRTBold = false
168165
case "i":
169-
italic = false
166+
sa.SRTItalics = false
170167
case "u":
171-
underline = false
168+
sa.SRTUnderline = false
172169
case "font":
173-
color = nil
170+
sa.SRTColor = nil
174171
}
175172
case html.StartTagToken:
176173
// Parse italic/bold/underline
177174
switch token.Data {
178175
case "b":
179-
bold = true
176+
sa.SRTBold = true
180177
case "i":
181-
italic = true
178+
sa.SRTItalics = true
182179
case "u":
183-
underline = true
180+
sa.SRTUnderline = true
184181
case "font":
185182
if c := htmlTokenAttribute(&token, "color"); c != nil {
186-
color = c
183+
sa.SRTColor = c
187184
}
188185
}
189186
case html.TextToken:
190187
if s := strings.TrimSpace(raw); s != "" {
191188
// Get style attribute
192-
var sa *StyleAttributes
193-
if bold || italic || underline || color != nil || pos != 0 {
194-
sa = &StyleAttributes{
195-
SRTBold: bold,
196-
SRTColor: color,
197-
SRTItalics: italic,
198-
SRTPosition: pos,
199-
SRTUnderline: underline,
189+
var styleAttributes *StyleAttributes
190+
if sa.SRTBold || sa.SRTColor != nil || sa.SRTItalics || sa.SRTUnderline {
191+
styleAttributes = &StyleAttributes{
192+
SRTBold: sa.SRTBold,
193+
SRTColor: sa.SRTColor,
194+
SRTItalics: sa.SRTItalics,
195+
SRTUnderline: sa.SRTUnderline,
200196
}
201-
sa.propagateSRTAttributes()
197+
styleAttributes.propagateSRTAttributes()
202198
}
203199

204200
// Append item
205201
o.Items = append(o.Items, LineItem{
206-
InlineStyle: sa,
202+
InlineStyle: styleAttributes,
207203
Text: unescapeHTML(s),
208204
})
209205
}

srt_test.go

+18-7
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ func TestSRTStyled(t *testing.T) {
6060
assert.NoError(t, err)
6161

6262
// assert the items are properly parsed
63-
assert.Len(t, s.Items, 6)
63+
assert.Len(t, s.Items, 9)
6464
assert.Equal(t, 17*time.Second+985*time.Millisecond, s.Items[0].StartAt)
6565
assert.Equal(t, 20*time.Second+521*time.Millisecond, s.Items[0].EndAt)
6666
assert.Equal(t, "[instrumental music]", s.Items[0].Lines[0].String())
@@ -80,36 +80,47 @@ func TestSRTStyled(t *testing.T) {
8080
assert.Equal(t, 1*time.Minute+31*time.Second+992*time.Millisecond, s.Items[5].EndAt)
8181
assert.Equal(t, "[automated]", s.Items[5].Lines[0].String())
8282
assert.Equal(t, "'The time is 7:35.'", s.Items[5].Lines[1].String())
83+
assert.Equal(t, "Test with multi line italics", s.Items[6].Lines[0].String())
84+
assert.Equal(t, "Terminated on the next line", s.Items[6].Lines[1].String())
85+
assert.Equal(t, "Unterminated styles", s.Items[7].Lines[0].String())
86+
assert.Equal(t, "Do no fall to the next item", s.Items[8].Lines[0].String())
8387

8488
// assert the styles of the items
85-
assert.Len(t, s.Items, 6)
8689
assert.Equal(t, "#00ff00", *s.Items[0].Lines[0].Items[0].InlineStyle.SRTColor)
87-
assert.Zero(t, s.Items[0].Lines[0].Items[0].InlineStyle.SRTPosition)
8890
assert.True(t, s.Items[0].Lines[0].Items[0].InlineStyle.SRTBold)
8991
assert.False(t, s.Items[0].Lines[0].Items[0].InlineStyle.SRTItalics)
9092
assert.False(t, s.Items[0].Lines[0].Items[0].InlineStyle.SRTUnderline)
9193
assert.Equal(t, "#ff00ff", *s.Items[1].Lines[0].Items[0].InlineStyle.SRTColor)
92-
assert.Zero(t, s.Items[1].Lines[0].Items[0].InlineStyle.SRTPosition)
9394
assert.False(t, s.Items[1].Lines[0].Items[0].InlineStyle.SRTBold)
9495
assert.False(t, s.Items[1].Lines[0].Items[0].InlineStyle.SRTItalics)
9596
assert.False(t, s.Items[1].Lines[0].Items[0].InlineStyle.SRTUnderline)
9697
assert.Equal(t, "#00ff00", *s.Items[2].Lines[0].Items[0].InlineStyle.SRTColor)
97-
assert.Zero(t, s.Items[2].Lines[0].Items[0].InlineStyle.SRTPosition)
9898
assert.False(t, s.Items[2].Lines[0].Items[0].InlineStyle.SRTBold)
9999
assert.False(t, s.Items[2].Lines[0].Items[0].InlineStyle.SRTItalics)
100100
assert.False(t, s.Items[2].Lines[0].Items[0].InlineStyle.SRTUnderline)
101101
assert.Nil(t, s.Items[3].Lines[0].Items[0].InlineStyle.SRTColor)
102-
assert.Zero(t, s.Items[3].Lines[0].Items[0].InlineStyle.SRTPosition)
103102
assert.True(t, s.Items[3].Lines[0].Items[0].InlineStyle.SRTBold)
104103
assert.False(t, s.Items[3].Lines[0].Items[0].InlineStyle.SRTItalics)
105104
assert.True(t, s.Items[3].Lines[0].Items[0].InlineStyle.SRTUnderline)
106105
assert.Nil(t, s.Items[4].Lines[0].Items[0].InlineStyle)
107106
assert.Nil(t, s.Items[5].Lines[0].Items[0].InlineStyle)
108107
assert.Nil(t, s.Items[5].Lines[1].Items[0].InlineStyle.SRTColor)
109-
assert.Zero(t, s.Items[5].Lines[1].Items[0].InlineStyle.SRTPosition)
110108
assert.False(t, s.Items[5].Lines[1].Items[0].InlineStyle.SRTBold)
111109
assert.True(t, s.Items[5].Lines[1].Items[0].InlineStyle.SRTItalics)
112110
assert.False(t, s.Items[5].Lines[1].Items[0].InlineStyle.SRTUnderline)
111+
assert.True(t, s.Items[6].Lines[0].Items[0].InlineStyle.SRTItalics)
112+
assert.False(t, s.Items[6].Lines[0].Items[0].InlineStyle.SRTUnderline)
113+
assert.False(t, s.Items[6].Lines[0].Items[0].InlineStyle.SRTBold)
114+
assert.Nil(t, s.Items[6].Lines[0].Items[0].InlineStyle.SRTColor)
115+
assert.True(t, s.Items[6].Lines[1].Items[0].InlineStyle.SRTItalics)
116+
assert.False(t, s.Items[6].Lines[1].Items[0].InlineStyle.SRTUnderline)
117+
assert.False(t, s.Items[6].Lines[1].Items[0].InlineStyle.SRTBold)
118+
assert.Nil(t, s.Items[6].Lines[1].Items[0].InlineStyle.SRTColor)
119+
assert.True(t, s.Items[7].Lines[0].Items[0].InlineStyle.SRTItalics)
120+
assert.False(t, s.Items[7].Lines[0].Items[0].InlineStyle.SRTUnderline)
121+
assert.False(t, s.Items[7].Lines[0].Items[0].InlineStyle.SRTBold)
122+
assert.Nil(t, s.Items[7].Lines[0].Items[0].InlineStyle.SRTColor)
123+
assert.Nil(t, s.Items[8].Lines[0].Items[0].InlineStyle)
113124

114125
// Write to srt
115126
w := &bytes.Buffer{}

testdata/example-in-styled.srt

+13
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,16 @@
2222
00:01:29,590 --> 00:01:31,992
2323
[automated]
2424
<i>'The time is 7:35.'</i>
25+
26+
7
27+
00:08:00,000 --> 00:09:00,000
28+
<i>Test with multi line italics
29+
Terminated on the next line</i>
30+
31+
8
32+
00:09:00,000 --> 00:10:00,000
33+
<i>Unterminated styles
34+
35+
9
36+
00:10:00,000 --> 00:11:00,000
37+
Do no fall to the next item

testdata/example-out-styled.srt

+13
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,16 @@
2222
00:01:29,590 --> 00:01:31,992
2323
[automated]
2424
<i>'The time is 7:35.'</i>
25+
26+
7
27+
00:08:00,000 --> 00:09:00,000
28+
<i>Test with multi line italics</i>
29+
<i>Terminated on the next line</i>
30+
31+
8
32+
00:09:00,000 --> 00:10:00,000
33+
<i>Unterminated styles</i>
34+
35+
9
36+
00:10:00,000 --> 00:11:00,000
37+
Do no fall to the next item

testdata/example-out-styled.vtt

+13
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,16 @@ WEBVTT
2424
00:01:29.590 --> 00:01:31.992
2525
[automated]
2626
<i>'The time is 7:35.'</i>
27+
28+
7
29+
00:08:00.000 --> 00:09:00.000
30+
<i>Test with multi line italics</i>
31+
<i>Terminated on the next line</i>
32+
33+
8
34+
00:09:00.000 --> 00:10:00.000
35+
<i>Unterminated styles</i>
36+
37+
9
38+
00:10:00.000 --> 00:11:00.000
39+
Do no fall to the next item

webvtt.go

+23-21
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ func ReadFromWebVTT(i io.Reader) (o *Subtitles, err error) {
141141
var blockName string
142142
var comments []string
143143
var index int
144-
var webVTTStyles *StyleAttributes
144+
var sa = &StyleAttributes{}
145145

146146
for scanner.Scan() {
147147
// Fetch line
@@ -162,11 +162,15 @@ func ReadFromWebVTT(i io.Reader) (o *Subtitles, err error) {
162162
// Reset block name, if we are not in the middle of CSS.
163163
// If we are in STYLE block and the CSS is empty or we meet the right brace at the end of last line,
164164
// then we are not in CSS and can switch to parse next WebVTT block.
165-
if blockName != webvttBlockNameStyle || webVTTStyles == nil ||
166-
len(webVTTStyles.WebVTTStyles) == 0 ||
167-
strings.HasSuffix(webVTTStyles.WebVTTStyles[len(webVTTStyles.WebVTTStyles)-1], "}") {
165+
if blockName != webvttBlockNameStyle || sa == nil ||
166+
len(sa.WebVTTStyles) == 0 ||
167+
strings.HasSuffix(sa.WebVTTStyles[len(sa.WebVTTStyles)-1], "}") {
168168
blockName = ""
169169
}
170+
171+
// Reset WebVTTTags
172+
sa.WebVTTTags = []WebVTTTag{}
173+
170174
// Region
171175
case strings.HasPrefix(line, "Region: "):
172176
// Add region styles
@@ -207,9 +211,9 @@ func ReadFromWebVTT(i io.Reader) (o *Subtitles, err error) {
207211
blockName = webvttBlockNameStyle
208212

209213
if _, ok := o.Styles[webvttDefaultStyleID]; !ok {
210-
webVTTStyles = &StyleAttributes{}
214+
sa = &StyleAttributes{}
211215
o.Styles[webvttDefaultStyleID] = &Style{
212-
InlineStyle: webVTTStyles,
216+
InlineStyle: sa,
213217
ID: webvttDefaultStyleID,
214218
}
215219
}
@@ -314,10 +318,10 @@ func ReadFromWebVTT(i io.Reader) (o *Subtitles, err error) {
314318
case webvttBlockNameComment:
315319
comments = append(comments, line)
316320
case webvttBlockNameStyle:
317-
webVTTStyles.WebVTTStyles = append(webVTTStyles.WebVTTStyles, line)
321+
sa.WebVTTStyles = append(sa.WebVTTStyles, line)
318322
case webvttBlockNameText:
319323
// Parse line
320-
if l := parseTextWebVTT(line); len(l.Items) > 0 {
324+
if l := parseTextWebVTT(line, sa); len(l.Items) > 0 {
321325
item.Lines = append(item.Lines, l)
322326
}
323327
default:
@@ -330,12 +334,10 @@ func ReadFromWebVTT(i io.Reader) (o *Subtitles, err error) {
330334
}
331335

332336
// parseTextWebVTT parses the input line to fill the Line
333-
func parseTextWebVTT(i string) (o Line) {
337+
func parseTextWebVTT(i string, sa *StyleAttributes) (o Line) {
334338
// Create tokenizer
335339
tr := html.NewTokenizer(strings.NewReader(i))
336340

337-
webVTTTagStack := make([]WebVTTTag, 0, 16)
338-
339341
// Loop
340342
for {
341343
// Get next tag
@@ -348,8 +350,8 @@ func parseTextWebVTT(i string) (o Line) {
348350
switch t {
349351
case html.EndTagToken:
350352
// Pop the top of stack if we meet end tag
351-
if len(webVTTTagStack) > 0 {
352-
webVTTTagStack = webVTTTagStack[:len(webVTTTagStack)-1]
353+
if len(sa.WebVTTTags) > 0 {
354+
sa.WebVTTTags = sa.WebVTTTags[:len(sa.WebVTTTags)-1]
353355
}
354356
case html.StartTagToken:
355357
if matches := webVTTRegexpTag.FindStringSubmatch(string(tr.Raw())); len(matches) > 4 {
@@ -377,7 +379,7 @@ func parseTextWebVTT(i string) (o Line) {
377379
}
378380

379381
// Push the tag to stack
380-
webVTTTagStack = append(webVTTTagStack, WebVTTTag{
382+
sa.WebVTTTags = append(sa.WebVTTTags, WebVTTTag{
381383
Name: tagName,
382384
Classes: classes,
383385
Annotation: annotation,
@@ -386,18 +388,18 @@ func parseTextWebVTT(i string) (o Line) {
386388

387389
case html.TextToken:
388390
// Get style attribute
389-
var sa *StyleAttributes
390-
if len(webVTTTagStack) > 0 {
391-
tags := make([]WebVTTTag, len(webVTTTagStack))
392-
copy(tags, webVTTTagStack)
393-
sa = &StyleAttributes{
391+
var styleAttributes *StyleAttributes
392+
if len(sa.WebVTTTags) > 0 {
393+
tags := make([]WebVTTTag, len(sa.WebVTTTags))
394+
copy(tags, sa.WebVTTTags)
395+
styleAttributes = &StyleAttributes{
394396
WebVTTTags: tags,
395397
}
396-
sa.propagateWebVTTAttributes()
398+
styleAttributes.propagateWebVTTAttributes()
397399
}
398400

399401
// Append items
400-
o.Items = append(o.Items, parseTextWebVTTTextToken(sa, string(tr.Raw()))...)
402+
o.Items = append(o.Items, parseTextWebVTTTextToken(styleAttributes, string(tr.Raw()))...)
401403
}
402404
}
403405
return

webvtt_internal_test.go

+6-6
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ func TestParseTextWebVTT(t *testing.T) {
1313
t.Run("When both voice tags are available", func(t *testing.T) {
1414
testData := `<v Bob>Correct tag</v>`
1515

16-
s := parseTextWebVTT(testData)
16+
s := parseTextWebVTT(testData, &StyleAttributes{})
1717
assert.Equal(t, "Bob", s.VoiceName)
1818
assert.Equal(t, 1, len(s.Items))
1919
assert.Equal(t, "Correct tag", s.Items[0].Text)
@@ -22,7 +22,7 @@ func TestParseTextWebVTT(t *testing.T) {
2222
t.Run("When there is no end tag", func(t *testing.T) {
2323
testData := `<v Bob> Text without end tag`
2424

25-
s := parseTextWebVTT(testData)
25+
s := parseTextWebVTT(testData, &StyleAttributes{})
2626
assert.Equal(t, "Bob", s.VoiceName)
2727
assert.Equal(t, 1, len(s.Items))
2828
assert.Equal(t, "Text without end tag", s.Items[0].Text)
@@ -31,7 +31,7 @@ func TestParseTextWebVTT(t *testing.T) {
3131
t.Run("When the end tag is correct", func(t *testing.T) {
3232
testData := `<v Bob>Incorrect end tag</vi>`
3333

34-
s := parseTextWebVTT(testData)
34+
s := parseTextWebVTT(testData, &StyleAttributes{})
3535
assert.Equal(t, "Bob", s.VoiceName)
3636
assert.Equal(t, 1, len(s.Items))
3737
assert.Equal(t, "Incorrect end tag", s.Items[0].Text)
@@ -40,7 +40,7 @@ func TestParseTextWebVTT(t *testing.T) {
4040
t.Run("When inline timestamps are included", func(t *testing.T) {
4141
testData := `<00:01:01.000>With inline <00:01:02.000>timestamps`
4242

43-
s := parseTextWebVTT(testData)
43+
s := parseTextWebVTT(testData, &StyleAttributes{})
4444
assert.Equal(t, 2, len(s.Items))
4545
assert.Equal(t, "With inline", s.Items[0].Text)
4646
assert.Equal(t, time.Minute+time.Second, s.Items[0].StartAt)
@@ -51,7 +51,7 @@ func TestParseTextWebVTT(t *testing.T) {
5151
t.Run("When inline timestamps together", func(t *testing.T) {
5252
testData := `<00:01:01.000><00:01:02.000>With timestamp tags together`
5353

54-
s := parseTextWebVTT(testData)
54+
s := parseTextWebVTT(testData, &StyleAttributes{})
5555
assert.Equal(t, 1, len(s.Items))
5656
assert.Equal(t, "With timestamp tags together", s.Items[0].Text)
5757
assert.Equal(t, time.Minute+2*time.Second, s.Items[0].StartAt)
@@ -60,7 +60,7 @@ func TestParseTextWebVTT(t *testing.T) {
6060
t.Run("When inline timestamps is at end", func(t *testing.T) {
6161
testData := `With end timestamp<00:01:02.000>`
6262

63-
s := parseTextWebVTT(testData)
63+
s := parseTextWebVTT(testData, &StyleAttributes{})
6464
assert.Equal(t, 1, len(s.Items))
6565
assert.Equal(t, "With end timestamp", s.Items[0].Text)
6666
assert.Equal(t, time.Duration(0), s.Items[0].StartAt)

0 commit comments

Comments
 (0)