-
Notifications
You must be signed in to change notification settings - Fork 83
/
Copy pathto_markdown.ts
419 lines (384 loc) · 15.5 KB
/
to_markdown.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
import {Node, Mark} from "prosemirror-model"
/// A mark delimiter: either a fixed string, or a function that
/// computes the string from the serializer state, the mark being
/// written, the parent node, and a child index within that parent.
type MarkDelimiter = string | ((state: MarkdownSerializerState, mark: Mark, parent: Node, index: number) => string)

/// Describes how a single mark type is written out as Markdown.
type MarkSerializerSpec = {
  /// The text emitted in front of content that carries this mark,
  /// given directly or produced by a function.
  open: MarkDelimiter,

  /// The text emitted behind content that carries this mark, given
  /// directly or produced by a function.
  close: MarkDelimiter,

  /// Set to `true` when the opening and closing syntax of this mark
  /// may be ordered freely relative to other mixable marks. (For
  /// example, both `**a *b***` and `*a **b***` are fine, but
  /// `` `a *b*` `` is not.)
  mixable?: boolean,

  /// When enabled, whitespace at the edges of the marked content is
  /// moved from inside the mark syntax to outside of it. Emphasis
  /// marks need this because CommonMark does not allow enclosing
  /// whitespace inside them, see:
  /// http://spec.commonmark.org/0.26/#example-330
  expelEnclosingWhitespace?: boolean,

  /// Set to `false` to turn off character escaping inside this mark.
  /// Such a mark must have the highest precedence (it always has to
  /// be the innermost mark).
  escape?: boolean
}
/// A specification for serializing a ProseMirror document as
/// Markdown/CommonMark text.
export class MarkdownSerializer {
  /// Construct a serializer with the given configuration. The `nodes`
  /// object should map node names in a given schema to functions that
  /// take a serializer state and such a node, and serialize the node.
  constructor(
    /// The node serializer functions for this serializer.
    readonly nodes: {[node: string]: (state: MarkdownSerializerState, node: Node, parent: Node, index: number) => void},
    /// The mark serializer info.
    readonly marks: {[mark: string]: MarkSerializerSpec},
    readonly options: {
      /// Extra characters can be added for escaping. This is passed
      /// directly to String.replace(), and the matching characters are
      /// preceded by a backslash.
      escapeExtraCharacters?: RegExp
    } = {}
  ) {}

  /// Serialize the content of the given node to
  /// [CommonMark](http://commonmark.org/). Returns the resulting
  /// Markdown text. The `options` given here apply to this call only
  /// and take precedence over the options given to the constructor.
  serialize(content: Node, options: {
    /// Whether to render lists in a tight style. This can be overridden
    /// on a node level by specifying a tight attribute on the node.
    /// Defaults to false.
    tightLists?: boolean
  } = {}) {
    // Merge into a fresh object. `Object.assign` writes into its first
    // argument, so using `this.options` as the target would make the
    // options of one call stick to the serializer and affect every
    // later call.
    options = Object.assign({}, this.options, options)
    let state = new MarkdownSerializerState(this.nodes, this.marks, options)
    state.renderContent(content)
    return state.out
  }
}
/// A serializer for the [basic schema](#schema).
export const defaultMarkdownSerializer = new MarkdownSerializer({
  // Block quote: every line of the content is prefixed with "> ".
  blockquote(state, node) {
    state.wrapBlock("> ", null, node, () => state.renderContent(node))
  },
  // Fenced code block. The content is written without escaping.
  code_block(state, node) {
    // The fence has to be longer than any run of backticks inside the
    // content, otherwise such a run would terminate the block early.
    let runs: string[] = node.textContent.match(/`{3,}/g) || []
    let fence = "```"
    for (let i = 0; i < runs.length; i++)
      if (runs[i].length >= fence.length) fence = runs[i] + "`"
    state.write(fence + (node.attrs.params || "") + "\n")
    state.text(node.textContent, false)
    state.ensureNewLine()
    state.write(fence)
    state.closeBlock(node)
  },
  // ATX heading: `level` hash characters, a space, then the inline content.
  heading(state, node) {
    state.write(state.repeat("#", node.attrs.level) + " ")
    state.renderInline(node)
    state.closeBlock(node)
  },
  // Thematic break, using the node's own markup when it has one.
  horizontal_rule(state, node) {
    state.write(node.attrs.markup || "---")
    state.closeBlock(node)
  },
  // Bullet list. Continuation lines are indented by the width of the
  // marker so that they stay inside the list item.
  bullet_list(state, node) {
    let marker = (node.attrs.bullet || "*") + " "
    state.renderList(node, state.repeat(" ", marker.length), () => marker)
  },
  // Ordered list. Numbers are right-aligned to the width of the
  // largest number, and continuation lines are indented by that width
  // plus two (for the ". " after the number).
  ordered_list(state, node) {
    let start = node.attrs.order || 1
    let maxW = String(start + node.childCount - 1).length
    let space = state.repeat(" ", maxW + 2)
    state.renderList(node, space, i => {
      let nStr = String(start + i)
      return state.repeat(" ", maxW - nStr.length) + nStr + ". "
    })
  },
  // The list marker is written by the enclosing list; only the item's
  // content is rendered here.
  list_item(state, node) {
    state.renderContent(node)
  },
  paragraph(state, node) {
    state.renderInline(node)
    state.closeBlock(node)
  },
  // Inline image: `![alt](src "title")`. The alt text is escaped,
  // parentheses in the source are backslash-escaped, and double quotes
  // in the optional title are backslash-escaped.
  image(state, node) {
    state.write("![" + state.esc(node.attrs.alt || "") + "](" + node.attrs.src.replace(/[\(\)]/g, "\\$&") +
                (node.attrs.title ? ' "' + node.attrs.title.replace(/"/g, '\\"') + '"' : "") + ")")
  },
  // Hard break: a backslash followed by a newline. Nothing is written
  // when only further hard breaks (or nothing) follow in the parent.
  hard_break(state, node, parent, index) {
    for (let i = index + 1; i < parent.childCount; i++)
      if (parent.child(i).type != node.type) {
        state.write("\\\n")
        return
      }
  },
  // Text is escaped, except while inside an autolink (`<url>`).
  text(state, node) {
    state.text(node.text!, !state.inAutolink)
  }
}, {
  em: {open: "*", close: "*", mixable: true, expelEnclosingWhitespace: true},
  strong: {open: "**", close: "**", mixable: true, expelEnclosingWhitespace: true},
  link: {
    // Links whose text equals their URL are written as autolinks
    // (`<url>`), all others as `[text](href "title")`. The decision is
    // remembered on the state so that `close` and the text serializer
    // can see it.
    open(state, mark, parent, index) {
      state.inAutolink = isPlainURL(mark, parent, index)
      return state.inAutolink ? "<" : "["
    },
    close(state, mark, parent, index) {
      let {inAutolink} = state
      state.inAutolink = undefined
      return inAutolink ? ">"
        : "](" + mark.attrs.href.replace(/[\(\)"]/g, "\\$&") + (mark.attrs.title ? ` "${mark.attrs.title.replace(/"/g, '\\"')}"` : "") + ")"
    },
    mixable: true
  },
  // Inline code. The delimiters are sized from the backticks found in
  // the adjacent child: `open` looks at the child at `index`, `close`
  // at the child just before `index`.
  code: {open(_state, _mark, parent, index) { return backticksFor(parent.child(index), -1) },
         close(_state, _mark, parent, index) { return backticksFor(parent.child(index - 1), 1) },
         escape: false}
})
// Build the inline-code delimiter for `node`. The delimiter is one
// backtick longer than the longest run of backticks in the node's
// text (a single backtick for non-text nodes or text without
// backticks). When the text does contain backticks, a padding space
// is added on the content side: after the delimiter for an opening
// delimiter (`side < 0`), before it for a closing one (`side > 0`).
function backticksFor(node: Node, side: number) {
  let longest = 0
  if (node.isText) {
    let runs = node.text!.match(/`+/g) || []
    for (let k = 0; k < runs.length; k++)
      if (runs[k].length > longest) longest = runs[k].length
  }
  let fence = "`"
  while (fence.length <= longest) fence += "`"
  if (longest == 0) return fence
  if (side > 0) return " " + fence
  return side < 0 ? fence + " " : fence
}
// Tell whether the link mark `link`, found on the child of `parent`
// at `index`, can be written as an autolink. That is the case when
// the link has no title, its href starts with a scheme (`word:`), the
// child is a text node whose text equals the href and whose last mark
// is this link, and the link does not continue onto the next child.
function isPlainURL(link: Mark, parent: Node, index: number) {
  let {title, href} = link.attrs
  if (title) return false
  if (!/^\w+:/.test(href)) return false

  let node = parent.child(index)
  if (!node.isText) return false
  if (node.text != href) return false
  if (node.marks[node.marks.length - 1] != link) return false

  if (index == parent.childCount - 1) return true
  return !link.isInSet(parent.child(index + 1).marks)
}
/// This is an object used to track state and expose
/// methods related to markdown serialization. Instances are passed to
/// node and mark serialization methods (see `toMarkdown`).
export class MarkdownSerializerState {
  /// @internal
  // Prefix that `write` puts at the start of every output line.
  delim: string = ""
  /// @internal
  // The Markdown text produced so far.
  out: string = ""
  /// @internal
  // The most recently closed block whose separating newlines have not
  // been written yet (see `flushClose`), or null.
  closed: Node | null = null
  /// @internal
  // Scratch flag for node and mark serializers; not read by this class.
  inAutolink: boolean | undefined = undefined
  /// @internal
  // Passed by `text` to `esc` as its `startOfLine` argument. Set while
  // `renderInline` is running.
  atBlockStart: boolean = false
  /// @internal
  // Whether the list currently being rendered is tight (see `renderList`).
  inTightList: boolean = false

  /// @internal
  constructor(
    /// @internal
    readonly nodes: {[node: string]: (state: MarkdownSerializerState, node: Node, parent: Node, index: number) => void},
    /// @internal
    readonly marks: {[mark: string]: MarkSerializerSpec},
    /// The options passed to the serializer, with `tightLists`
    /// defaulting to false.
    readonly options: {tightLists?: boolean, escapeExtraCharacters?: RegExp}
  ) {
    // Apply the default on a copy, so that the object handed in by the
    // caller is never modified.
    if (typeof options.tightLists == "undefined")
      this.options = Object.assign({}, options, {tightLists: false})
  }

  /// @internal
  // If a block is waiting to be closed, end the current line and add
  // `size - 1` separator lines. Separator lines carry the current
  // delimiter with its trailing whitespace removed.
  flushClose(size: number = 2) {
    if (this.closed) {
      if (!this.atBlank()) this.out += "\n"
      if (size > 1) {
        let delimMin = this.delim
        let trim = /\s+$/.exec(delimMin)
        if (trim) delimMin = delimMin.slice(0, delimMin.length - trim[0].length)
        for (let i = 1; i < size; i++)
          this.out += delimMin + "\n"
      }
      this.closed = null
    }
  }

  /// Render a block, prefixing each line with `delim`, and the first
  /// line in `firstDelim`. `node` should be the node that is closed at
  /// the end of the block, and `f` is a function that renders the
  /// content of the block. When `firstDelim` is null, `delim` is used
  /// for the first line as well.
  wrapBlock(delim: string, firstDelim: string | null, node: Node, f: () => void) {
    let old = this.delim
    // Only fall back to `delim` when no first-line delimiter was given
    // at all; an empty string is a valid (empty) first-line delimiter.
    this.write(firstDelim != null ? firstDelim : delim)
    this.delim += delim
    f()
    this.delim = old
    this.closeBlock(node)
  }

  /// @internal
  // True when the output is empty or ends in a newline.
  atBlank() {
    return /(^|\n)$/.test(this.out)
  }

  /// Ensure the current content ends with a newline.
  ensureNewLine() {
    if (!this.atBlank()) this.out += "\n"
  }

  /// Prepare the state for writing output (closing closed paragraphs,
  /// adding delimiters, and so on), and then optionally add content
  /// (unescaped) to the output.
  write(content?: string) {
    this.flushClose()
    if (this.delim && this.atBlank())
      this.out += this.delim
    if (content) this.out += content
  }

  /// Close the block for the given node. The separating newlines are
  /// written lazily, the next time something is written.
  closeBlock(node: Node) {
    this.closed = node
  }

  /// Add the given text to the document. When escape is not `false`,
  /// it will be escaped.
  text(text: string, escape = true) {
    let lines = text.split("\n")
    for (let i = 0; i < lines.length; i++) {
      // Flushes pending block closes and adds the line delimiter
      this.write()
      // Escape exclamation marks in front of links
      if (!escape && lines[i][0] == "[" && /(^|[^\\])\!$/.test(this.out))
        this.out = this.out.slice(0, this.out.length - 1) + "\\!"
      this.out += escape ? this.esc(lines[i], this.atBlockStart) : lines[i]
      if (i != lines.length - 1) this.out += "\n"
    }
  }

  /// Render the given node as a block. Throws when no serializer
  /// function is registered for the node's type.
  render(node: Node, parent: Node, index: number) {
    if (typeof parent == "number") throw new Error("!")
    if (!this.nodes[node.type.name]) throw new Error("Token type `" + node.type.name + "` not supported by Markdown renderer")
    this.nodes[node.type.name](this, node, parent, index)
  }

  /// Render the contents of `parent` as block nodes.
  renderContent(parent: Node) {
    parent.forEach((node, _, i) => this.render(node, parent, i))
  }

  /// Render the contents of `parent` as inline content. Walks the
  /// children in order, closing and opening mark syntax as the set of
  /// marks changes from one child to the next.
  renderInline(parent: Node) {
    this.atBlockStart = true
    // `active` holds the marks that are currently open, outermost
    // first. `trailing` is whitespace expelled from the end of the
    // previous child, to be written once its marks have been closed.
    let active: Mark[] = [], trailing = ""
    let progress = (node: Node | null, offset: number, index: number) => {
      let marks = node ? node.marks : []

      // Remove marks from `hard_break` that are the last node inside
      // that mark to prevent parser edge cases with new lines just
      // before closing marks.
      // (FIXME it'd be nice if we had a schema-agnostic way to
      // identify nodes that serialize as hard breaks)
      if (node && node.type.name === "hard_break")
        marks = marks.filter(m => {
          if (index + 1 == parent.childCount) return false
          let next = parent.child(index + 1)
          return m.isInSet(next.marks) && (!next.isText || /\S/.test(next.text!))
        })

      let leading = trailing
      trailing = ""
      // If whitespace has to be expelled from the node, adjust
      // leading and trailing accordingly.
      if (node && node.isText && marks.some(mark => {
        let info = this.marks[mark.type.name]
        return info && info.expelEnclosingWhitespace &&
          !(mark.isInSet(active) || index < parent.childCount - 1 && mark.isInSet(parent.child(index + 1).marks))
      })) {
        let [_, lead, inner, trail] = /^(\s*)(.*?)(\s*)$/m.exec(node.text!)!
        leading += lead
        trailing = trail
        if (lead || trail) {
          // A node that consisted of whitespace only is dropped; the
          // open marks are then left as they are.
          node = inner ? (node as any).withText(inner) : null
          if (!node) marks = active
        }
      }

      // A non-escaping innermost mark (such as code) is not tracked in
      // `active`; it is written together with its content below.
      let inner = marks.length ? marks[marks.length - 1] : null
      let noEsc = inner && this.marks[inner.type.name].escape === false
      let len = marks.length - (noEsc ? 1 : 0)

      // Try to reorder 'mixable' marks, such as em and strong, which
      // in Markdown may be opened and closed in different order, so
      // that order of the marks for the token matches the order in
      // active.
      outer: for (let i = 0; i < len; i++) {
        let mark = marks[i]
        if (!this.marks[mark.type.name].mixable) break
        for (let j = 0; j < active.length; j++) {
          let other = active[j]
          if (!this.marks[other.type.name].mixable) break
          if (mark.eq(other)) {
            if (i > j)
              marks = marks.slice(0, j).concat(mark).concat(marks.slice(j, i)).concat(marks.slice(i + 1, len))
            else if (j > i)
              marks = marks.slice(0, i).concat(marks.slice(i + 1, j)).concat(mark).concat(marks.slice(j, len))
            continue outer
          }
        }
      }

      // Find the prefix of the mark set that didn't change
      let keep = 0
      while (keep < Math.min(active.length, len) && marks[keep].eq(active[keep])) ++keep

      // Close the marks that need to be closed
      while (keep < active.length)
        this.text(this.markString(active.pop()!, false, parent, index), false)

      // Output any previously expelled trailing whitespace outside the marks
      if (leading) this.text(leading)

      // Open the marks that need to be opened
      if (node) {
        while (active.length < len) {
          let add = marks[active.length]
          active.push(add)
          this.text(this.markString(add, true, parent, index), false)
        }

        // Render the node. Special case code marks, since their content
        // may not be escaped.
        if (noEsc && node.isText)
          this.text(this.markString(inner!, true, parent, index) + node.text +
                    this.markString(inner!, false, parent, index + 1), false)
        else
          this.render(node, parent, index)
      }
    }
    parent.forEach(progress)
    // One extra step without a node closes the marks that are still
    // open and writes any remaining expelled whitespace.
    progress(null, 0, parent.childCount)
    this.atBlockStart = false
  }

  /// Render a node's content as a list. `delim` should be the extra
  /// indentation added to all lines except the first in an item,
  /// `firstDelim` is a function going from an item index to a
  /// delimiter for the first line of the item.
  renderList(node: Node, delim: string, firstDelim: (index: number) => string) {
    // A list directly following a closed block of the same type gets
    // an extra separator line; inside a tight list none is added.
    if (this.closed && this.closed.type == node.type)
      this.flushClose(3)
    else if (this.inTightList)
      this.flushClose(1)

    // The node's own `tight` attribute wins over the serializer option
    let isTight = typeof node.attrs.tight != "undefined" ? node.attrs.tight : this.options.tightLists
    let prevTight = this.inTightList
    this.inTightList = isTight
    node.forEach((child, _, i) => {
      if (i && isTight) this.flushClose(1)
      this.wrapBlock(delim, firstDelim(i), node, () => this.render(child, node, i))
    })
    this.inTightList = prevTight
  }

  /// Escape the given string so that it can safely appear in Markdown
  /// content. If `startOfLine` is true, also escape characters that
  /// have special meaning only at the start of the line.
  esc(str: string, startOfLine = false) {
    str = str.replace(
      /[`*\\~\[\]_]/g,
      // An underscore between two word characters is left alone
      (m, i) => m == "_" && i > 0 && i + 1 < str.length && str[i-1].match(/\w/) && str[i+1].match(/\w/) ? m : "\\" + m
    )
    if (startOfLine) str = str.replace(/^[#\-*+>]/, "\\$&").replace(/^(\s*\d+)\./, "$1\\.")
    if (this.options.escapeExtraCharacters) str = str.replace(this.options.escapeExtraCharacters, "\\$&")
    return str
  }

  /// @internal
  // Wrap `str` in double quotes, or in single quotes when it contains
  // a double quote, or in parentheses when it contains both.
  quote(str: string) {
    let wrap = str.indexOf('"') == -1 ? '""' : str.indexOf("'") == -1 ? "''" : "()"
    return wrap[0] + str + wrap[1]
  }

  /// Repeat the given string `n` times.
  repeat(str: string, n: number) {
    let out = ""
    for (let i = 0; i < n; i++) out += str
    return out
  }

  /// Get the markdown string for a given opening or closing mark.
  markString(mark: Mark, open: boolean, parent: Node, index: number) {
    let info = this.marks[mark.type.name]
    let value = open ? info.open : info.close
    return typeof value == "string" ? value : value(this, mark, parent, index)
  }

  /// Get leading and trailing whitespace from a string. Values of
  /// leading or trailing property of the return object will be undefined
  /// if there is no match.
  getEnclosingWhitespace(text: string): {leading?: string, trailing?: string} {
    return {
      leading: (text.match(/^(\s+)/) || [undefined])[0],
      trailing: (text.match(/(\s+)$/) || [undefined])[0]
    }
  }
}