This repository has been archived by the owner on Sep 5, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 92
/
Markdown.Extra.js
482 lines (404 loc) · 17.5 KB
/
Markdown.Extra.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
(function () {
// A quick way to make sure we're only keeping span-level tags when we need to.
// This isn't supposed to be foolproof. It's just a quick way to make sure we
// keep all span-level tags returned by a pagedown converter. It should allow
// all span-level tags through, with or without attributes.
var inlineTags = new RegExp(['^(<\\/?(a|abbr|acronym|applet|area|b|basefont|',
'bdo|big|button|cite|code|del|dfn|em|figcaption|',
'font|i|iframe|img|input|ins|kbd|label|map|',
'mark|meter|object|param|progress|q|ruby|rp|rt|s|',
'samp|script|select|small|span|strike|strong|',
'sub|sup|textarea|time|tt|u|var|wbr)[^>]*>|',
'<(br)\\s?\\/?>)$'].join(''), 'i');
/******************************************************************
* Utility Functions *
*****************************************************************/
function trim(str) {
return str.replace(/^\s+|\s+$/g, '');
}
function rtrim(str) {
return str.replace(/\s+$/g, '');
}
// Remove one level of indentation from text. Indent is 4 spaces.
function outdent(text) {
return text.replace(new RegExp('^(\\t|[ ]{1,4})', 'mg'), '');
}
function contains(str, substr) {
return str.indexOf(substr) != -1;
}
// Sanitize html, removing tags that aren't in the whitelist
function sanitizeHtml(html, whitelist) {
return html.replace(/<[^>]*>?/gi, function(tag) {
return tag.match(whitelist) ? tag : '';
});
}
// JS regexes don't support \A or \Z, so we add sentinels, as Pagedown
// does. In this case, we add the ascii codes for start of text (STX) and
// end of text (ETX), an idea borrowed from:
// https://github.com/tanakahisateru/js-markdown-extra
function addAnchors(text) {
if(text.charAt(0) != '\x02')
text = '\x02' + text;
if(text.charAt(text.length - 1) != '\x03')
text = text + '\x03';
return text;
}
// Remove STX and ETX sentinels.
function removeAnchors(text) {
if(text.charAt(0) == '\x02')
text = text.substr(1);
if(text.charAt(text.length - 1) == '\x03')
text = text.substr(0, text.length - 1);
return text;
}
// Convert markdown within an element, retaining only span-level tags
// (An inefficient version of Pagedown's runSpanGamut. We rely on a
// pagedown coverter to do the complete conversion, and then retain
// only the specified tags -- inline in this case).
function convertSpans(text, converter) {
var html = converter.makeHtml(text);
return sanitizeHtml(html, inlineTags);
}
// Converte escpaed special characters to HTLM decimal entity codes.
function processEscapes(text) {
// Markdown extra adds two escapable characters, `:` and `|`
// If escaped, we convert them to html entities so our
// regexes don't recognize them. Markdown doesn't support escaping
// the escape character, e.g. `\\`, which make this even simpler.
return text.replace(/\\\|/g, '|').replace(/\\:/g, ':');
}
/******************************************************************
* Markdown.Extra *
*****************************************************************/
Markdown.Extra = function() {
// For converting internal markdown (in tables for instance).
// This is necessary since these methods are meant to be called as
// preConversion hooks, and the Markdown converter passed to init()
// won't convert any markdown contained in the html tags we return.
this.converter = null;
// Stores html blocks we generate in hooks so that
// they're not destroyed if the user is using a sanitizing converter
this.hashBlocks = [];
// Fenced code block options
this.googleCodePrettify = false;
this.highlightJs = false;
// Table options
this.tableClass = '';
this.tabWidth = 4;
};
Markdown.Extra.init = function(converter, options) {
// Each call to init creates a new instance of Markdown.Extra so it's
// safe to have multiple converters, with different options, on a single page
var extra = new Markdown.Extra();
var transformations = [];
options = options || {};
options.extensions = options.extensions || ["all"];
if (contains(options.extensions, "all")) {
transformations.push("all");
} else {
if (contains(options.extensions, "tables"))
transformations.push("tables");
if (contains(options.extensions, "fenced_code_gfm"))
transformations.push("fencedCodeBlocks");
if (contains(options.extensions, "def_list"))
transformations.push("definitionLists");
}
// preBlockGamut also gives us access to a hook so we can run the
// block gamut recursively, however we don't need it at this point
converter.hooks.chain("preBlockGamut", function(text) {
return extra.doConversion(transformations, text);
});
converter.hooks.chain("postConversion", function(text) {
return extra.finishConversion(text);
});
if ("highlighter" in options) {
extra.googleCodePrettify = options.highlighter === 'prettify';
extra.highlightJs = options.highlighter === 'highlight';
}
if ("table_class" in options) {
extra.tableClass = options.table_class;
}
// we can't just use the same converter that the user passes in, as
// Pagedown forbids it (doing so could cause an infinite loop)
if (!("sanitize" in options) || options.sanitize) {
extra.converter = Markdown.getSanitizingConverter(); // default
} else {
extra.converter = new Markdown.Converter();
}
// Caller usually won't need this, but it's handy for testing.
return extra;
};
// Setup state vars, do conversion
Markdown.Extra.prototype.doConversion = function(transformations, text) {
this.hashBlocks = [];
text = processEscapes(text);
for(var i = 0; i < transformations.length; i++)
text = this[transformations[i]](text);
return text + '\n';
};
// Clear state vars that may use unnecessary memory. Unhash blocks we
// stored and return converted text.
Markdown.Extra.prototype.finishConversion = function(text) {
text = this.unHashExtraBlocks(text);
this.hashBlocks = [];
return text;
};
// Return a placeholder containing a key, which is the block's index in the
// hashBlocks array. We wrap our output in a <p> tag here so Pagedown won't.
Markdown.Extra.prototype.hashExtraBlock = function(block) {
return '<p>~X' + (this.hashBlocks.push(block) - 1) + 'X</p>';
};
// Replace placeholder blocks in `text` with their corresponding
// html blocks in the hashBlocks array.
Markdown.Extra.prototype.unHashExtraBlocks = function(text) {
var self = this;
text = text.replace(/<p>~X(\d+)X<\/p>/g, function(wholeMatch, m1) {
key = parseInt(m1, 10);
return self.hashBlocks[key];
});
return text;
};
/******************************************************************
* Tables *
*****************************************************************/
// Find and convert Markdown Extra tables into html.
Markdown.Extra.prototype.tables = function(text) {
var self = this;
var leadingPipe = new RegExp(
['^' ,
'[ ]{0,3}' , // Allowed whitespace
'[|]' , // Initial pipe
'(.+)\\n' , // $1: Header Row
'[ ]{0,3}' , // Allowed whitespace
'[|]([ ]*[-:]+[-| :]*)\\n' , // $2: Separator
'(' , // $3: Table Body
'(?:[ ]*[|].*\\n?)*' , // Table rows
')',
'(?:\\n|$)' // Stop at final newline
].join('') ,
'gm'
);
var noLeadingPipe = new RegExp(
['^' ,
'[ ]{0,3}' , // Allowed whitespace
'(\\S.*[|].*)\\n' , // $1: Header Row
'[ ]{0,3}' , // Allowed whitespace
'([-:]+[ ]*[|][-| :]*)\\n' , // $2: Separator
'(' , // $3: Table Body
'(?:.*[|].*\\n?)*' , // Table rows
')' ,
'(?:\\n|$)' // Stop at final newline
].join('') ,
'gm'
);
text = text.replace(leadingPipe, doTable);
text = text.replace(noLeadingPipe, doTable);
// $1 = header, $2 = separator, $3 = body
function doTable(match, header, separator, body, offset, string) {
// remove any leading pipes and whitespace
header = header.replace(/^ *[|]/m, '');
separator = separator.replace(/^ *[|]/m, '');
body = body.replace(/^ *[|]/gm, '');
// remove trailing pipes and whitespace
header = header.replace(/[|] *$/m, '');
separator = separator.replace(/[|] *$/m, '');
body = body.replace(/[|] *$/gm, '');
// determine column alignments
alignspecs = separator.split(/ *[|] */);
align = [];
for (var i = 0; i < alignspecs.length; i++) {
var spec = alignspecs[i];
if (spec.match(/^ *-+: *$/m))
align[i] = ' style="text-align:right;"';
else if (spec.match(/^ *:-+: *$/m))
align[i] = ' style="text-align:center;"';
else if (spec.match(/^ *:-+ *$/m))
align[i] = ' style="text-align:left;"';
else align[i] = '';
}
// TODO: parse spans in header and rows before splitting, so that pipes
// inside of tags are not interpreted as separators
var headers = header.split(/ *[|] */);
var colCount = headers.length;
// build html
var cls = self.tableClass ? ' class="' + self.tableClass + '"' : '';
var html = ['<table', cls, '>\n', '<thead>\n', '<tr>\n'].join('');
// build column headers.
for (i = 0; i < colCount; i++) {
var headerHtml = convertSpans(trim(headers[i]), self.converter);
html += [" <th", align[i], ">", headerHtml, "</th>\n"].join('');
}
html += "</tr>\n</thead>\n";
// build rows
var rows = body.split('\n');
for (i = 0; i < rows.length; i++) {
if (rows[i].match(/^\s*$/)) // can apply to final row
continue;
// ensure number of rowCells matches colCount
var rowCells = rows[i].split(/ *[|] */);
var lenDiff = colCount - rowCells.length;
for (var j = 0; j < lenDiff; j++)
rowCells.push('');
html += "<tr>\n";
for (j = 0; j < colCount; j++) {
var colHtml = convertSpans(trim(rowCells[j]), self.converter);
html += [" <td", align[j], ">", colHtml, "</td>\n"].join('');
}
html += "</tr>\n";
}
html += "</table>\n";
// replace html with placeholder until postConversion step
return self.hashExtraBlock(html);
}
return text;
};
/******************************************************************
* Fenced Code Blocks (gfm) *
******************************************************************/
// Find and convert gfm-inspired fenced code blocks into html.
Markdown.Extra.prototype.fencedCodeBlocks = function(text) {
function encodeCode(code) {
code = code.replace(/&/g, "&");
code = code.replace(/</g, "<");
code = code.replace(/>/g, ">");
return code;
}
var self = this;
text = text.replace(/(?:^|\n)```(.*)\n([\s\S]*?)\n```/g, function(match, m1, m2) {
var language = m1, codeblock = m2;
// adhere to specified options
var preclass = self.googleCodePrettify ? ' class="prettyprint"' : '';
var codeclass = '';
if (language) {
if (self.googleCodePrettify || self.highlightJs) {
// use html5 language- class names. supported by both prettify and highlight.js
codeclass = ' class="language-' + language + '"';
} else {
codeclass = ' class="' + language + '"';
}
}
var html = ['<pre', preclass, '><code', codeclass, '>',
encodeCode(codeblock), '</code></pre>'].join('');
// replace codeblock with placeholder until postConversion step
return self.hashExtraBlock(html);
});
return text;
};
Markdown.Extra.prototype.all = function(text) {
text = this.tables(text);
text = this.fencedCodeBlocks(text);
return text;
};
/******************************************************************
* Definition Lists *
******************************************************************/
// Find and convert markdown extra definition lists into html.
Markdown.Extra.prototype.definitionLists = function(text) {
var wholeList = new RegExp(
'(\\x02\\n?|\\n\\n)' +
'(?:' +
'(' + // $1 = whole list
'(' + // $2
'[ ]{0,3}' +
'((?:[ \\t]*\\S.*\\n)+)' + // $3 = defined term
'\\n?' +
'[ ]{0,3}:[ ]+' + // colon starting definition
')' +
'([\\s\\S]+?)' +
'(' + // $4
'(?=\\0x03)' + // \z
'|' +
'(?=' +
'\\n{2,}' +
'(?=\\S)' +
'(?!' + // Negative lookahead for another term
'[ ]{0,3}' +
'(?:\\S.*\\n )+?' + // defined term
'\\n?' +
'[ ]{0,3}:[ ]+' + // colon starting definition
')' +
'(?!' + // Negative lookahead for another definition
'[ ]{0,3}:[ ]+' + // colon starting definition
')' +
')' +
')' +
')' +
')',
'gm'
);
var self = this;
text = addAnchors(text);
text = text.replace(wholeList, function(match, pre, list) {
// Turn double returns into triple returns, so that we can make a
// paragraph for the last item in a list, if necessary:
var result = trim(self.processDefListItems(list));
result = "<dl>\n" + result + "\n</dl>";
return pre + self.hashExtraBlock(result) + "\n\n";
});
return removeAnchors(text);
};
// Process the contents of a single definition list, splitting it
// into individual term and definition list items.
Markdown.Extra.prototype.processDefListItems = function(listStr) {
var self = this;
var dt = new RegExp(
'(\\x02\\n?|\\n\\n+)' + // leading line
'(' + // definition terms = $1
'[ ]{0,3}' + // leading whitespace
'(?![:][ ]|[ ])' + // negative lookahead for a definition
// mark (colon) or more whitespace.
'(?:\\S.*\\n)+?' + // actual term (not whitespace).
')' +
'(?=\\n?[ ]{0,3}:[ ])' , // lookahead for following line feed
// with a definition mark.
'mg'
);
var dd = new RegExp(
'\\n(\\n+)?' + // leading line = $1
'(' + // marker space = $2
'[ ]{0,3}' + // whitespace before colon
'[:][ ]+' + // definition mark (colon)
')' +
'([\\s\\S]+?)' + // definition text = $3
'(?=\\n*' + // stop at next definition mark,
'(?:' + // next term or end of text
'\\n[ ]{0,3}[:][ ]|' +
'<dt>|\\x03' + // \z
')' +
')',
'mg'
);
listStr = addAnchors(listStr);
// trim trailing blank lines:
listStr = listStr.replace(/\n{2,}(?=\\x03)/, "\n");
// Process definition terms.
listStr = listStr.replace(dt, function(match, pre, termsStr) {
var terms = trim(termsStr).split("\n");
var text = '';
for (var i = 0; i < terms.length; i++) {
var term = terms[i];
// process spans inside dt
term = convertSpans(trim(term), self.converter);
text += "\n<dt>" + term + "</dt>";
}
return text + "\n";
});
// Process actual definitions.
listStr = listStr.replace(dd, function(match, leading_line, marker_space, def) {
if (leading_line || def.match(/\n{2,}/)) {
// replace marker with the appropriate whitespace indentation
def = Array(marker_space.length + 1).join(' ') + def;
// process markdown inside definition
// currently doesn't apply extensions
def = outdent(def) + "\n\n";
def = "\n" + self.converter.makeHtml(def) + "\n";
} else {
// convert span-level markdown inside definition
def = rtrim(def);
def = convertSpans(outdent(def), self.converter);
}
return "\n<dd>" + def + "</dd>\n";
});
return removeAnchors(listStr);
};
})();