This repository has been archived by the owner on Aug 22, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 79
/
Copy pathConfluence-html-to-github-markdown.js
executable file
·103 lines (93 loc) · 3.42 KB
/
Confluence-html-to-github-markdown.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/env node
var fs = require('fs-extra')
var exec = require('sync-exec')
var path = require('path');
// Positional command-line arguments (all optional):
//   1st - directory to scan, appended to the current working directory
//   2nd - path the image attachments are exported to
//   3rd - prefix written into the image references of the generated Markdown
var cliArgs = process.argv.slice(2);
var divePath = cliArgs.length > 0 ? process.cwd() + "/" + cliArgs[0] : process.cwd();
var attachmentsExportPath = cliArgs.length > 1 ? cliArgs[1] : "public/assets/images/";
var markdownImageReference = cliArgs.length > 2 ? cliArgs[2] : "assets/images/";
// Kick off the recursive conversion at the chosen root directory.
dive(divePath)
/**
 * Recursively walk `dir` and convert every `.html` file found into a Markdown
 * file under `Markdown/` (relative to the current working directory).
 *
 * @param {string} dir - Directory to scan.
 */
function dive(dir) {
  fs.readdirSync(dir).forEach(function (file) {
    // Full path of that entry
    const p = path.join(dir, file);
    if (fs.statSync(p).isDirectory()) {
      dive(p);
      return;
    }
    console.log(file);
    if (file.endsWith('.html')) {
      convertHtmlFile(dir, p);
    }
  });
}

/**
 * Quote a single argument for the shell that runs the pandoc command line.
 *
 * @param {string} arg - Raw argument (typically a file path).
 * @returns {string} The argument wrapped so the shell treats it as one word.
 */
function shellQuote(arg) {
  if (process.platform === 'win32') {
    // Windows file names cannot contain '"', so plain double quotes suffice.
    return '"' + arg + '"';
  }
  // POSIX sh: single quotes disable all expansion; an embedded single quote
  // is written as '\'' (close quote, escaped quote, reopen quote).
  return "'" + arg.replace(/'/g, "'\\''") + "'";
}

/**
 * Convert one HTML file to Markdown with pandoc, copy the attachments it
 * references and rewrite its image references.
 *
 * The output file name is derived from the page's <title>; files without a
 * <title> element are skipped.
 *
 * @param {string} dir - Directory containing the HTML file (image paths are
 *   resolved against it).
 * @param {string} htmlPath - Full path of the HTML file.
 */
function convertHtmlFile(dir, htmlPath) {
  const titleMatch = fs.readFileSync(htmlPath, 'utf8').match(/<title>([^<]*)<\/title>/i);
  if (titleMatch === null) {
    return;
  }

  // Must be the synchronous variant: pandoc writes into this directory
  // immediately afterwards.
  fs.ensureDirSync("Markdown");

  const sanitizedFilename = titleMatch[1].replace(/[^0-9a-zA-Z]/g, "_");
  const outputFile = path.join("Markdown", sanitizedFilename + ".md");
  // Paths are quoted so that spaces or shell metacharacters in directory or
  // file names cannot split or alter the command.
  const command = "pandoc -f html -t markdown -o " + shellQuote(outputFile) + " " + shellQuote(htmlPath);
  const out = exec(command, {cwd: process.cwd()});
  console.log(out);
  if (out && out.status) {
    // A non-zero exit status means pandoc did not produce (fresh) output;
    // reading outputFile now would crash or re-process a stale file.
    console.log("pandoc failed for: " + htmlPath);
    return;
  }

  //images
  console.log("Reading : " + outputFile);
  const markdown = fs.readFileSync(outputFile, 'utf8');
  // Matches `<img src="` followed by a three-segment path "a/b/c".
  // The "|" inside the character class is a literal pipe, kept so the set of
  // matched references is unchanged.
  const imgReference = /(<img src=")([a-z0-9_.|]+)\/([a-z0-9_.|]+)\/([a-z0-9_.|]+)/ig;

  uniq(markdown.match(imgReference)).forEach(function (reference) {
    copyAttachment(dir, reference.replace('<img src="', ''));
  });

  // Every matched reference is rewritten (first path segment replaced by
  // markdownImageReference), although only "attachments/" images are copied.
  // A replacer function is used so "$" in the prefix is taken literally.
  const rewritten = markdown.replace(imgReference, function (whole, prefix, first, second, third) {
    return prefix + markdownImageReference + second + "/" + third;
  });
  fs.writeFileSync(outputFile, rewritten);
}

/**
 * Copy one referenced image into the attachments export directory.
 * References that do not contain "attachments/" are ignored.
 *
 * @param {string} dir - Directory the image path is relative to.
 * @param {string} img - Image path as found in the Markdown, e.g.
 *   "attachments/123/456.png".
 */
function copyAttachment(dir, img) {
  const attachments = img.replace("attachments/", "");
  if (attachments === img) {
    return;
  }

  const source = path.join(dir, img);
  // The export path is always interpreted relative to the working directory,
  // with or without a trailing separator.
  const destination = path.join(process.cwd(), attachmentsExportPath, attachments);
  // Hand mkdirpSync a cwd-relative directory so the created folders always
  // line up with `destination`.
  mkdirpSync(path.relative(process.cwd(), path.dirname(destination)));

  try {
    // Synchronous copy: failures are caught right here and "Wrote" is only
    // logged once the file really exists.
    fs.writeFileSync(destination, fs.readFileSync(source));
    console.log("Wrote: " + source + "\n To: " + destination);
  } catch (e) {
    console.log("Can't copy: " + source + " (" + e.message + ")");
  }
}
/**
 * Return the distinct values of an iterable as an array, in first-seen order.
 * A null or undefined argument yields an empty array.
 *
 * @param {Iterable|null|undefined} a - Values to de-duplicate.
 * @returns {Array} Array holding each distinct value once.
 */
function uniq(a) {
  const distinct = new Set(a);
  return [...distinct];
}
/**
 * Create a single directory, treating "directory already exists" as success.
 *
 * Note: the parameter name shadows the file-level `path` module inside this
 * function.
 *
 * @param {string} path - Directory to create (parents must already exist).
 * @throws {Error} Any error from fs.mkdirSync other than EEXIST, and EEXIST
 *   itself when the existing entry is not a directory.
 */
function mkdirSync(path) {
  try {
    fs.mkdirSync(path);
  } catch (e) {
    if (e.code !== 'EEXIST') throw e;
    // EEXIST is also raised when a regular file occupies the path; only an
    // existing directory counts as success.
    if (!fs.statSync(path).isDirectory()) throw e;
  }
}
/**
 * Create a directory together with any missing parent directories.
 *
 * Absolute paths are honoured as absolute, relative paths are resolved
 * against the current working directory, and directories that already exist
 * are left untouched.
 *
 * @param {string} dirpath - Directory to create; an empty string is a no-op.
 * @throws {Error} When a path component exists but is not a directory, or
 *   the directory cannot be created.
 */
function mkdirpSync(dirpath) {
  // An empty path means "nothing to create"; fs.mkdirSync would reject it.
  if (dirpath === '') {
    return;
  }
  // The built-in recursive mode handles roots and both separator styles,
  // which manual splitting on path.sep did not.
  fs.mkdirSync(dirpath, { recursive: true });
}