-
Notifications
You must be signed in to change notification settings - Fork 146
/
generate-search-index.js
147 lines (123 loc) · 3.76 KB
/
generate-search-index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import fs from "fs";
import path from "path";
import algoliasearch from "algoliasearch";
import crypto from "crypto";
import matter from "gray-matter";
import { remark } from "remark";
import remarkMdx from "remark-mdx";
// Using strip because stripping via remarkMdx rendering wasn't working
import strip from "strip-markdown";
import { exit } from "process";
// Root directory of the documentation sources that walk() scans. It is a
// relative path, so the script has to be started from the project root.
const CONTENT_PATH = "src/content/docs";

// Validate the API key BEFORE building the client, so a missing key always
// produces this message instead of whatever the client does with `undefined`.
if (!process.env.ALGOLIA_KEY) {
  console.error("ALGOLIA_KEY not set");
  exit(1);
}

// Algolia client for application "ECUG3H1E0M" and the index the records go to.
const client = algoliasearch("ECUG3H1E0M", process.env.ALGOLIA_KEY);
const index = client.initIndex("documentation");

// Search records accumulated by walk(); one entry per indexed source file.
const sitemap = [];
/**
 * Picks a title for a page.
 *
 * Order of preference: the frontmatter `title`, then the heading text when the
 * file starts with `#`, then the first line of the form `# Heading`.
 *
 * @param {object | undefined} data - Parsed frontmatter (undefined if parsing failed).
 * @param {string} fileContents - Raw file contents, frontmatter included.
 * @returns {string | undefined} The chosen title, or the (falsy) frontmatter
 *   title when nothing better was found.
 */
function extractTitle(data, fileContents) {
  if (data?.title) {
    return data.title;
  }

  // File begins with a heading marker: take the rest of that first line.
  if (fileContents.startsWith("#")) {
    const leadingHeading = /#+\s?(.*)/.exec(fileContents);
    if (leadingHeading?.[1]) {
      return leadingHeading[1];
    }
  }

  // Otherwise use the first level-1 heading anywhere in the file. Splitting on
  // \r?\n keeps CRLF files from leaving a trailing \r that defeats the `$`.
  for (const line of fileContents.split(/\r?\n/)) {
    const heading = /^# (.*)$/.exec(line);
    if (heading) {
      return heading[1];
    }
  }

  return data?.title;
}

/**
 * Recursively scans `dir` and appends one search record to the module-level
 * `sitemap` array for every `.astro`, `.mdx` or `.md` file found.
 *
 * Each record contains:
 *  - `url`: the file path with CONTENT_PATH removed and the extension replaced
 *    by a trailing slash,
 *  - `lastmod`: the file's mtime as an ISO string,
 *  - `objectID`: hex SHA-256 of `url`,
 *  - `title`: see extractTitle(),
 *  - `frontmatter`: a shallow copy of the parsed frontmatter,
 *  - `contents`: the body reduced to plain text on a single line, truncated.
 *
 * @param {string} dir - Directory to scan.
 * @returns {void}
 */
function walk(dir) {
  // Shorten the body so the record fits in the Algolia index.
  const maxContentLength = 8000;

  for (const file of fs.readdirSync(dir)) {
    const filepath = path.join(dir, file);
    const stats = fs.statSync(filepath);

    if (stats.isDirectory()) {
      walk(filepath);
      continue;
    }

    const isAstro = file.endsWith(".astro");
    const isMarkdown = file.endsWith(".mdx") || file.endsWith(".md");
    if (!isAstro && !isMarkdown) {
      continue;
    }

    // Anchor on the end of the path so only the real extension is replaced,
    // never a ".md"/".astro" that happens to appear in a directory name.
    const url = filepath
      .replace(CONTENT_PATH, "")
      .replace(/\.(?:astro|mdx|md)$/, "/");
    const mtime = stats.mtime.toISOString();
    const id = crypto.createHash("sha256").update(url).digest("hex");

    const fileContents = fs.readFileSync(filepath, "utf-8");

    // Split off the frontmatter; if it cannot be parsed, index the raw file.
    let data;
    let content = fileContents;
    try {
      ({ data, content } = matter(fileContents));
    } catch (e) {
      console.warn(
        `could not parse frontmatter in ${filepath}, indexing raw contents:`,
        e.message,
      );
    }

    if (isMarkdown) {
      // Get rid of markdown syntax.
      content = String(remark().use(strip).processSync(content));
      // Remove import lines. The `m` flag is required so `^`/`$` match at
      // every line rather than only at the start/end of the whole document.
      content = content.replace(/^import.*from.*;?$/gm, "");
    }

    if (isAstro) {
      // Get rid of HTML.
      content = stripTags(content);
    }

    // Remove the Callout component import wherever it appears.
    content = content.replace(
      /import\s*{\s*Callout\s*}\s*from\s*['"][^"']*['"]\s*;?/g,
      "",
    );

    // Flatten to a single line, then truncate.
    content = content.replace(/\n/g, " ").substring(0, maxContentLength);

    sitemap.push({
      url,
      lastmod: mtime,
      objectID: id,
      title: extractTitle(data, fileContents),
      frontmatter: {
        ...data,
      },
      contents: content,
    });
  }
}
// Fill `sitemap` with a record for every page under the content root.
walk(CONTENT_PATH);
// console.dir(sitemap, { maxArrayLength: null });

// Single place that reports a failed upload, whether the client threw
// synchronously or the returned promise rejected.
const reportSaveFailure = (e) => {
  console.error("error saving search index", e, e.message);
};

try {
  // Hand the collected records to Algolia's replaceAllObjects for this index.
  const pendingSave = index.replaceAllObjects(sitemap, {});
  pendingSave
    .then(({ objectIDs: savedIds }) => {
      console.log("saved search index", savedIds);
    })
    .catch(reportSaveFailure);
} catch (e) {
  reportSaveFailure(e);
}
/**
 * Removes everything that looks like an HTML tag (`<...>`) from a string.
 *
 * @param {*} html - Text to clean. Falsy values and values without a
 *   `replace` member are returned unchanged.
 * @returns {*} The text with all `<...>` sequences removed, or the input
 *   itself when it was not processed.
 */
function stripTags(html) {
  const canReplace = Boolean(html) && Boolean(html.replace);
  // A "tag" is a `<`, one or more non-`>` characters, then `>`.
  return canReplace ? html.replace(/<[^>]+>/g, "") : html;
}