-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathindex_generator.go
165 lines (131 loc) · 4.01 KB
/
index_generator.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
package main
import (
"bytes"
"encoding/json"
"fmt"
"regexp"
"strconv"
"strings"
)
// indexNode declares the two methods listed below.
// NOTE(review): no implementation or use of this interface is visible in this
// part of the file — confirm where it is satisfied before relying on it.
type indexNode interface {
// genIndex returns a string; presumably the index text for the node — TODO confirm against implementations.
genIndex() string
// traverseChild returns a slice of NodeTree; presumably the node's children — TODO confirm against implementations.
traverseChild() []NodeTree
}
// ProcessRevision builds the root index entries from the contents of
// `revision.json` for the version called versionName.
//
// The first entry describes the version's own page and carries an empty Path.
// The entries for the pages below it are appended by traversePages.
func ProcessRevision(revision Revision, versionName string) []IndexData {
	version := revision.Versions[versionName]

	// Entry for the version document itself.
	entries := []IndexData{}
	entries = append(entries, IndexData{
		Uid:  version.Page.Uid,
		Page: version.Page.Title,
		Path: "",
	})

	// Entries for every page underneath the version document.
	traversePages(version.Page, &entries)

	return entries
}
// traversePages walks the pages below parentPage depth-first and appends one
// IndexData entry to result for every child whose Kind is "document".
//
// Unless parentPage.Path is "master", a document's Path is rewritten in place
// to "<parent path>/<own path>". The Visited flag is set at the same time so
// the prefix is applied at most once per page.
func traversePages(parentPage VersionInfo, result *[]IndexData) {
	pages := parentPage.Pages
	// Children of the top level page keep their path as it is.
	prefixWithParent := parentPage.Path != "master"

	for idx := range pages {
		page := &pages[idx]

		// Only `document` pages are added to the index.
		if page.Kind == "document" {
			if prefixWithParent && !page.Visited {
				page.Visited = true
				page.Path = fmt.Sprintf("%v/%v", parentPage.Path, page.Path)
			}

			*result = append(*result, IndexData{
				Uid:  page.Uid,
				Page: page.Title,
				Path: page.Path,
			})
		}

		// Descend whenever the child has pages of its own, whatever its kind.
		if len(page.Pages) > 0 {
			traversePages(*page, result)
		}
	}
}
// newSection is the section currently being assembled by CollectIndexContent.
// It is package-level state shared by every call, so it starts out as the zero
// Section.
var newSection Section
// Patterns used to derive an anchor from a heading title. They are compiled
// once at package initialisation rather than on every heading.
var (
	// anchorVersionRe matches a "v" directly followed by digits, e.g. "v2".
	anchorVersionRe = regexp.MustCompile(`v(\d+)`)
	// anchorSpecialCharsRe matches the punctuation that is replaced by a space.
	anchorSpecialCharsRe = regexp.MustCompile(`[,.?:()'"/!*+=\-\[\]]`)
	// anchorInnerSpaceRe matches a whitespace run with a word character on both sides.
	anchorInnerSpaceRe = regexp.MustCompile(`\b\s+\b`)
)

// CollectIndexContent walks the tree rooted at n and uses resultCollector to
// collect the index sections it finds.
//
// Parameters:
//   - resultCollector: slice that finished sections are appended to.
//   - meetHeading: when true, the pending section is flushed at a heading even
//     if its content is empty. The value is handed unchanged to every
//     recursive call.
//   - isTopLevel: true for the outermost call only; after its children are
//     processed the last pending section is appended and the shared state is
//     reset.
//   - anchorMap: counts how often each anchor has been produced so duplicates
//     get a numeric suffix. It is written to, so it must not be nil.
//
// It returns "" for a "heading-1"/"heading-2" node and otherwise the text of
// n's own ranges (see extractText).
//
// The section being assembled lives in the package-level newSection, so
// overlapping calls would interfere with each other.
func (n *NodeTree) CollectIndexContent(resultCollector *[]Section, meetHeading bool, isTopLevel bool, anchorMap map[string]int) string {
	if n.Type == "heading-1" || n.Type == "heading-2" {
		// A heading closes the section gathered so far. That section either has
		// content without a heading, or a heading with content; the very last
		// section is flushed by the top level call instead.
		//
		// NOTE(review): this branch used to assign meetHeading = true, but the
		// parameter is passed by value and the function returns right away, so
		// the assignment never had an effect. A heading-only section is
		// therefore kept only when the caller passes meetHeading == true —
		// confirm that this is intended.
		if meetHeading || newSection.Content != "" {
			*resultCollector = append(*resultCollector, newSection)
		}

		title := indexHeadingTitle(n)
		newSection = Section{
			Title:   title,
			Anchor:  indexHeadingAnchor(title, anchorMap),
			Content: "",
		}

		// The heading text is not part of any section content.
		return ""
	}

	for i := range n.Nodes {
		// Finish the recursive call before touching newSection.Content: the
		// call itself appends to (or replaces) newSection, and the Go spec does
		// not define whether `x += f()` reads x before or after running f.
		text := n.Nodes[i].CollectIndexContent(resultCollector, meetHeading, false, anchorMap)
		if n.Nodes[i].Type == "paragraph" {
			// Paragraphs are trimmed and separated by a single space.
			text = strings.TrimSpace(text) + " "
		}
		newSection.Content += text
	}

	// Add the last section once all children of the top level node are done.
	if isTopLevel && len(n.Nodes) > 0 {
		*resultCollector = append(*resultCollector, newSection)
		// Reset `newSection` for the next document.
		newSection = Section{}
	}

	return extractText(n)
}

// indexHeadingTitle returns the text of the first range of the heading's first
// child, or "" when that text is missing or starts with a newline.
func indexHeadingTitle(n *NodeTree) string {
	if len(n.Nodes) == 0 || len(n.Nodes[0].Ranges) == 0 {
		return ""
	}

	title := n.Nodes[0].Ranges[0].Text
	// A title that starts with a newline is treated as invalid.
	if strings.HasPrefix(title, "\n") {
		return ""
	}
	return title
}

// indexHeadingAnchor derives the anchor for title and records it in anchorMap;
// the second and later occurrences of an anchor get a "_<count>" suffix.
func indexHeadingAnchor(title string, anchorMap map[string]int) string {
	anchor := strings.ToLower(title)
	// Add a space between 'v' and the version number.
	anchor = anchorVersionRe.ReplaceAllString(anchor, "v $1")
	// Replace '&' with 'and'.
	anchor = strings.ReplaceAll(anchor, "&", "and")
	// Replace the special characters with a space.
	anchor = anchorSpecialCharsRe.ReplaceAllString(anchor, " ")
	// Replace whitespace between words with a dash, then drop what is left at the ends.
	anchor = anchorInnerSpaceRe.ReplaceAllString(anchor, "-")
	anchor = strings.TrimSpace(anchor)

	// Count the anchor before suffixing so every duplicate gets its own number.
	anchorMap[anchor]++
	if count := anchorMap[anchor]; count != 1 {
		anchor += "_" + strconv.Itoa(count)
	}
	return anchor
}
// extractText joins the Text of every range of n into a single string, with all
// newline characters removed. A node without ranges yields "".
func extractText(n *NodeTree) string {
	var joined strings.Builder
	for _, r := range n.Ranges {
		joined.WriteString(strings.ReplaceAll(r.Text, "\n", ""))
	}
	return joined.String()
}
// JSONMarshal encodes t as JSON without HTML escaping, so '<', '>' and '&' are
// written as they are.
//
// The output comes from json.Encoder.Encode and therefore ends with a newline.
// The buffered bytes are returned together with any encoding error.
func JSONMarshal(t interface{}) ([]byte, error) {
	var out bytes.Buffer

	enc := json.NewEncoder(&out)
	enc.SetEscapeHTML(false) // keep <, > and & unescaped

	if err := enc.Encode(t); err != nil {
		return out.Bytes(), err
	}
	return out.Bytes(), nil
}