Respond to comments
iwahbe committed Aug 8, 2024
1 parent 1145b5e commit dacf7a1
Showing 3 changed files with 63 additions and 47 deletions.
98 changes: 57 additions & 41 deletions pkg/tfgen/docs.go
@@ -395,7 +395,16 @@ func gmWalkNodes(node gmast.Node, f func(gmast.Node)) {
}
}

// addNewLineAfterHTML allows us to work around a difference in how TF's registry parses
func gmWalkNode[T gmast.Node](node gmast.Node, f func(T)) {
gmWalkNodes(node, func(node gmast.Node) {
n, ok := node.(T)
if ok {
f(n)
}
})
}
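A minimal sketch of how the new typed walker can be used (illustrative only, not part of this commit; gmWalkNode and the gmast alias come from docs.go, everything else is hypothetical):

	// collectH2Headings gathers every level-2 heading in a parsed document.
	// gmWalkNode performs the type assertion, so the callback receives
	// *gmast.Heading values directly.
	func collectH2Headings(doc gmast.Node) []*gmast.Heading {
		var found []*gmast.Heading
		gmWalkNode(doc, func(h *gmast.Heading) {
			if h.Level == 2 {
				found = append(found, h)
			}
		})
		return found
	}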

// recognizeHeaderAfterHTML allows us to work around a difference in how TF's registry parses
// markdown vs goldmark's CommonMark parser.
//
// Goldmark correctly (for CommonMark) parses the following as a single HTML Block:
@@ -407,56 +416,58 @@ func gmWalkNodes(node gmast.Node, f func(gmast.Node)) {
//
// This is a common pattern in GCP, and we need to parse it as a HTML block, then a header
// block. This AST transformation makes the desired change.
type addNewLineAfterHTML struct{}
type recognizeHeaderAfterHTML struct{}

func (addNewLineAfterHTML) Transform(node *gmast.Document, reader gmtext.Reader, pc gmparser.Context) {
gmWalkNodes(node, func(node gmast.Node) {
if html, ok := node.(*gmast.HTMLBlock); ok {
if html.Lines().Len() == 0 {
return
}
func (recognizeHeaderAfterHTML) Transform(node *gmast.Document, reader gmtext.Reader, pc gmparser.Context) {
gmWalkNode(node, func(node *gmast.HTMLBlock) {
if node.Lines().Len() == 0 {
return
}

last := html.Lines().At(html.Lines().Len() - 1)
if bytes.HasPrefix(last.Value(reader.Source()), []byte("## ")) {
html.Lines().SetSliced(0, html.Lines().Len()-1)
heading := gmast.NewHeading(2)
heading.Lines().Append(last)
node.Parent().InsertAfter(node.Parent(), node, heading)
}
last := node.Lines().At(node.Lines().Len() - 1)
if bytes.HasPrefix(last.Value(reader.Source()), []byte("## ")) {
node.Lines().SetSliced(0, node.Lines().Len()-1)
heading := gmast.NewHeading(2)
heading.Lines().Append(last)
node.Parent().InsertAfter(node.Parent(), node, heading)
}
})
}
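A minimal sketch of the behavior this transformer targets (illustrative only, not part of this commit; the markdown literal is invented, and the parser construction mirrors the one in splitByMarkdownHeaders below):

	// Without the transformer, goldmark parses all four lines below as a
	// single HTMLBlock; with it, the trailing "## Import" line is split off
	// into its own level-2 Heading node.
	func demoRecognizeHeaderAfterHTML() gmast.Node {
		src := []byte("<div>\n  raw HTML\n</div>\n## Import")
		gm := goldmark.New(
			goldmark.WithParserOptions(
				gmparser.WithASTTransformers(
					util.Prioritized(recognizeHeaderAfterHTML{}, 2000),
				),
			),
		)
		return gm.Parser().Parse(gmtext.NewReader(src))
	}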

func splitByMdHeaders(text string, level int) [][]string {
func splitByMarkdownHeaders(text string, level int) [][]string {
// splitByMarkdownHeaders parses text, then walks the resulting AST to find
// appropriate header nodes. It uses the location of these header nodes to split
// text into sections, which are then split into lines.

bytes := trimFrontMatter([]byte(text))

headers := []int{}
offset := len(text) - len(bytes)
contract.Assertf(offset >= 0, "The offset generated by chopping of the front-matter cannot be negative")

gm := goldmark.New(
goldmark.WithExtensions(extension.GFM),
goldmark.WithParserOptions(
gmparser.WithASTTransformers(
util.Prioritized(addNewLineAfterHTML{}, 2000),
util.Prioritized(recognizeHeaderAfterHTML{}, 2000),
),
),
)

gmWalkNodes(gm.Parser().Parse(gmtext.NewReader(bytes)), func(node gmast.Node) {
if _, ok := node.(*gmast.RawHTML); ok {
node.Parent().RemoveChild(node.Parent(), node)
return
}

heading, ok := node.(*gmast.Heading)
if !ok || heading.Level != level {
headers := []int{}
gmWalkNode(gm.Parser().Parse(gmtext.NewReader(bytes)), func(heading *gmast.Heading) {
if heading.Level != level {
return
}
if heading.Lines().Len() == 0 {
return
}
for i := heading.Lines().At(0).Start; i > 0; i-- {
if bytes[i] == '\n' {
headers = append(headers, i+1)
headers = append(headers,
// +1 to move past the \n
//
// +offset to move past the front-matter (if present)
i+1+offset)
return
}
}
@@ -467,28 +478,33 @@ func splitByMdHeaders(text string, level int) [][]string {
//
// We now use that information to extract sections from `text`.

offset := len(text) - len(bytes)
contract.Assertf(offset >= 0, "The offset generated by chopping of the front-matter cannot be negative")

sections := make([][]string, 0, len(headers)+1)
for _, section := range splitStringsAtIndexes(text, headers) {
sections = append(sections, strings.Split(section, "\n"))
}

if len(headers) == 0 {
return [][]string{strings.Split(text, "\n")}
return sections
}

func splitStringsAtIndexes(s string, splits []int) []string {
if len(splits) == 0 {
return []string{s}
}

parts := make([]string, 0, len(splits)+1)

// Account for the first section
sections = append(sections, strings.Split(text[:headers[0]+offset-1], "\n"))
parts = append(parts, s[:splits[0]-1])

// Now handle the middle section
for from := 0; from+1 < len(headers); from++ {
sections = append(sections,
strings.Split(text[headers[from]+offset:headers[from+1]+offset-1], "\n"))
for from := 0; from+1 < len(splits); from++ {
parts = append(parts, s[splits[from]:splits[from+1]-1])
}

// Account for the end section
sections = append(sections, strings.Split(text[headers[len(headers)-1]+offset:], "\n"))
parts = append(parts, s[splits[len(splits)-1]:])

return sections
return parts
}
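A minimal sketch of the expected behavior of the new helper (illustrative only, not part of this commit; the input string is invented). Each index in splits marks where a new part begins, and the character just before each index (the newline that precedes a header in splitByMarkdownHeaders) is dropped from the preceding part:

	func demoSplitStringsAtIndexes() []string {
		parts := splitStringsAtIndexes("intro\n## A\nbody\n## B\nmore", []int{6, 16})
		// parts == []string{"intro", "## A\nbody", "## B\nmore"}
		return parts
	}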

// parseTFMarkdown takes a TF website markdown doc and extracts a structured representation for use in
@@ -575,7 +591,7 @@ func (p *tfMarkdownParser) parse(tfMarkdown []byte) (entityDocs, error) {
markdown = strings.Replace(markdown, "<!-- schema generated by tfplugindocs -->", "", -1)

// Split the sections by H2 topics in the Markdown file.
sections := splitByMdHeaders(markdown, 2)
sections := splitByMarkdownHeaders(markdown, 2)

// we are explicitly overwriting the Terraform examples here
if p.info != nil && p.info.GetDocs() != nil && p.info.ReplaceExamplesSection() {
@@ -760,7 +776,7 @@ func (p *tfMarkdownParser) parseSection(h2Section []string) error {
// Now split the sections by H3 topics. This is done because we'll ignore sub-sections with code
// snippets that are unparseable (we don't want to ignore entire H2 sections).
var wroteHeader bool
for _, h3Section := range splitByMdHeaders(strings.Join(h2Section[1:], "\n"), 3) {
for _, h3Section := range splitByMarkdownHeaders(strings.Join(h2Section[1:], "\n"), 3) {
if len(h3Section) == 0 {
// An unparseable H3 appears (as observed by building a few tier 1 providers) to typically be due to an
// empty section resulting from how we parse sections earlier in the docs generation process. Therefore, we
Expand Down Expand Up @@ -1299,7 +1315,7 @@ func (p *tfMarkdownParser) parseImports(subsection []string) {
}

// Recognizes import sections such as ones found in aws_accessanalyzer_analyzer. If the section is
// recognized, patches up instructoins to make sense for the Pulumi projection.
// recognized, patches up instructions to make sense for the Pulumi projection.
func tryParseV2Imports(typeToken string, markdownLines []string) (string, bool) {
var out bytes.Buffer
fmt.Fprintf(&out, "## Import\n\n")
10 changes: 5 additions & 5 deletions pkg/tfgen/docs_test.go
@@ -670,7 +670,7 @@ func TestReplaceFooterLinks(t *testing.T) {
assert.Equal(t, inputText, actual)
}

func TestSplitByMdHeaders(t *testing.T) {
func TestSplitByMarkdownHeaders(t *testing.T) {
t.Parallel()

tests := []struct {
@@ -1271,7 +1271,7 @@ content
for _, tt := range tests {
tt := tt
t.Run("", func(t *testing.T) {
actual := splitByMdHeaders(tt.input, tt.level)
actual := splitByMarkdownHeaders(tt.input, tt.level)
tt.expected.Equal(t, actual)
})
}
@@ -1295,7 +1295,7 @@ Provides a DigitalOcean CDN Endpoint resource for use with Spaces.
## Argument Reference`

var processedMarkdown string
groups := splitByMdHeaders(markdown, 2)
groups := splitByMarkdownHeaders(markdown, 2)
for _, lines := range groups {
fixExampleTitles(lines)
for _, line := range lines {
@@ -1322,7 +1322,7 @@ Misleading example title without any actual code fences. We should not modify th
## Argument Reference`

var processedMarkdown string
groups := splitByMdHeaders(markdown, 2)
groups := splitByMarkdownHeaders(markdown, 2)
for _, lines := range groups {
fixExampleTitles(lines)
for _, line := range lines {
@@ -1360,7 +1360,7 @@ Basic usage:`

func TestReformatExamples(t *testing.T) {
runTest := func(input string, expected [][]string) {
inputSections := splitByMdHeaders(input, 2)
inputSections := splitByMarkdownHeaders(input, 2)
actual := reformatExamples(inputSections)

assert.Equal(t, expected, actual)
2 changes: 1 addition & 1 deletion pkg/tfgen/generate_schema.go
@@ -1086,7 +1086,7 @@ func appendExample(description, markdownToAppend string) string {

const exampleUsageHeader = "## Example Usage"

sections := splitByMdHeaders(description, 2)
sections := splitByMarkdownHeaders(description, 2)

// If there's already an ## Example Usage section, we need to find this section and append
if strings.Contains(description, exampleUsageHeader) {
