diff --git a/pkg/tfgen/docs.go b/pkg/tfgen/docs.go index 3a8563b60..151df0dfd 100644 --- a/pkg/tfgen/docs.go +++ b/pkg/tfgen/docs.go @@ -395,7 +395,16 @@ func gmWalkNodes(node gmast.Node, f func(gmast.Node)) { } } -// addNewLineAfterHTML allows us to work around a difference in how TF's registry parses +func gmWalkNode[T gmast.Node](node gmast.Node, f func(T)) { + gmWalkNodes(node, func(node gmast.Node) { + n, ok := node.(T) + if ok { + f(n) + } + }) +} + +// recognizeHeaderAfterHTML allows us to work around a difference in how TF's registry parses // markdown vs goldmark's CommonMark parser. // // Goldmark correctly (for CommonMark) parses the following as a single HTML Block: @@ -407,48 +416,46 @@ func gmWalkNodes(node gmast.Node, f func(gmast.Node)) { // // This is a common pattern in GCP, and we need to parse it as a HTML block, then a header // block. This AST transformation makes the desired change. -type addNewLineAfterHTML struct{} +type recognizeHeaderAfterHTML struct{} -func (addNewLineAfterHTML) Transform(node *gmast.Document, reader gmtext.Reader, pc gmparser.Context) { - gmWalkNodes(node, func(node gmast.Node) { - if html, ok := node.(*gmast.HTMLBlock); ok { - if html.Lines().Len() == 0 { - return - } +func (recognizeHeaderAfterHTML) Transform(node *gmast.Document, reader gmtext.Reader, pc gmparser.Context) { + gmWalkNode(node, func(node *gmast.HTMLBlock) { + if node.Lines().Len() == 0 { + return + } - last := html.Lines().At(html.Lines().Len() - 1) - if bytes.HasPrefix(last.Value(reader.Source()), []byte("## ")) { - html.Lines().SetSliced(0, html.Lines().Len()-1) - heading := gmast.NewHeading(2) - heading.Lines().Append(last) - node.Parent().InsertAfter(node.Parent(), node, heading) - } + last := node.Lines().At(node.Lines().Len() - 1) + if bytes.HasPrefix(last.Value(reader.Source()), []byte("## ")) { + node.Lines().SetSliced(0, node.Lines().Len()-1) + heading := gmast.NewHeading(2) + heading.Lines().Append(last) + 
node.Parent().InsertAfter(node.Parent(), node, heading) } }) } -func splitByMdHeaders(text string, level int) [][]string { +func splitByMarkdownHeaders(text string, level int) [][]string { + // splitByMarkdownHeaders parses text, then walks the resulting AST to find + // appropriate header nodes. It uses the location of these header nodes to split + // text into sections, which are then split into lines. + bytes := trimFrontMatter([]byte(text)) - headers := []int{} + offset := len(text) - len(bytes) + contract.Assertf(offset >= 0, "The offset generated by chopping off the front-matter cannot be negative") gm := goldmark.New( goldmark.WithExtensions(extension.GFM), goldmark.WithParserOptions( gmparser.WithASTTransformers( - util.Prioritized(addNewLineAfterHTML{}, 2000), + util.Prioritized(recognizeHeaderAfterHTML{}, 2000), ), ), ) - gmWalkNodes(gm.Parser().Parse(gmtext.NewReader(bytes)), func(node gmast.Node) { - if _, ok := node.(*gmast.RawHTML); ok { - node.Parent().RemoveChild(node.Parent(), node) - return - } - - heading, ok := node.(*gmast.Heading) - if !ok || heading.Level != level { + headers := []int{} + gmWalkNode(gm.Parser().Parse(gmtext.NewReader(bytes)), func(heading *gmast.Heading) { + if heading.Level != level { return } if heading.Lines().Len() == 0 { @@ -456,7 +463,11 @@ func splitByMdHeaders(text string, level int) [][]string { } for i := heading.Lines().At(0).Start; i > 0; i-- { if bytes[i] == '\n' { - headers = append(headers, i+1) + headers = append(headers, + // +1 to move past the \n + // + // +offset to move past the front-matter (if present) + i+1+offset) return } } @@ -467,28 +478,33 @@ func splitByMdHeaders(text string, level int) [][]string { // // We now use that information to extract sections from `text`.
- offset := len(text) - len(bytes) - contract.Assertf(offset >= 0, "The offset generated by chopping of the front-matter cannot be negative") - sections := make([][]string, 0, len(headers)+1) + for _, section := range splitStringsAtIndexes(text, headers) { + sections = append(sections, strings.Split(section, "\n")) + } - if len(headers) == 0 { - return [][]string{strings.Split(text, "\n")} + return sections +} + +func splitStringsAtIndexes(s string, splits []int) []string { + if len(splits) == 0 { + return []string{s} } + parts := make([]string, 0, len(splits)+1) + // Account for the first section - sections = append(sections, strings.Split(text[:headers[0]+offset-1], "\n")) + parts = append(parts, s[:splits[0]-1]) // Now handle the middle section - for from := 0; from+1 < len(headers); from++ { - sections = append(sections, - strings.Split(text[headers[from]+offset:headers[from+1]+offset-1], "\n")) + for from := 0; from+1 < len(splits); from++ { + parts = append(parts, s[splits[from]:splits[from+1]-1]) } // Account for the end section - sections = append(sections, strings.Split(text[headers[len(headers)-1]+offset:], "\n")) + parts = append(parts, s[splits[len(splits)-1]:]) - return sections + return parts } // parseTFMarkdown takes a TF website markdown doc and extracts a structured representation for use in @@ -575,7 +591,7 @@ func (p *tfMarkdownParser) parse(tfMarkdown []byte) (entityDocs, error) { markdown = strings.Replace(markdown, "", "", -1) // Split the sections by H2 topics in the Markdown file. - sections := splitByMdHeaders(markdown, 2) + sections := splitByMarkdownHeaders(markdown, 2) // we are explicitly overwriting the Terraform examples here if p.info != nil && p.info.GetDocs() != nil && p.info.ReplaceExamplesSection() { @@ -760,7 +776,7 @@ func (p *tfMarkdownParser) parseSection(h2Section []string) error { // Now split the sections by H3 topics. 
This is done because we'll ignore sub-sections with code // snippets that are unparseable (we don't want to ignore entire H2 sections). var wroteHeader bool - for _, h3Section := range splitByMdHeaders(strings.Join(h2Section[1:], "\n"), 3) { + for _, h3Section := range splitByMarkdownHeaders(strings.Join(h2Section[1:], "\n"), 3) { if len(h3Section) == 0 { // An unparseable H3 appears (as observed by building a few tier 1 providers) to typically be due to an // empty section resulting from how we parse sections earlier in the docs generation process. Therefore, we @@ -1299,7 +1315,7 @@ func (p *tfMarkdownParser) parseImports(subsection []string) { } // Recognizes import sections such as ones found in aws_accessanalyzer_analyzer. If the section is -// recognized, patches up instructoins to make sense for the Pulumi projection. +// recognized, patches up instructions to make sense for the Pulumi projection. func tryParseV2Imports(typeToken string, markdownLines []string) (string, bool) { var out bytes.Buffer fmt.Fprintf(&out, "## Import\n\n") diff --git a/pkg/tfgen/docs_test.go b/pkg/tfgen/docs_test.go index d2cdfce44..6c7c7952a 100644 --- a/pkg/tfgen/docs_test.go +++ b/pkg/tfgen/docs_test.go @@ -670,7 +670,7 @@ func TestReplaceFooterLinks(t *testing.T) { assert.Equal(t, inputText, actual) } -func TestSplitByMdHeaders(t *testing.T) { +func TestSplitByMarkdownHeaders(t *testing.T) { t.Parallel() tests := []struct { @@ -1271,7 +1271,7 @@ content for _, tt := range tests { tt := tt t.Run("", func(t *testing.T) { - actual := splitByMdHeaders(tt.input, tt.level) + actual := splitByMarkdownHeaders(tt.input, tt.level) tt.expected.Equal(t, actual) }) } @@ -1295,7 +1295,7 @@ Provides a DigitalOcean CDN Endpoint resource for use with Spaces. 
## Argument Reference` var processedMarkdown string - groups := splitByMdHeaders(markdown, 2) + groups := splitByMarkdownHeaders(markdown, 2) for _, lines := range groups { fixExampleTitles(lines) for _, line := range lines { @@ -1322,7 +1322,7 @@ Misleading example title without any actual code fences. We should not modify th ## Argument Reference` var processedMarkdown string - groups := splitByMdHeaders(markdown, 2) + groups := splitByMarkdownHeaders(markdown, 2) for _, lines := range groups { fixExampleTitles(lines) for _, line := range lines { @@ -1360,7 +1360,7 @@ Basic usage:` func TestReformatExamples(t *testing.T) { runTest := func(input string, expected [][]string) { - inputSections := splitByMdHeaders(input, 2) + inputSections := splitByMarkdownHeaders(input, 2) actual := reformatExamples(inputSections) assert.Equal(t, expected, actual) diff --git a/pkg/tfgen/generate_schema.go b/pkg/tfgen/generate_schema.go index c8b99a25e..44ed1ae57 100644 --- a/pkg/tfgen/generate_schema.go +++ b/pkg/tfgen/generate_schema.go @@ -1086,7 +1086,7 @@ func appendExample(description, markdownToAppend string) string { const exampleUsageHeader = "## Example Usage" - sections := splitByMdHeaders(description, 2) + sections := splitByMarkdownHeaders(description, 2) // If there's already an ## Example Usage section, we need to find this section and append if strings.Contains(description, exampleUsageHeader) {