Handle headers in code comments #2285

Merged · 10 commits · Aug 8, 2024
Changes from 3 commits
84 changes: 80 additions & 4 deletions pkg/tfgen/docs.go
@@ -386,9 +386,85 @@ func groupLines(lines []string, sep string) [][]string {
return sections
}

// splitGroupLines splits and groups a string, s, by a given separator, sep.
func splitGroupLines(s, sep string) [][]string {
return groupLines(strings.Split(s, "\n"), sep)
func trimFrontMatter(text []byte) []byte {
delineater := []byte("---")
body, ok := bytes.CutPrefix(text, delineater)
if !ok {
return text
}
idx := bytes.Index(body, delineater)

// Unable to find closing, so just return.
if idx == -1 {
return text
}
return body[idx+3:]
}

func splitByMdHeaders(text string, level int) [][]string {
bytes := trimFrontMatter([]byte(text))
idx := 0
headers := []int{}

parseDoc(bytes).Walk(func(node *bf.Node, entering bool) bf.WalkStatus {
if !entering {
return bf.GoToNext
}

if node.Type != bf.Heading || node.HeadingData.Level != level || node.HeadingData.IsTitleblock {
return bf.GoToNext
}
var foundHeader bool
for ; idx < len(bytes); idx++ {
// Here we take advantage of the fact that the .Literal field on
// leaf nodes is a view into the same byte array that was passed
// into `parseDoc` to recover the index of .Literal[0] in the
// original array.
if &bytes[idx] == &node.FirstChild.Literal[0] {
// We have found in `bytes` the location of a header text,
// but we want the start of the line. We need to walk
// back.
for i := idx; i > 0; i-- {
if bytes[i] == '\n' {
headers = append(headers, i+1)
break
}
}
foundHeader = true
break
}
}
contract.Assertf(foundHeader, "Failed to find source location of a header")
return bf.GoToNext
})

// headers now contains the indexes into `bytes` that represent the start of each
// section.
//
// We now use that information to extract sections from `text`.

offset := len(text) - len(bytes)
contract.Assertf(offset >= 0, "The offset generated by chopping off the front-matter cannot be negative")

sections := make([][]string, 0, len(headers)+1)

if len(headers) == 0 {
return [][]string{strings.Split(text, "\n")}
}

// Account for the first section
sections = append(sections, strings.Split(text[:headers[0]+offset-1], "\n"))

// Now handle the middle sections
for from := 0; from+1 < len(headers); from++ {
sections = append(sections,
strings.Split(text[headers[from]+offset:headers[from+1]+offset-1], "\n"))
}

// Account for the end section
sections = append(sections, strings.Split(text[headers[len(headers)-1]+offset:], "\n"))

return sections
}

// parseTFMarkdown takes a TF website markdown doc and extracts a structured representation for use in
@@ -475,7 +475,7 @@ func (p *tfMarkdownParser) parse(tfMarkdown []byte) (entityDocs, error) {
markdown = strings.Replace(markdown, "<!-- schema generated by tfplugindocs -->", "", -1)

// Split the sections by H2 topics in the Markdown file.
sections := splitGroupLines(markdown, "## ")
sections := splitByMdHeaders(markdown, 2)

// we are explicitly overwriting the Terraform examples here
if p.info != nil && p.info.GetDocs() != nil && p.info.ReplaceExamplesSection() {
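For orientation, here is a minimal sketch (not part of this diff) of how the new splitByMdHeaders helper behaves, assuming it is exercised from inside pkg/tfgen where the unexported functions are visible; the exampleSplit name and the sample document are illustrative assumptions.

```go
package tfgen

import (
	"fmt"
	"strings"
)

// exampleSplit sketches the behavior this change is after: an "## " line that
// sits inside a fenced code block is no longer treated as a section boundary,
// because only real blackfriday heading nodes are used to split the document.
func exampleSplit() {
	fence := "```"
	doc := strings.Join([]string{
		"---",
		"page_title: example",
		"---",
		"Intro text.",
		"",
		"## Example Usage",
		"",
		fence + "terraform",
		"## Minimal",
		`resource "snowflake_database" "primary" {`,
		`  name = "database_name"`,
		"}",
		fence,
		"",
		"## Argument Reference",
		"",
	}, "\n")

	sections := splitByMdHeaders(doc, 2)
	// Expected: three sections -- the preamble (front matter included), the
	// "## Example Usage" section containing the whole code fence, and the
	// "## Argument Reference" section. The old splitGroupLines(doc, "## ")
	// would also have split at the "## Minimal" comment inside the fence.
	for i, lines := range sections {
		fmt.Printf("section %d has %d lines\n", i, len(lines))
	}
}
```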
155 changes: 84 additions & 71 deletions pkg/tfgen/docs_test.go
@@ -688,7 +688,7 @@ Provides a DigitalOcean CDN Endpoint resource for use with Spaces.
## Argument Reference`

var processedMarkdown string
groups := splitGroupLines(markdown, "## ")
groups := splitByMdHeaders(markdown, 2)
for _, lines := range groups {
fixExampleTitles(lines)
for _, line := range lines {
@@ -715,7 +715,7 @@ Misleading example title without any actual code fences. We should not modify th
## Argument Reference`

var processedMarkdown string
groups := splitGroupLines(markdown, "## ")
groups := splitByMdHeaders(markdown, 2)
for _, lines := range groups {
fixExampleTitles(lines)
for _, line := range lines {
@@ -753,38 +753,43 @@ Basic usage:`

func TestReformatExamples(t *testing.T) {
runTest := func(input string, expected [][]string) {
inputSections := splitGroupLines(input, "## ")
output := reformatExamples(inputSections)
inputSections := splitByMdHeaders(input, 2)
actual := reformatExamples(inputSections)

assert.ElementsMatch(t, expected, output)
assert.Equal(t, expected, actual)
}

// This is a simple use case. We expect no changes to the original doc:
simpleDoc := `description
t.Run("no-op", func(t *testing.T) {
input := `description

## Example Usage

example usage content`

simpleDocExpected := [][]string{
{
"description",
"",
},
{
"## Example Usage",
"",
"example usage content",
},
}
expected := [][]string{
{
"description",
"",
},
{
"## Example Usage",
"",
"example usage content",
},
}

runTest(simpleDoc, simpleDocExpected)
runTest(input, expected)
})

// This use case demonstrates 2 examples at the same H2 level: a canonical Example Usage and another example
// for a specific use case. We expect these to be transformed into a canonical H2 "Example Usage" with an H3 for
// the specific use case.
// This scenario is common in the pulumi-gcp provider:
gcpDoc := `description
// This use case demonstrates 2 examples at the same H2 level: a canonical Example
// Usage and another example for a specific use case. We expect these to be
// transformed into a canonical H2 "Example Usage" with an H3 for the specific use
// case.
//
// This scenario is common in the pulumi-gcp provider.
t.Run("multiple-examples-same-level", func(t *testing.T) {
input := `description

## Example Usage

@@ -794,28 +799,32 @@ example usage content

specific case content`

gcpDocExpected := [][]string{
{
"description",
"",
},
{
"## Example Usage",
"",
"example usage content",
"",
"### Specific Case",
"",
"specific case content",
},
}
expected := [][]string{
{
"description",
"",
},
{
"## Example Usage",
"",
"example usage content",
"",
"### Specific Case",
"",
"specific case content",
},
}

runTest(gcpDoc, gcpDocExpected)
runTest(input, expected)
})

// This use case demonstrates 2 no canonical Example Usage/basic case and 2 specific use cases. We expect the
// function to add a canonical Example Usage section with the 2 use cases as H3's beneath the canonical section.
// This scenario is common in the pulumi-gcp provider:
gcpDoc2 := `description
// This use case demonstrates no canonical Example Usage/basic case and 2
// specific use cases. We expect the function to add a canonical Example Usage
// section with the 2 use cases as H3's beneath the canonical section.
//
// This scenario is common in the pulumi-gcp provider.
t.Run("no-canonical-example-header", func(t *testing.T) {
input := `description

## Example Usage - 1

@@ -825,41 +834,44 @@ content 1

content 2`

gcpDoc2Expected := [][]string{
{
"description",
"",
},
{
"## Example Usage",
"### 1",
"",
"content 1",
"",
"### 2",
"",
"content 2",
},
}
expected := [][]string{
{
"description",
"",
},
{
"## Example Usage",
"### 1",
"",
"content 1",
"",
"### 2",
"",
"content 2",
},
}

runTest(gcpDoc2, gcpDoc2Expected)
runTest(input, expected)
})

misformattedDocNoPanic := `## jetstream_kv_entry Resource
t.Run("misformatted-docs-dont-panic", func(t *testing.T) {
input := `## jetstream_kv_entry Resource
content
### Example
content`

misformattedDocsExpected := [][]string{
nil,
{
"## jetstream_kv_entry Resource",
"content",
"### Example",
"content",
},
}
expected := [][]string{
nil,
{
"## jetstream_kv_entry Resource",
"content",
"### Example",
"content",
},
}

runTest(misformattedDocNoPanic, misformattedDocsExpected)
runTest(input, expected)
})
}

func TestFormatEntityName(t *testing.T) {
@@ -1596,6 +1608,7 @@ func TestParseTFMarkdown(t *testing.T) {
[]byte(`CUSTOM_REPLACES`),
[]byte(`checking custom replaces`)), nil
})),
test("codeblock-header"),
}

for _, tt := range tests {
6 changes: 3 additions & 3 deletions pkg/tfgen/parse_markdown.go
@@ -443,13 +443,13 @@ func parseTextSeq(firstNode *bf.Node, useStarsForStrongAndEmph bool) (string, er
return buffer.String(), err
}

func parseDoc(text string) *bf.Node {
func parseDoc(text []byte) *bf.Node {
mdProc := bf.New(bf.WithExtensions(bf.FencedCode))
return mdProc.Parse([]byte(text))
return mdProc.Parse(text)
}

func parseNode(text string) *bf.Node {
return parseDoc(text).FirstChild
return parseDoc([]byte(text)).FirstChild
}

// Used for debugging blackfriday parse trees by visualizing them.
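The signature change to parseDoc is what lets splitByMdHeaders map parsed heading nodes back to byte offsets in the caller's buffer. Below is a rough sketch of that aliasing property, assuming (as the new code does) that blackfriday leaf-node Literal slices are views into the buffer passed to Parse; headingOffset is a hypothetical helper placed alongside parseDoc in pkg/tfgen, with bf aliasing github.com/russross/blackfriday/v2 as in the existing imports.

```go
// headingOffset returns the byte index in src at which the text of the first
// H2 heading begins, or -1 if no H2 heading is found. It works only because
// parseDoc now receives src itself rather than a copy made by []byte(text).
func headingOffset(src []byte) int {
	offset := -1
	parseDoc(src).Walk(func(node *bf.Node, entering bool) bf.WalkStatus {
		if !entering || node.Type != bf.Heading || node.HeadingData.Level != 2 {
			return bf.GoToNext
		}
		if node.FirstChild == nil || len(node.FirstChild.Literal) == 0 {
			return bf.GoToNext
		}
		lit := node.FirstChild.Literal
		for i := range src {
			// Pointer equality, not value equality: we are locating the
			// heading text within the original backing array.
			if &src[i] == &lit[0] {
				offset = i
				break
			}
		}
		return bf.Terminate
	})
	return offset
}
```

For example, headingOffset([]byte("intro\n\n## Example Usage\n\nbody\n")) reports where "Example Usage" starts; splitByMdHeaders then walks back from that index to the beginning of the line.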
4 changes: 2 additions & 2 deletions pkg/tfgen/parse_markdown_test.go
@@ -74,7 +74,7 @@ func TestParseTopLevelSchema(t *testing.T) {

var schema *topLevelSchema

parseDoc(markdown).Walk(func(node *bf.Node, entering bool) bf.WalkStatus {
parseDoc([]byte(markdown)).Walk(func(node *bf.Node, entering bool) bf.WalkStatus {
if entering {
tls, err := parseTopLevelSchema(node, nil)
if err != nil {
@@ -113,7 +113,7 @@ func TestParseTopLevelSchema(t *testing.T) {
func TestParseNestedSchemaIntoDoc(t *testing.T) {
markdown := readTestFile(t, "mini.md")
out := &entityDocs{}
parseDoc(markdown).Walk(func(node *bf.Node, entering bool) bf.WalkStatus {
parseDoc([]byte(markdown)).Walk(func(node *bf.Node, entering bool) bf.WalkStatus {
if entering {
nested, err := parseNestedSchema(node, nil)
if err != nil {
41 changes: 41 additions & 0 deletions pkg/tfgen/test_data/codeblock-header/expected.json
@@ -0,0 +1,41 @@
{
"Description": "!\u003e **V1 release candidate** This resource was reworked and is a release candidate for the V1. We do not expect significant changes in it before the V1. We will welcome any feedback and adjust the resource if needed. Any errors reported will be resolved with a higher priority. We encourage checking this resource out before the V1 release. Please follow the migration guide to use it.\n\nRepresents a standard database. If replication configuration is specified, the database is promoted to serve as a primary database for replication.\n\n## Example Usage\n\n```terraform\n## Minimal\nresource \"snowflake_database\" \"primary\" {\n name = \"database_name\"\n}\n\n## Complete (with every optional set)\nresource \"snowflake_database\" \"primary\" {\n name = \"database_name\"\n is_transient = false\n comment = \"my standard database\"\n\n data_retention_time_in_days = 10\n data_retention_time_in_days_save = 10\n max_data_extension_time_in_days = 20\n external_volume = \"\u003cexternal_volume_name\u003e\"\n catalog = \"\u003ccatalog_name\u003e\"\n replace_invalid_characters = false\n default_ddl_collation = \"en_US\"\n storage_serialization_policy = \"COMPATIBLE\"\n log_level = \"INFO\"\n trace_level = \"ALWAYS\"\n suspend_task_after_num_failures = 10\n task_auto_retry_attempts = 10\n user_task_managed_initial_warehouse_size = \"LARGE\"\n user_task_timeout_ms = 3600000\n user_task_minimum_trigger_interval_in_seconds = 120\n quoted_identifiers_ignore_case = false\n enable_console_output = false\n\n replication {\n enable_to_account {\n account_identifier = \"\u003csecondary_account_organization_name\u003e.\u003csecondary_account_name\u003e\"\n with_failover = true\n }\n ignore_edition_check = true\n }\n}\n\n## Replication with for_each\nlocals {\n replication_configs = [\n {\n account_identifier = \"\u003csecondary_account_organization_name\u003e.\u003csecondary_account_name\u003e\"\n with_failover = true\n },\n {\n account_identifier = \"\u003csecondary_account_organization_name\u003e.\u003csecondary_account_name\u003e\"\n with_failover = true\n },\n ]\n}\n\nresource \"snowflake_database\" \"primary\" {\n name = \"database_name\"\n for_each = local.replication_configs\n\n replication {\n enable_to_account = each.value\n ignore_edition_check = true\n }\n}\n```",
"Arguments": {
"replication.enable_to_account": {
"description": "Entry to enable replication and optionally failover for a given account identifier."
},
"replication.enable_to_account.account_identifier": {
"description": "Specifies account identifier for which replication should be enabled. The account identifiers should be in the form of `\"\u003corganization_name\u003e\".\"\u003caccount_name\u003e\"`."
},
"replication.enable_to_account.with_failover": {
"description": "Specifies if failover should be enabled for the specified account identifier"
},
"replication.ignore_edition_check": {
"description": "Allows replicating data to accounts on lower editions in either of the following scenarios: 1. The primary database is in a Business Critical (or higher) account but one or more of the accounts approved for replication are on lower editions. Business Critical Edition is intended for Snowflake accounts with extremely sensitive data. 2. The primary database is in a Business Critical (or higher) account and a signed business associate agreement is in place to store PHI data in the account per HIPAA and HITRUST regulations, but no such agreement is in place for one or more of the accounts approved for replication, regardless if they are Business Critical (or higher) accounts. Both scenarios are prohibited by default in an effort to help prevent account administrators for Business Critical (or higher) accounts from inadvertently replicating sensitive data to accounts on lower editions."
}
},
"Attributes": {
"catalog": "The database parameter that specifies the default catalog to use for Iceberg tables. For more information, see [CATALOG](https://docs.snowflake.com/en/sql-reference/parameters#catalog).",
"comment": "Specifies a comment for the database.",
"data_retention_time_in_days": "Specifies the number of days for which Time Travel actions (CLONE and UNDROP) can be performed on the database, as well as specifying the default Time Travel retention time for all schemas created in the database. For more details, see [Understanding \u0026 Using Time Travel](https://docs.snowflake.com/en/user-guide/data-time-travel).",
"default_ddl_collation": "Specifies a default collation specification for all schemas and tables added to the database. It can be overridden on schema or table level. For more information, see [collation specification](https://docs.snowflake.com/en/sql-reference/collation#label-collation-specification).",
"enable_console_output": "If true, enables stdout/stderr fast path logging for anonymous stored procedures.",
"external_volume": "The database parameter that specifies the default external volume to use for Iceberg tables. For more information, see [EXTERNAL_VOLUME](https://docs.snowflake.com/en/sql-reference/parameters#external-volume).",
"id": "The ID of this resource.",
"is_transient": "Specifies the database as transient. Transient databases do not have a Fail-safe period so they do not incur additional storage costs once they leave Time Travel; however, this means they are also not protected by Fail-safe in the event of a data loss.",
"log_level": "Specifies the severity level of messages that should be ingested and made available in the active event table. Valid options are: [TRACE DEBUG INFO WARN ERROR FATAL OFF]. Messages at the specified level (and at more severe levels) are ingested. For more information, see [LOG_LEVEL](https://docs.snowflake.com/en/sql-reference/parameters.html#label-log-level).",
"max_data_extension_time_in_days": "Object parameter that specifies the maximum number of days for which Snowflake can extend the data retention period for tables in the database to prevent streams on the tables from becoming stale. For a detailed description of this parameter, see [MAX*DATA*EXTENSION*TIME*IN_DAYS](https://docs.snowflake.com/en/sql-reference/parameters.html#label-max-data-extension-time-in-days).",
"name": "Specifies the identifier for the database; must be unique for your account. As a best practice for [Database Replication and Failover](https://docs.snowflake.com/en/user-guide/db-replication-intro), it is recommended to give each secondary database the same name as its primary database. This practice supports referencing fully-qualified objects (i.e. '\\n\\n.\\n\\n.\\n\\n') by other objects in the same database, such as querying a fully-qualified table name in a view. If a secondary database has a different name from the primary database, then these object references would break in the secondary database.",
"quoted_identifiers_ignore_case": "If true, the case of quoted identifiers is ignored. For more information, see [QUOTED*IDENTIFIERS*IGNORE_CASE](https://docs.snowflake.com/en/sql-reference/parameters#quoted-identifiers-ignore-case).",
"replace_invalid_characters": "Specifies whether to replace invalid UTF-8 characters with the Unicode replacement character (�) in query results for an Iceberg table. You can only set this parameter for tables that use an external Iceberg catalog. For more information, see [REPLACE*INVALID*CHARACTERS](https://docs.snowflake.com/en/sql-reference/parameters#replace-invalid-characters).",
"replication": "Configures replication for a given database. When specified, this database will be promoted to serve as a primary database for replication. A primary database can be replicated in one or more accounts, allowing users in those accounts to query objects in each secondary (i.e. replica) database.",
"storage_serialization_policy": "The storage serialization policy for Iceberg tables that use Snowflake as the catalog. Valid options are: [COMPATIBLE OPTIMIZED]. COMPATIBLE: Snowflake performs encoding and compression of data files that ensures interoperability with third-party compute engines. OPTIMIZED: Snowflake performs encoding and compression of data files that ensures the best table performance within Snowflake. For more information, see [STORAGE*SERIALIZATION*POLICY](https://docs.snowflake.com/en/sql-reference/parameters#storage-serialization-policy).",
"suspend_task_after_num_failures": "How many times a task must fail in a row before it is automatically suspended. 0 disables auto-suspending. For more information, see [SUSPEND*TASK*AFTER*NUM*FAILURES](https://docs.snowflake.com/en/sql-reference/parameters#suspend-task-after-num-failures).",
"task_auto_retry_attempts": "Maximum automatic retries allowed for a user task. For more information, see [TASK*AUTO*RETRY_ATTEMPTS](https://docs.snowflake.com/en/sql-reference/parameters#task-auto-retry-attempts).",
"trace_level": "Controls how trace events are ingested into the event table. Valid options are: [ALWAYS ON*EVENT OFF]. For information about levels, see [TRACE*LEVEL](https://docs.snowflake.com/en/sql-reference/parameters.html#label-trace-level).",
"user_task_managed_initial_warehouse_size": "The initial size of warehouse to use for managed warehouses in the absence of history. For more information, see [USER*TASK*MANAGED*INITIAL*WAREHOUSE_SIZE](https://docs.snowflake.com/en/sql-reference/parameters#user-task-managed-initial-warehouse-size).",
"user_task_minimum_trigger_interval_in_seconds": "Minimum amount of time between Triggered Task executions in seconds.",
"user_task_timeout_ms": "User task execution timeout in milliseconds. For more information, see [USER*TASK*TIMEOUT_MS](https://docs.snowflake.com/en/sql-reference/parameters#user-task-timeout-ms)."
},
"Import": "## Import\n\n```sh\n$ pulumi import MISSING_TOK example 'database_name'\n```\n\n"
}