Skip to content

Commit

Permalink
Explode loudly on missing Markdown code block language specifier
Browse files Browse the repository at this point in the history
Summary:
# What

Fix this foot-gun:

    # Test something important

    ```
    $ echo fo
    foo
    ```

This is *not a test* because the `scrut` language specifier is missing. That is, it should be:

    # Test something important

    ```scrut
    $ echo fo
    foo
    ```

To fix this: `scrut` will now explode when any code block in a Markdown test file is missing a language specifier. `plaintext` and `not-scrut-leave-me-alone` are perfectly valid language specifiers!

# How

* Updated markdown parsing to recognise code blocks without a language specifier as a markdown block - until now, they were treated as plain text lines.
* Updated markdown parsing and generation with a `MarkdownToken::VerbatimCodeBlock` token that represents any Markdown code block that's *not* a Scrut test. Renamed `MarkdownToken::CodeBlock` to `MarkdownToken::TestCodeBlock` for clarity and symmetry.
* Made the parser explode when a `VerbatimCodeBlock` has no language specified.
* Updated the Markdown generator to correctly handle `VerbatimCodeBlock`

# Alternatives Considered

* Implementing a linter would be a similar amount of work, and require additional integration steps in OSS
* We could make this feature opt-in (or opt-out) with a flag for easier migration; however, `scrut` in OSS has little adoption (and no stable release), and so we don't expect this to be an issue.

Reviewed By: AndreasBackx

Differential Revision: D64597015

fbshipit-source-id: 3f3c7b2a36ee429e64f74d5d1a896cd6f5518c89
  • Loading branch information
abesto authored and facebook-github-bot committed Oct 21, 2024
1 parent f905070 commit 8c344bc
Show file tree
Hide file tree
Showing 5 changed files with 127 additions and 9 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Example EMPTY code block without a language specified

```
```
23 changes: 23 additions & 0 deletions selftest/complex/missing-language/missing-language.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Error on code block with no language specified

```scrut
$ $SCRUT_BIN test --match-markdown "*.mdtest" "$TESTDIR"/missing-language.mdtest 2>&1
* parse test from "*missing-language.mdtest" with markdown parser (glob)
Caused by:
Code block starting at line 2 is missing language specifier. Use ```scrut to make this block a Scrut test, or any other language to make Scrut skip this block.
* (glob?)
[1]
```

# Error on EMPTY code block with no language specified

```scrut
$ $SCRUT_BIN test --match-markdown "*.mdtest" "$TESTDIR"/missing-language-empty-block.mdtest 2>&1
* parse test from "*missing-language-empty-block.mdtest" with markdown parser (glob)
Caused by:
Code block starting at line 2 is missing language specifier. Use ```scrut to make this block a Scrut test, or any other language to make Scrut skip this block.
* (glob?)
[1]
```
6 changes: 6 additions & 0 deletions selftest/complex/missing-language/missing-language.mdtest
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Example code block without a language specified

```
$ echo foo
foo
```
11 changes: 10 additions & 1 deletion src/generators/markdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,16 @@ impl UpdateGenerator for MarkdownUpdateGenerator {
updated.push_str(&config);
updated.push_str("\n---\n");
}
MarkdownToken::CodeBlock {
MarkdownToken::VerbatimCodeBlock {
starting_line_number: _,
language: _,
lines,
} => {
for line in lines {
updated.push_str(&line.assure_newline());
}
}
MarkdownToken::TestCodeBlock {
language,
config_lines,
comment_lines,
Expand Down
92 changes: 84 additions & 8 deletions src/parsers/markdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,19 @@ impl Parser for MarkdownParser {
title_paragraph.clear();
}
}
MarkdownToken::CodeBlock {
MarkdownToken::VerbatimCodeBlock {
starting_line_number,
language,
lines: _,
} => {
if language.is_empty() {
anyhow::bail!(
"Code block starting at line {} is missing language specifier. Use ```scrut to make this block a Scrut test, or any other language to make Scrut skip this block.",
starting_line_number
);
}
}
MarkdownToken::TestCodeBlock {
language: _,
config_lines,
comment_lines: _,
Expand Down Expand Up @@ -140,14 +152,14 @@ pub(crate) enum MarkdownToken {
/// Raw configuration that is prepending the document
DocumentConfig(Vec<(usize, String)>),

/// The parsed contents of a code block within backticks:
/// The parsed contents of a code block within backticks, representing a Scrut test:
///
/// ```scrut { ... config ..}
/// # comment
/// $ shell expression
/// output expectations
/// ```
CodeBlock {
TestCodeBlock {
/// The used language token of the test (i.e. `scrut`)
language: String,

Expand All @@ -160,6 +172,18 @@ pub(crate) enum MarkdownToken {
/// The code that makes up the test (shell expression & output expectations)
code_lines: Vec<(usize, String)>,
},

/// A code block that is not a test
VerbatimCodeBlock {
/// Index of the line containing opening backticks
starting_line_number: usize,

/// Language specifier (e.g. `scrut`), possibly an empty string
language: String,

/// All the lines of the code block, including opening and closing backtick lines
lines: Vec<String>,
},
}

/// An iterator that parses Markdown documents in lines and code-blocks
Expand Down Expand Up @@ -202,11 +226,34 @@ impl<'a> Iterator for MarkdownIterator<'a> {
}
Some(MarkdownToken::DocumentConfig(config_content))

// found the start of a code block (=testcase)?
// found the start of a code block (possibly a testcase)?
} else if let Some((backticks, language, config)) = extract_code_block_start(line) {
self.content_start = true;
if language.is_empty() || !self.languages.contains(&language) {
return Some(MarkdownToken::Line(self.line_index - 1, line.into()));

// report verbatim code block if this is not a test block
if !self.languages.contains(&language) {
// Record the opening line (i.e. the opening backticks)
let starting_line_number = self.line_index - 1;
let mut lines = vec![line.to_string()];
let mut line = self.document_lines.next()?;
self.line_index += 1;

// Record all lines until the closing backticks
while !line.starts_with(backticks) {
lines.push(line.to_string());
line = self.document_lines.next()?;
self.line_index += 1;
}

// Record the closing backticks
lines.push(line.to_string());

// Return the verbatim code block
return Some(MarkdownToken::VerbatimCodeBlock {
starting_line_number,
language: language.into(),
lines,
});
}

// gather optional per-test config
Expand All @@ -220,7 +267,6 @@ impl<'a> Iterator for MarkdownIterator<'a> {
vec![]
};

// gather optional comments
let mut line = self.document_lines.next()?;
self.line_index += 1;
let mut comment_lines = vec![];
Expand All @@ -238,7 +284,7 @@ impl<'a> Iterator for MarkdownIterator<'a> {
self.line_index += 1;
}

Some(MarkdownToken::CodeBlock {
Some(MarkdownToken::TestCodeBlock {
language: language.into(),
config_lines,
comment_lines,
Expand Down Expand Up @@ -294,6 +340,10 @@ pub(crate) fn extract_title(line: &str) -> Option<(String, String)> {
/// On the first line ending in foo, this function returns the backticks and
/// the language. On all other lines it returns None.
pub(crate) fn extract_code_block_start(line: &str) -> Option<(&str, &str, &str)> {
if line == "```" {
return Some((line, "", ""));
}

let mut language_start = None;
for (index, ch) in line.chars().enumerate() {
if let Some(language_start) = language_start {
Expand Down Expand Up @@ -339,6 +389,7 @@ mod tests {
use crate::config::TestCaseConfig;
use crate::config::TestCaseWait;
use crate::expectation::tests::expectation_maker;
use crate::parsers::markdown::extract_code_block_start;
use crate::parsers::markdown::DEFAULT_MARKDOWN_LANGUAGES;
use crate::parsers::parser::Parser;
use crate::test_expectation;
Expand Down Expand Up @@ -761,4 +812,29 @@ world
testcases
);
}

#[test]
fn test_extract_code_block_start() {
assert_eq!(
Some(("```", "scrut", "")),
extract_code_block_start("```scrut")
);
assert_eq!(
Some(("```", "bash", "")),
extract_code_block_start("```bash")
);
}

#[test]
fn test_extract_code_block_start_with_config() {
assert_eq!(
Some(("```", "scrut", "{timeout: 3m 3s, wait: 4m 4s}")),
extract_code_block_start("```scrut {timeout: 3m 3s, wait: 4m 4s}")
);
}

#[test]
fn test_extract_code_block_start_without_language() {
assert_eq!(Some(("```", "", "")), extract_code_block_start("```"));
}
}

0 comments on commit 8c344bc

Please sign in to comment.