From 8c344bc3caf9951fbb797c38fbd79adb640f2ad9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zolt=C3=A1n=20Nagy?= Date: Mon, 21 Oct 2024 05:11:35 -0700 Subject: [PATCH] Explode loudly on missing Markdown code block language specifier Summary: # What Fix this foot-gun: # Test something important ``` $ echo fo foo ``` This is *not a test* because the `scrut` language specifier is missing. That is, it should be: # Test something important ```scrut $ echo fo foo ``` To fix this: `scrut` will now explode when any code block in a Markdown test file is missing a language specifier. `plaintext` and `not-scrut-leave-me-alone` are perfectly valid language specifiers! # How * Updated markdown parsing to recognise code blocks without a language specifier as a markdown block - until now, they were treated as plain text lines. * Updated markdown parsing and generation with a `MarkdownToken::VerbatimCodeBlock` token that represents any Markdown code block that's *not* a Scrut test. Renamed `MarkdownToken::CodeBlock` to `MarkdownToken::TestCodeBlock` for clarity and symmetry. * Made the parser explode when a `VerbatimCodeBlock` has no language specified. * Updated the Markdown generator to correctly handle `VerbatimCodeBlock` # Alternatives Considered * Implementing a linter would be a similar amount of work, and require additional integration steps in OSS * We could make this feature opt-in (or opt-out) with a flag for easier migration; however, `scrut` in OSS has little adoption (and no stable release), and so we don't expect this to be an issue. Reviewed By: AndreasBackx Differential Revision: D64597015 fbshipit-source-id: 3f3c7b2a36ee429e64f74d5d1a896cd6f5518c89 --- .../missing-language-empty-block.mdtest | 4 + .../missing-language/missing-language.md | 23 +++++ .../missing-language/missing-language.mdtest | 6 ++ src/generators/markdown.rs | 11 ++- src/parsers/markdown.rs | 92 +++++++++++++++++-- 5 files changed, 127 insertions(+), 9 deletions(-) create mode 100644 selftest/complex/missing-language/missing-language-empty-block.mdtest create mode 100644 selftest/complex/missing-language/missing-language.md create mode 100644 selftest/complex/missing-language/missing-language.mdtest diff --git a/selftest/complex/missing-language/missing-language-empty-block.mdtest b/selftest/complex/missing-language/missing-language-empty-block.mdtest new file mode 100644 index 0000000..f7e2b74 --- /dev/null +++ b/selftest/complex/missing-language/missing-language-empty-block.mdtest @@ -0,0 +1,4 @@ +# Example EMPTY code block without a language specified + +``` +``` diff --git a/selftest/complex/missing-language/missing-language.md b/selftest/complex/missing-language/missing-language.md new file mode 100644 index 0000000..830260a --- /dev/null +++ b/selftest/complex/missing-language/missing-language.md @@ -0,0 +1,23 @@ +# Error on code block with no language specified + +```scrut +$ $SCRUT_BIN test --match-markdown "*.mdtest" "$TESTDIR"/missing-language.mdtest 2>&1 +* parse test from "*missing-language.mdtest" with markdown parser (glob) + +Caused by: + Code block starting at line 2 is missing language specifier. Use ```scrut to make this block a Scrut test, or any other language to make Scrut skip this block. +* (glob?) +[1] +``` + +# Error on EMPTY code block with no language specified + +```scrut +$ $SCRUT_BIN test --match-markdown "*.mdtest" "$TESTDIR"/missing-language-empty-block.mdtest 2>&1 +* parse test from "*missing-language-empty-block.mdtest" with markdown parser (glob) + +Caused by: + Code block starting at line 2 is missing language specifier. Use ```scrut to make this block a Scrut test, or any other language to make Scrut skip this block. +* (glob?) +[1] +``` diff --git a/selftest/complex/missing-language/missing-language.mdtest b/selftest/complex/missing-language/missing-language.mdtest new file mode 100644 index 0000000..d3cdf13 --- /dev/null +++ b/selftest/complex/missing-language/missing-language.mdtest @@ -0,0 +1,6 @@ +# Example code block without a language specified + +``` +$ echo foo +foo +``` diff --git a/src/generators/markdown.rs b/src/generators/markdown.rs index 5deddb3..522d7de 100644 --- a/src/generators/markdown.rs +++ b/src/generators/markdown.rs @@ -64,7 +64,16 @@ impl UpdateGenerator for MarkdownUpdateGenerator { updated.push_str(&config); updated.push_str("\n---\n"); } - MarkdownToken::CodeBlock { + MarkdownToken::VerbatimCodeBlock { + starting_line_number: _, + language: _, + lines, + } => { + for line in lines { + updated.push_str(&line.assure_newline()); + } + } + MarkdownToken::TestCodeBlock { language, config_lines, comment_lines, diff --git a/src/parsers/markdown.rs b/src/parsers/markdown.rs index ca49c12..376dd1c 100644 --- a/src/parsers/markdown.rs +++ b/src/parsers/markdown.rs @@ -96,7 +96,19 @@ impl Parser for MarkdownParser { title_paragraph.clear(); } } - MarkdownToken::CodeBlock { + MarkdownToken::VerbatimCodeBlock { + starting_line_number, + language, + lines: _, + } => { + if language.is_empty() { + anyhow::bail!( + "Code block starting at line {} is missing language specifier. Use ```scrut to make this block a Scrut test, or any other language to make Scrut skip this block.", + starting_line_number + ); + } + } + MarkdownToken::TestCodeBlock { language: _, config_lines, comment_lines: _, @@ -140,14 +152,14 @@ pub(crate) enum MarkdownToken { /// Raw configuration that is prepending the document DocumentConfig(Vec<(usize, String)>), - /// The parsed contents of a code block within backticks: + /// The parsed contents of a code block within backticks, representing a Scrut test: /// /// ```scrut { ... config ..} /// # comment /// $ shell expression /// output expectations /// ``` - CodeBlock { + TestCodeBlock { /// The used language token of the test (i.e. `scrut`) language: String, @@ -160,6 +172,18 @@ pub(crate) enum MarkdownToken { /// The code that makes up the test (shell expression & output expectations) code_lines: Vec<(usize, String)>, }, + + /// A code block that is not a test + VerbatimCodeBlock { + /// Index of the line containing opening backticks + starting_line_number: usize, + + /// Language specifier (e.g. `scrut`), possibly an empty string + language: String, + + /// All the lines of the code block, including opening and closing backtick lines + lines: Vec, + }, } /// An iterator that parses Markdown documents in lines and code-blocks @@ -202,11 +226,34 @@ impl<'a> Iterator for MarkdownIterator<'a> { } Some(MarkdownToken::DocumentConfig(config_content)) - // found the start of a code block (=testcase)? + // found the start of a code block (possibly a testcase)? } else if let Some((backticks, language, config)) = extract_code_block_start(line) { self.content_start = true; - if language.is_empty() || !self.languages.contains(&language) { - return Some(MarkdownToken::Line(self.line_index - 1, line.into())); + + // report verbatim code block if this is not a test block + if !self.languages.contains(&language) { + // Record the opening line (i.e. the opening backticks) + let starting_line_number = self.line_index - 1; + let mut lines = vec![line.to_string()]; + let mut line = self.document_lines.next()?; + self.line_index += 1; + + // Record all lines until the closing backticks + while !line.starts_with(backticks) { + lines.push(line.to_string()); + line = self.document_lines.next()?; + self.line_index += 1; + } + + // Record the closing backticks + lines.push(line.to_string()); + + // Return the verbatim code block + return Some(MarkdownToken::VerbatimCodeBlock { + starting_line_number, + language: language.into(), + lines, + }); } // gather optional per-test config @@ -220,7 +267,6 @@ impl<'a> Iterator for MarkdownIterator<'a> { vec![] }; - // gather optional comments let mut line = self.document_lines.next()?; self.line_index += 1; let mut comment_lines = vec![]; @@ -238,7 +284,7 @@ impl<'a> Iterator for MarkdownIterator<'a> { self.line_index += 1; } - Some(MarkdownToken::CodeBlock { + Some(MarkdownToken::TestCodeBlock { language: language.into(), config_lines, comment_lines, @@ -294,6 +340,10 @@ pub(crate) fn extract_title(line: &str) -> Option<(String, String)> { /// On the first line ending in foo, this function returns the backticks and /// the language. On all other lines it returns None. pub(crate) fn extract_code_block_start(line: &str) -> Option<(&str, &str, &str)> { + if line == "```" { + return Some((line, "", "")); + } + let mut language_start = None; for (index, ch) in line.chars().enumerate() { if let Some(language_start) = language_start { @@ -339,6 +389,7 @@ mod tests { use crate::config::TestCaseConfig; use crate::config::TestCaseWait; use crate::expectation::tests::expectation_maker; + use crate::parsers::markdown::extract_code_block_start; use crate::parsers::markdown::DEFAULT_MARKDOWN_LANGUAGES; use crate::parsers::parser::Parser; use crate::test_expectation; @@ -761,4 +812,29 @@ world testcases ); } + + #[test] + fn test_extract_code_block_start() { + assert_eq!( + Some(("```", "scrut", "")), + extract_code_block_start("```scrut") + ); + assert_eq!( + Some(("```", "bash", "")), + extract_code_block_start("```bash") + ); + } + + #[test] + fn test_extract_code_block_start_with_config() { + assert_eq!( + Some(("```", "scrut", "{timeout: 3m 3s, wait: 4m 4s}")), + extract_code_block_start("```scrut {timeout: 3m 3s, wait: 4m 4s}") + ); + } + + #[test] + fn test_extract_code_block_start_without_language() { + assert_eq!(Some(("```", "", "")), extract_code_block_start("```")); + } }