Skip to content

Commit

Permalink
[move][move-compiler] Revise block comment parsing to be more-resilie…
Browse files Browse the repository at this point in the history
…nt (#20424)

## Description 

Partially revise block comment parsing, plus fix issues around parsing
`/**/`

## Test plan 

New tests that work now

---

## Release notes

Check each box that your changes affect. If none of the boxes relate to
your changes, release notes aren't required.

For each box you select, include information after the relevant heading
that describes the impact of your changes that a user might notice and
any actions they must take to implement updates.

- [ ] Protocol: 
- [ ] Nodes (Validators and Full nodes): 
- [ ] Indexer: 
- [ ] JSON-RPC: 
- [ ] GraphQL: 
- [ ] CLI: 
- [ ] Rust SDK:
- [ ] REST API:
  • Loading branch information
cgswords authored Nov 26, 2024
1 parent 1dd7713 commit 9bd951a
Show file tree
Hide file tree
Showing 5 changed files with 126 additions and 58 deletions.
133 changes: 76 additions & 57 deletions external-crates/move/crates/move-compiler/src/parser/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,65 +286,9 @@ impl<'input> Lexer<'input> {
text = trim_start_whitespace(text);

if text.starts_with("/*") {
// Strip multi-line comments like '/* ... */' or '/** ... */'.
// These can be nested, as in '/* /* ... */ */', so record the
// start locations of each nested comment as a stack. The
// boolean indicates whether it's a documentation comment.
let mut locs: Vec<(usize, bool)> = vec![];
loop {
text = text.trim_start_matches(|c: char| c != '/' && c != '*');
if text.is_empty() {
// We've reached the end of string while searching for a
// terminating '*/'.
let loc = *locs.last().unwrap();
// Highlight the '/**' if it's a documentation comment, or the '/*'
// otherwise.
let location =
make_loc(self.file_hash, loc.0, loc.0 + if loc.1 { 3 } else { 2 });
return Err(Box::new(diag!(
Syntax::InvalidDocComment,
(location, "Unclosed block comment"),
)));
} else if text.starts_with("/*") {
// We've found a (perhaps nested) multi-line comment.
let start = get_offset(text);
text = &text[2..];

// Check if this is a documentation comment: '/**', but not '/***'.
// A documentation comment cannot be nested within another comment.
let is_doc =
text.starts_with('*') && !text.starts_with("**") && locs.is_empty();

locs.push((start, is_doc));
} else if text.starts_with("*/") {
// We've found a multi-line comment terminator that ends
// our innermost nested comment.
let loc = locs.pop().unwrap();
text = &text[2..];

// If this was a documentation comment, record it in our map.
if loc.1 {
let end = get_offset(text);
self.doc_comments.insert(
(loc.0 as u32, end as u32),
self.text[(loc.0 + 3)..(end - 2)].to_string(),
);
}

// If this terminated our last comment, exit the loop.
if locs.is_empty() {
break;
}
} else {
// This is a solitary '/' or '*' that isn't part of any comment delimiter.
// Skip over it.
let c = text.chars().next().unwrap();
text = &text[c.len_utf8()..];
}
}

// Continue the loop immediately after the multi-line comment.
// There may be whitespace or another comment following this one.
text = self.parse_block_comment(get_offset(text))?;
continue;
} else if text.starts_with("//") {
let start = get_offset(text);
Expand All @@ -370,6 +314,81 @@ impl<'input> Lexer<'input> {
Ok(text)
}

fn parse_block_comment(&mut self, offset: usize) -> Result<&'input str, Box<Diagnostic>> {
struct CommentEntry {
start: usize,
is_doc_comment: bool,
}

let text = &self.text[offset..];

// A helper function to compute the index of the start of the given substring.
let len = text.len();
let get_offset = |substring: &str| offset + len - substring.len();

let block_doc_comment_start: &str = "/**";

assert!(text.starts_with("/*"));
let initial_entry = CommentEntry {
start: get_offset(text),
is_doc_comment: text.starts_with(block_doc_comment_start),
};
let mut comment_queue: Vec<CommentEntry> = vec![initial_entry];

// This is a _rough_ apporximation which disregards doc comments in order to handle the
// case where we have `/**/` or similar.
let mut text = &text[2..];

while let Some(comment) = comment_queue.pop() {
text = text.trim_start_matches(|c: char| c != '/' && c != '*');
if text.is_empty() {
// We've reached the end of string while searching for a terminating '*/'.
// Highlight the '/**' if it's a documentation comment, or the '/*' otherwise.
let location = make_loc(
self.file_hash,
comment.start,
comment.start + if comment.is_doc_comment { 3 } else { 2 },
);
return Err(Box::new(diag!(
Syntax::InvalidDocComment,
(location, "Unclosed block comment"),
)));
};

match &text[..2] {
"*/" => {
let end = get_offset(text);
// If the comment was not empty -- fuzzy ot handle `/**/`, which triggers the
// doc comment check but is not actually a doc comment.
if comment.start + 3 < end && comment.is_doc_comment {
self.doc_comments.insert(
(comment.start as u32, end as u32),
self.text[(comment.start + 3)..end].to_string(),
);
}
text = &text[2..];
}
"/*" => {
comment_queue.push(comment);
let new_comment = CommentEntry {
start: get_offset(text),
is_doc_comment: text.starts_with(block_doc_comment_start),
};
comment_queue.push(new_comment);
text = &text[2..];
}
_ => {
// This is a solitary '/' or '*' that isn't part of any comment delimiter.
// Skip over it.
comment_queue.push(comment);
let c = text.chars().next().unwrap();
text = &text[c.len_utf8()..];
}
}
}
Ok(text)
}

// Trim until reaching whitespace: space, tab, lf(\n) and crlf(\r\n).
fn trim_until_whitespace(&self, offset: usize) -> &'input str {
let mut text = &self.text[offset..];
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
warning[W01004]: invalid documentation comment
┌─ tests/move_check/parser/block_comments_inline.move:11:61
11 │ let /*comment*/s = S { /***/w, /**/x, /*comment*/y, /** doc */z };
│ ^^^^^^^^ Documentation comment cannot be matched to a language item

warning[W01004]: invalid documentation comment
┌─ tests/move_check/parser/block_comments_inline.move:12:17
12 │ let S { /****/w, /**/x, /*comment*/y, /** doc */z } = /*comment*/s;
│ ^^^^ Documentation comment cannot be matched to a language item

warning[W01004]: invalid documentation comment
┌─ tests/move_check/parser/block_comments_inline.move:12:47
12 │ let S { /****/w, /**/x, /*comment*/y, /** doc */z } = /*comment*/s;
│ ^^^^^^^^ Documentation comment cannot be matched to a language item

Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
module 0x42::m {

struct S {
/*comment*/ w: u64,
/*comment*/ x: u64,
/**/ y: u64,
/** doc */ z: u64,
}

fun t(w: u64, x: u64, /*ignore*/y: u64, z:u64/**/): (u64, u64, u64, u64) {
let /*comment*/s = S { /***/w, /**/x, /*comment*/y, /** doc */z };
let S { /****/w, /**/x, /*comment*/y, /** doc */z } = /*comment*/s;
(w, x, y, z)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ warning[W01004]: invalid documentation comment
┌─ tests/move_check/parser/comments_nested_unbalanced.move:1:4
1 │ /* /** /** * / */ /** */
│ ^^ Unclosed block comment
│ ^^^ Unclosed block comment

Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
module 0x42::m {
/* */
struct A { }
/** **/
struct B { }
/** */
struct C { }
/*** */
struct D { }
/****/
struct E { }
/***/
struct F { }
/**/
struct G { }
}

0 comments on commit 9bd951a

Please sign in to comment.