-
Notifications
You must be signed in to change notification settings - Fork 219
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Add comments to format output #4397
base: main
Are you sure you want to change the base?
Changes from all commits
a45287c
1b351c8
8435108
161c28a
4e8a29f
1a4858d
7f51d48
e7ad240
07eacad
73c0504
51a85b3
7021c49
99328e8
47f4fb2
e0c02e7
4e47f09
94e7955
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,10 @@ | ||
use std::collections::HashSet; | ||
|
||
use once_cell::sync::Lazy; | ||
|
||
use crate::ast::*; | ||
use prqlc_parser::{Token, TokenKind, TokenVec}; | ||
use regex::Regex; | ||
|
||
use crate::ast::*; | ||
use crate::codegen::SeparatedExprs; | ||
|
||
use super::{WriteOpt, WriteSource}; | ||
|
@@ -17,13 +17,28 @@ | |
let parent_strength = binding_strength(parent); | ||
opt.context_strength = opt.context_strength.max(parent_strength); | ||
|
||
node.write(opt) | ||
// FIXME: this is extremely hacky. Our issue is that in: | ||
// | ||
// from a.b # comment | ||
// | ||
// ...we're writing both `from a.b` and `a.b`, so we need to know which of | ||
// these to write comments for. I'm sure there are better ways to do it. | ||
let enable_comments = opt.enable_comments; | ||
opt.enable_comments = false; | ||
let out = node.write(opt.clone()); | ||
opt.enable_comments = enable_comments; | ||
out | ||
} | ||
|
||
impl WriteSource for Expr { | ||
fn write(&self, mut opt: WriteOpt) -> Option<String> { | ||
let mut r = String::new(); | ||
|
||
// if let Some(span) = self.span { | ||
// if let Some(comment) = find_comment_before(span, &opt.tokens) { | ||
// r += &comment.to_string(); | ||
// } | ||
// } | ||
if let Some(alias) = &self.alias { | ||
r += opt.consume(alias)?; | ||
r += opt.consume(" = ")?; | ||
|
@@ -41,9 +56,44 @@ | |
if let Some(value) = value { | ||
r += &value; | ||
} else { | ||
r += &break_line_within_parenthesis(&self.kind, opt)?; | ||
r += &break_line_within_parenthesis(&self.kind, &mut opt)?; | ||
} | ||
}; | ||
|
||
if opt.enable_comments { | ||
if let Some(span) = self.span { | ||
// TODO: change underlying function so we can remove this | ||
if opt.tokens.0.is_empty() { | ||
return Some(r); | ||
} | ||
|
||
let comments = find_comments_after(span, &opt.tokens); | ||
|
||
// If the first item is a comment, it's an inline comment, and | ||
// so add two spaces | ||
if matches!( | ||
comments.first(), | ||
Some(Token { | ||
kind: TokenKind::Comment(_), | ||
.. | ||
}) | ||
) { | ||
r += " "; | ||
} | ||
|
||
for c in comments { | ||
match c.kind { | ||
// TODO: these are defined here since the debug | ||
// representations aren't quite right (NewLine is `new | ||
// line` as is used in error messages). But we should | ||
// probably move them onto the Struct. | ||
TokenKind::Comment(s) => r += format!("#{}", s).as_str(), | ||
TokenKind::NewLine => r += "\n", | ||
_ => unreachable!(), | ||
} | ||
} | ||
} | ||
} | ||
Some(r) | ||
} | ||
} | ||
|
@@ -197,7 +247,7 @@ | |
if let Some(body) = c.body.write(opt.clone()) { | ||
r += &body; | ||
} else { | ||
r += &break_line_within_parenthesis(c.body.as_ref(), opt)?; | ||
r += &break_line_within_parenthesis(c.body.as_ref(), &mut opt)?; | ||
} | ||
|
||
Some(r) | ||
|
@@ -222,7 +272,7 @@ | |
} | ||
} | ||
|
||
fn break_line_within_parenthesis<T: WriteSource>(expr: &T, mut opt: WriteOpt) -> Option<String> { | ||
fn break_line_within_parenthesis<T: WriteSource>(expr: &T, opt: &mut WriteOpt) -> Option<String> { | ||
let mut r = "(\n".to_string(); | ||
opt.indent += 1; | ||
r += &opt.write_indent(); | ||
|
@@ -321,6 +371,52 @@ | |
} | ||
} | ||
|
||
// impl WriteSource for ModuleDef { | ||
// fn write(&self, mut opt: WriteOpt) -> Option<String> { | ||
// codegen::WriteSource::write(&pl.stmts, codegen::WriteOpt::default()).unwrap() | ||
// }} | ||
|
||
// /// Find a comment before a span. If there's exactly one newline prior, then the | ||
// /// comment is included here. Any further above are included with the prior token. | ||
// fn find_comment_before(span: Span, tokens: &TokenVec) -> Option<TokenKind> { | ||
// // index of the span in the token vec | ||
// let index = tokens | ||
// .0 | ||
// .iter() | ||
// .position(|t| t.span.start == span.start && t.span.end == span.end)?; | ||
// if index <= 1 { | ||
// return None; | ||
// } | ||
// let prior_token = &tokens.0[index - 1].kind; | ||
// let prior_2_token = &tokens.0[index - 2].kind; | ||
// if matches!(prior_token, TokenKind::NewLine) && matches!(prior_2_token, TokenKind::Comment(_)) { | ||
// Some(prior_2_token.clone()) | ||
// } else { | ||
// None | ||
// } | ||
// } | ||
|
||
/// Find comments after a given span. | ||
fn find_comments_after(span: Span, tokens: &TokenVec) -> Vec<Token> { | ||
// index of the span in the token vec | ||
let index = tokens | ||
.0 | ||
.iter() | ||
// FIXME: why isn't this working? | ||
// .position(|t| t.1.start == span.start && t.1.end == span.end) | ||
.position(|t| t.span.end == span.end) | ||
.unwrap_or_else(|| panic!("{:?}, {:?}", &tokens, &span)); | ||
|
||
let mut out = vec![]; | ||
for token in tokens.0.iter().skip(index + 1) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You probably want this here: tokens.0.iter().skip_while(|t| t.span.start < span.end) ... skipping all tokens that were before or within the current expression. |
||
match token.kind { | ||
TokenKind::NewLine | TokenKind::Comment(_) => out.push(token.clone()), | ||
_ => break, | ||
} | ||
Comment on lines
+412
to
+415
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oh, and this is: .take_while(|token| matches!(token.kind, TokenKind::NewLine | TokenKind::Comment(_)) |
||
} | ||
out | ||
} | ||
|
||
impl WriteSource for Vec<Stmt> { | ||
fn write(&self, mut opt: WriteOpt) -> Option<String> { | ||
opt.reset_line()?; | ||
|
@@ -469,7 +565,8 @@ | |
|
||
#[cfg(test)] | ||
mod test { | ||
use insta::assert_snapshot; | ||
use insta::{assert_debug_snapshot, assert_snapshot}; | ||
use prqlc_parser::lex_source; | ||
|
||
use super::*; | ||
|
||
|
@@ -490,6 +587,64 @@ | |
stmt.write(WriteOpt::default()).unwrap() | ||
} | ||
|
||
// #[test] | ||
// fn test_find_comment_before() { | ||
// let tokens = lex_source( | ||
// r#" | ||
// # comment | ||
// let a = 5 | ||
// "#, | ||
// ) | ||
// .unwrap(); | ||
// let span = tokens | ||
// .clone() | ||
// .0 | ||
// .iter() | ||
// .find(|t| t.kind == TokenKind::Keyword("let".to_string())) | ||
// .unwrap() | ||
// .span | ||
// .clone(); | ||
// let comment = find_comment_before(span.into(), &tokens); | ||
// assert_debug_snapshot!(comment, @r###" | ||
// Some( | ||
// Comment( | ||
// " comment", | ||
// ), | ||
// ) | ||
// "###); | ||
// } | ||
|
||
#[test] | ||
fn test_find_comments_after() { | ||
let tokens = lex_source( | ||
r#" | ||
let a = 5 # on side | ||
# below | ||
# and another | ||
"#, | ||
) | ||
.unwrap(); | ||
let span = tokens | ||
.clone() | ||
.0 | ||
.iter() | ||
.find(|t| t.kind == TokenKind::Literal(Literal::Integer(5))) | ||
.unwrap() | ||
.span | ||
.clone(); | ||
let comment = find_comments_after(span.into(), &tokens); | ||
assert_debug_snapshot!(comment, @r###" | ||
[ | ||
23..32: Comment(" on side"), | ||
32..33: NewLine, | ||
45..52: Comment(" below"), | ||
52..53: NewLine, | ||
65..78: Comment(" and another"), | ||
78..79: NewLine, | ||
] | ||
"###); | ||
} | ||
|
||
#[test] | ||
fn test_pipeline() { | ||
let short = Expr::new(ExprKind::Ident("short".to_string())); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,9 @@ mod types; | |
pub(crate) use ast::write_expr; | ||
pub(crate) use types::{write_ty, write_ty_kind}; | ||
|
||
pub trait WriteSource { | ||
use prqlc_parser::TokenVec; | ||
|
||
pub trait WriteSource: std::fmt::Debug { | ||
/// Converts self to its source representation according to specified | ||
/// options. | ||
/// | ||
|
@@ -29,11 +31,14 @@ pub trait WriteSource { | |
Some(r) | ||
} | ||
|
||
/// Attempts to write the current item, expanding the maximum width where necessary. | ||
fn write_or_expand(&self, mut opt: WriteOpt) -> String { | ||
loop { | ||
if let Some(s) = self.write(opt.clone()) { | ||
return s; | ||
} else { | ||
// TODO: could we just set the max width rather than increasing | ||
// it in a loop? | ||
opt.max_width += opt.max_width / 2; | ||
opt.reset_line(); | ||
} | ||
|
@@ -72,6 +77,13 @@ pub struct WriteOpt { | |
/// For example: | ||
/// `join foo` has an unbound expr, since `join foo ==bar` produced a binary op. | ||
pub unbound_expr: bool, | ||
|
||
/// The lexer tokens that were used to produce this source; used for | ||
/// comments. | ||
pub tokens: TokenVec, | ||
Comment on lines
+81
to
+83
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just a question if we could prematurely optimize this: Do we need all of the tokens? Wouldn't just comments (and new lines maybe) be enough to get the same functionality? |
||
|
||
// TODO: remove | ||
pub enable_comments: bool, | ||
} | ||
|
||
impl Default for WriteOpt { | ||
|
@@ -84,6 +96,8 @@ impl Default for WriteOpt { | |
rem_width: 50, | ||
context_strength: 0, | ||
unbound_expr: false, | ||
tokens: TokenVec(vec![]), | ||
enable_comments: true, | ||
} | ||
} | ||
} | ||
|
@@ -102,6 +116,8 @@ impl WriteOpt { | |
Some(()) | ||
} | ||
|
||
/// Sets [WriteOpt::rem_width] to (max_width - indent_width), returning | ||
/// `Some(())`, or `None` if there's not enough space. | ||
fn reset_line(&mut self) -> Option<()> { | ||
let ident = self.tab.len() as u16 * self.indent; | ||
self.rem_width = self.max_width.checked_sub(ident)?; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Because if expression
1 + x
hasspan=0..5
, the tokens will have spans1: 0..1 +: 2..3 x: 4..5