Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add comments to format output #4397

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
9 changes: 9 additions & 0 deletions prqlc/prqlc-ast/src/span.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@ impl From<Span> for Range<usize> {
a.start..a.end
}
}
impl From<Range<usize>> for Span {
fn from(range: Range<usize>) -> Self {
Span {
start: range.start,
end: range.end,
source_id: 0, // Default value as Range<usize> does not provide a source_id
}
}
}

impl Debug for Span {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
Expand Down
4 changes: 2 additions & 2 deletions prqlc/prqlc-parser/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
use prqlc_ast::expr::*;
use serde::{Deserialize, Serialize};

#[derive(Clone, PartialEq, Serialize, Deserialize)]
#[derive(Clone, PartialEq, Serialize, Deserialize, Eq)]

Check warning on line 10 in prqlc/prqlc-parser/src/lexer.rs

View check run for this annotation

Codecov / codecov/patch

prqlc/prqlc-parser/src/lexer.rs#L10

Added line #L10 was not covered by tests
pub struct Token {
pub kind: TokenKind,
pub span: std::ops::Range<usize>,
Expand Down Expand Up @@ -579,7 +579,7 @@
}
}

#[derive(Serialize, Deserialize, Debug)]
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]

Check warning on line 582 in prqlc/prqlc-parser/src/lexer.rs

View check run for this annotation

Codecov / codecov/patch

prqlc/prqlc-parser/src/lexer.rs#L582

Added line #L582 was not covered by tests
pub struct TokenVec(pub Vec<Token>);

// impl std::fmt::Debug for TokenVec {
Expand Down
3 changes: 1 addition & 2 deletions prqlc/prqlc-parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@ use prqlc_ast::error::{Error, Reason, WithErrorInfo};
use prqlc_ast::stmt::*;
use prqlc_ast::Span;

use lexer::Token;
pub use lexer::{TokenKind, TokenVec};
pub use lexer::{Token, TokenKind, TokenVec};
use span::ParserSpan;

/// Build PRQL AST from a PRQL query string.
Expand Down
169 changes: 162 additions & 7 deletions prqlc/prqlc/src/codegen/ast.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
use std::collections::HashSet;

use once_cell::sync::Lazy;

use crate::ast::*;
use prqlc_parser::{Token, TokenKind, TokenVec};
use regex::Regex;

use crate::ast::*;
use crate::codegen::SeparatedExprs;

use super::{WriteOpt, WriteSource};
Expand All @@ -17,13 +17,28 @@
let parent_strength = binding_strength(parent);
opt.context_strength = opt.context_strength.max(parent_strength);

node.write(opt)
// FIXME: this is extremely hacky. Our issue is that in:
//
// from a.b # comment
//
// ...we're writing both `from a.b` and `a.b`, so we need to know which of
// these to write comments for. I'm sure there are better ways to do it.
let enable_comments = opt.enable_comments;
opt.enable_comments = false;
let out = node.write(opt.clone());
opt.enable_comments = enable_comments;
out
}

impl WriteSource for Expr {
fn write(&self, mut opt: WriteOpt) -> Option<String> {
let mut r = String::new();

// if let Some(span) = self.span {
// if let Some(comment) = find_comment_before(span, &opt.tokens) {
// r += &comment.to_string();
// }
// }
if let Some(alias) = &self.alias {
r += opt.consume(alias)?;
r += opt.consume(" = ")?;
Expand All @@ -41,9 +56,44 @@
if let Some(value) = value {
r += &value;
} else {
r += &break_line_within_parenthesis(&self.kind, opt)?;
r += &break_line_within_parenthesis(&self.kind, &mut opt)?;
}
};

if opt.enable_comments {
if let Some(span) = self.span {
// TODO: change underlying function so we can remove this
if opt.tokens.0.is_empty() {
return Some(r);
}

let comments = find_comments_after(span, &opt.tokens);

// If the first item is a comment, it's an inline comment, and
// so add two spaces
if matches!(
comments.first(),
Some(Token {
kind: TokenKind::Comment(_),
..
})
) {
r += " ";
}

for c in comments {
match c.kind {
// TODO: these are defined here since the debug
// representations aren't quite right (NewLine is `new
// line` as is used in error messages). But we should
// probably move them onto the Struct.
TokenKind::Comment(s) => r += format!("#{}", s).as_str(),
TokenKind::NewLine => r += "\n",
_ => unreachable!(),

Check warning on line 92 in prqlc/prqlc/src/codegen/ast.rs

View check run for this annotation

Codecov / codecov/patch

prqlc/prqlc/src/codegen/ast.rs#L92

Added line #L92 was not covered by tests
}
}
}
}
Some(r)
}
}
Expand Down Expand Up @@ -197,7 +247,7 @@
if let Some(body) = c.body.write(opt.clone()) {
r += &body;
} else {
r += &break_line_within_parenthesis(c.body.as_ref(), opt)?;
r += &break_line_within_parenthesis(c.body.as_ref(), &mut opt)?;
}

Some(r)
Expand All @@ -222,7 +272,7 @@
}
}

fn break_line_within_parenthesis<T: WriteSource>(expr: &T, mut opt: WriteOpt) -> Option<String> {
fn break_line_within_parenthesis<T: WriteSource>(expr: &T, opt: &mut WriteOpt) -> Option<String> {
let mut r = "(\n".to_string();
opt.indent += 1;
r += &opt.write_indent();
Expand Down Expand Up @@ -321,6 +371,52 @@
}
}

// impl WriteSource for ModuleDef {
// fn write(&self, mut opt: WriteOpt) -> Option<String> {
// codegen::WriteSource::write(&pl.stmts, codegen::WriteOpt::default()).unwrap()
// }}

// /// Find a comment before a span. If there's exactly one newline prior, then the
// /// comment is included here. Any further above are included with the prior token.
// fn find_comment_before(span: Span, tokens: &TokenVec) -> Option<TokenKind> {
// // index of the span in the token vec
// let index = tokens
// .0
// .iter()
// .position(|t| t.span.start == span.start && t.span.end == span.end)?;
// if index <= 1 {
// return None;
// }
// let prior_token = &tokens.0[index - 1].kind;
// let prior_2_token = &tokens.0[index - 2].kind;
// if matches!(prior_token, TokenKind::NewLine) && matches!(prior_2_token, TokenKind::Comment(_)) {
// Some(prior_2_token.clone())
// } else {
// None
// }
// }

/// Find comments after a given span.
fn find_comments_after(span: Span, tokens: &TokenVec) -> Vec<Token> {
// index of the span in the token vec
let index = tokens
.0
.iter()
// FIXME: why isn't this working?
// .position(|t| t.1.start == span.start && t.1.end == span.end)
Comment on lines +405 to +406
Copy link
Member

@aljazerzen aljazerzen May 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because if expression 1 + x has span=0..5, the tokens will have spans 1: 0..1 +: 2..3 x: 4..5

.position(|t| t.span.end == span.end)
.unwrap_or_else(|| panic!("{:?}, {:?}", &tokens, &span));

let mut out = vec![];
for token in tokens.0.iter().skip(index + 1) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You probably want this here:

tokens.0.iter().skip_while(|t| t.span.start < span.end)

... skipping all tokens that were before or within the current expression.

match token.kind {
TokenKind::NewLine | TokenKind::Comment(_) => out.push(token.clone()),
_ => break,
}
Comment on lines +412 to +415
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh, and this is:

.take_while(|token| matches!(token.kind, TokenKind::NewLine | TokenKind::Comment(_))

}
out
}

impl WriteSource for Vec<Stmt> {
fn write(&self, mut opt: WriteOpt) -> Option<String> {
opt.reset_line()?;
Expand Down Expand Up @@ -469,7 +565,8 @@

#[cfg(test)]
mod test {
use insta::assert_snapshot;
use insta::{assert_debug_snapshot, assert_snapshot};
use prqlc_parser::lex_source;

use super::*;

Expand All @@ -490,6 +587,64 @@
stmt.write(WriteOpt::default()).unwrap()
}

// #[test]
// fn test_find_comment_before() {
// let tokens = lex_source(
// r#"
// # comment
// let a = 5
// "#,
// )
// .unwrap();
// let span = tokens
// .clone()
// .0
// .iter()
// .find(|t| t.kind == TokenKind::Keyword("let".to_string()))
// .unwrap()
// .span
// .clone();
// let comment = find_comment_before(span.into(), &tokens);
// assert_debug_snapshot!(comment, @r###"
// Some(
// Comment(
// " comment",
// ),
// )
// "###);
// }

#[test]
fn test_find_comments_after() {
let tokens = lex_source(
r#"
let a = 5 # on side
# below
# and another
"#,
)
.unwrap();
let span = tokens
.clone()
.0
.iter()
.find(|t| t.kind == TokenKind::Literal(Literal::Integer(5)))
.unwrap()
.span
.clone();
let comment = find_comments_after(span.into(), &tokens);
assert_debug_snapshot!(comment, @r###"
[
23..32: Comment(" on side"),
32..33: NewLine,
45..52: Comment(" below"),
52..53: NewLine,
65..78: Comment(" and another"),
78..79: NewLine,
]
"###);
}

#[test]
fn test_pipeline() {
let short = Expr::new(ExprKind::Ident("short".to_string()));
Expand Down
18 changes: 17 additions & 1 deletion prqlc/prqlc/src/codegen/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ mod types;
pub(crate) use ast::write_expr;
pub(crate) use types::{write_ty, write_ty_kind};

pub trait WriteSource {
use prqlc_parser::TokenVec;

pub trait WriteSource: std::fmt::Debug {
/// Converts self to its source representation according to specified
/// options.
///
Expand All @@ -29,11 +31,14 @@ pub trait WriteSource {
Some(r)
}

/// Attempts to write the current item, expanding the maximum width where necessary.
fn write_or_expand(&self, mut opt: WriteOpt) -> String {
loop {
if let Some(s) = self.write(opt.clone()) {
return s;
} else {
// TODO: could we just set the max width rather than increasing
// it in a loop?
opt.max_width += opt.max_width / 2;
opt.reset_line();
}
Expand Down Expand Up @@ -72,6 +77,13 @@ pub struct WriteOpt {
/// For example:
/// `join foo` has an unbound expr, since `join foo ==bar` produced a binary op.
pub unbound_expr: bool,

/// The lexer tokens that were used to produce this source; used for
/// comments.
pub tokens: TokenVec,
Comment on lines +81 to +83
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a question if we could prematurely optimize this:

Do we need all of the tokens? Wouldn't just comments (and new lines maybe) be enough to get the same functionality?


// TODO: remove
pub enable_comments: bool,
}

impl Default for WriteOpt {
Expand All @@ -84,6 +96,8 @@ impl Default for WriteOpt {
rem_width: 50,
context_strength: 0,
unbound_expr: false,
tokens: TokenVec(vec![]),
enable_comments: true,
}
}
}
Expand All @@ -102,6 +116,8 @@ impl WriteOpt {
Some(())
}

/// Sets [WriteOpt::rem_width] to (max_width - indent_width), returning
/// `Some(())`, or `None` if there's not enough space.
fn reset_line(&mut self) -> Option<()> {
let ident = self.tab.len() as u16 * self.indent;
self.rem_width = self.max_width.checked_sub(ident)?;
Expand Down
1 change: 1 addition & 0 deletions prqlc/prqlc/src/codegen/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ impl WriteSource for TyTupleField {
}
}

#[derive(Debug, Clone)]
struct UnionVariant<'a>(&'a Option<String>, &'a Ty);

impl WriteSource for UnionVariant<'_> {
Expand Down
Loading
Loading