From a45287c04dfade147adea83ccc7238fe21c91d09 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 17 Feb 2024 21:32:39 -0800 Subject: [PATCH 01/10] feat: Add comments to format output --- prqlc/prqlc-parser/src/lexer.rs | 1 + prqlc/prqlc-parser/src/lib.rs | 4 +- prqlc/prqlc/src/codegen/ast.rs | 78 +++++++++++++++++++++++++++++++-- prqlc/prqlc/src/codegen/mod.rs | 7 +++ prqlc/prqlc/src/lib.rs | 45 +++++++++++++++++++ 5 files changed, 130 insertions(+), 5 deletions(-) diff --git a/prqlc/prqlc-parser/src/lexer.rs b/prqlc/prqlc-parser/src/lexer.rs index af05ab0c82ab..8c1b8aed7276 100644 --- a/prqlc/prqlc-parser/src/lexer.rs +++ b/prqlc/prqlc-parser/src/lexer.rs @@ -564,6 +564,7 @@ impl std::fmt::Debug for TokenSpan { } } +#[derive(Clone, PartialEq, Eq)] pub struct TokenVec(pub Vec); impl std::fmt::Debug for TokenVec { diff --git a/prqlc/prqlc-parser/src/lib.rs b/prqlc/prqlc-parser/src/lib.rs index a7c459d64962..ce57af2ab4a0 100644 --- a/prqlc/prqlc-parser/src/lib.rs +++ b/prqlc/prqlc-parser/src/lib.rs @@ -15,8 +15,8 @@ use prqlc_ast::error::{Error, WithErrorInfo}; use prqlc_ast::stmt::*; use prqlc_ast::Span; -use lexer::Token; -use lexer::{TokenSpan, TokenVec}; +pub use lexer::Token; +pub use lexer::{TokenSpan, TokenVec}; use span::ParserSpan; /// Build PRQL AST from a PRQL query string. diff --git a/prqlc/prqlc/src/codegen/ast.rs b/prqlc/prqlc/src/codegen/ast.rs index 444368022591..5aad89c48d58 100644 --- a/prqlc/prqlc/src/codegen/ast.rs +++ b/prqlc/prqlc/src/codegen/ast.rs @@ -1,6 +1,7 @@ use std::collections::HashSet; use once_cell::sync::Lazy; +use prqlc_parser::{Token, TokenSpan, TokenVec}; use crate::ast::*; use regex::Regex; @@ -22,8 +23,20 @@ fn write_within(node: &T, parent: &ExprKind, mut opt: WriteOpt) impl WriteSource for Expr { fn write(&self, mut opt: WriteOpt) -> Option { + // let span = self.span; + // let tokens = opt.tokens; + // let comment = let mut r = String::new(); + if let Some(span) = self.span { + if let Some(comment) = find_comment_before(span, &opt.tokens) { + // dbg!(&span); + // dbg!(&self); + // dbg!(&comment); + + r += &comment.to_string(); + } + } if let Some(alias) = &self.alias { r += opt.consume(alias)?; r += opt.consume(" = ")?; @@ -41,9 +54,20 @@ impl WriteSource for Expr { if let Some(value) = value { r += &value; } else { - r += &break_line_within_parenthesis(&self.kind, opt)?; + r += &break_line_within_parenthesis(&self.kind, &mut opt)?; } }; + + if let Some(span) = self.span { + let comments = find_comments_after(span, &opt.tokens); + // dbg!(&span); + // dbg!(&self); + // dbg!(&comments); + + for c in comments { + r += &c.0.to_string(); + } + } Some(r) } } @@ -180,7 +204,7 @@ impl WriteSource for ExprKind { if let Some(body) = c.body.write(opt.clone()) { r += &body; } else { - r += &break_line_within_parenthesis(c.body.as_ref(), opt)?; + r += &break_line_within_parenthesis(c.body.as_ref(), &mut opt)?; } Some(r) @@ -205,7 +229,7 @@ impl WriteSource for ExprKind { } } -fn break_line_within_parenthesis(expr: &T, mut opt: WriteOpt) -> Option { +fn break_line_within_parenthesis(expr: &T, opt: &mut WriteOpt) -> Option { let mut r = "(\n".to_string(); opt.indent += 1; r += &opt.write_indent(); @@ -304,6 +328,54 @@ pub fn write_ident_part(s: &str) -> String { } } +// impl WriteSource for ModuleDef { +// fn write(&self, mut opt: WriteOpt) -> Option { +// codegen::WriteSource::write(&pl.stmts, codegen::WriteOpt::default()).unwrap() +// }} + +// impl WriteSource for ModuleDef { + +/// Find a comment before a span. If there's exactly one newline, then the +/// comment is included; otherwise it's included after the current line. +fn find_comment_before(span: Span, tokens: &TokenVec) -> Option { + // index of the span in the token vec + let index = tokens + .0 + .iter() + .position(|t| t.1.start == span.start && t.1.end == span.end)?; + if index <= 1 { + return None; + } + let prior_token = &tokens.0[index - 1].0; + let prior_2_token = &tokens.0[index - 2].0; + if matches!(prior_token, Token::NewLine) && matches!(prior_2_token, Token::Comment(_)) { + Some(prior_2_token.clone()) + } else { + None + } +} + +/// Find a comment before a span. If there's exactly one newline, then the +/// comment is included; otherwise it's included after the current line. +fn find_comments_after(span: Span, tokens: &TokenVec) -> Vec { + let mut out = vec![]; + // index of the span in the token vec + let index = tokens + .0 + .iter() + // FIXME: why isn't this working? + // .position(|t| t.1.start == span.start && t.1.end == span.end) + .position(|t| t.1.start == span.start) + .unwrap_or_else(|| panic!("{:?}, {:?}", &tokens, &span)); + for token in tokens.0.iter().skip(index) { + match token.0 { + Token::NewLine | Token::Comment(_) => out.push(dbg!(token.clone())), + _ => break, + } + } + out +} + impl WriteSource for Vec { fn write(&self, mut opt: WriteOpt) -> Option { opt.reset_line()?; diff --git a/prqlc/prqlc/src/codegen/mod.rs b/prqlc/prqlc/src/codegen/mod.rs index e14dcee74e49..f73570077352 100644 --- a/prqlc/prqlc/src/codegen/mod.rs +++ b/prqlc/prqlc/src/codegen/mod.rs @@ -4,6 +4,8 @@ mod types; pub(crate) use ast::write_expr; pub(crate) use types::{write_ty, write_ty_kind}; +use prqlc_parser::TokenVec; + pub trait WriteSource { /// Converts self to its source representation according to specified /// options. @@ -72,6 +74,10 @@ pub struct WriteOpt { /// For example: /// `join foo` has an unbound expr, since `join foo ==bar` produced a binary op. pub unbound_expr: bool, + + /// The lexer tokens that were used to produce this source; used for + /// comments. + pub tokens: TokenVec, } impl Default for WriteOpt { @@ -84,6 +90,7 @@ impl Default for WriteOpt { rem_width: 50, context_strength: 0, unbound_expr: false, + tokens: TokenVec(vec![]), } } } diff --git a/prqlc/prqlc/src/lib.rs b/prqlc/prqlc/src/lib.rs index 3aee35a58a15..afa00a7044ef 100644 --- a/prqlc/prqlc/src/lib.rs +++ b/prqlc/prqlc/src/lib.rs @@ -276,6 +276,15 @@ pub fn prql_to_pl(prql: &str) -> Result { prql_to_pl_tree(&source_tree) } +// pub fn fmt(prql: &str) -> Result { +// let source_tree = SourceTree::from(prql); +// let pl = prql_to_pl_tree(&source_tree)?; + +// for x in pl.stmts { +// x. + +// } + /// Parse PRQL into a PL AST pub fn prql_to_pl_tree(prql: &SourceTree) -> Result { parser::parse(prql).map_err(|e| ErrorMessages::from(e).composed(prql)) @@ -307,6 +316,42 @@ pub fn pl_to_prql(pl: &ast::ModuleDef) -> Result { Ok(codegen::WriteSource::write(&pl.stmts, codegen::WriteOpt::default()).unwrap()) } +pub fn format_prql(prql: &str) -> Result { + // TODO: convert errors + let tokens = prqlc_parser::lex_source(prql).unwrap(); + let pl = prql_to_pl(prql)?; + Ok(codegen::WriteSource::write( + &pl.stmts, + codegen::WriteOpt { + tokens, + ..Default::default() + }, + ) + .unwrap()) +} + +#[test] +fn test_format_prql() { + use insta::assert_display_snapshot; + + assert_display_snapshot!(format_prql( "from db.employees | select {name, age}").unwrap(), @r###" + from db.employees + select {name, age} + "###); + + assert_display_snapshot!(format_prql( r#" + # test comment + from db.employees # inline comment + # another test comment + select {name, age}"# +).unwrap(), @r###" + # test comment + from db.employees # in + # another test comment + select {name, age} + "###); +} + /// JSON serialization and deserialization functions pub mod json { use super::*; From 4e8a29f6cefe5366220077971f34914786653c28 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 20 Apr 2024 12:00:28 -0700 Subject: [PATCH 02/10] --- prqlc/prqlc/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/prqlc/prqlc/src/lib.rs b/prqlc/prqlc/src/lib.rs index 7eb3c5343127..f9bd7efe6569 100644 --- a/prqlc/prqlc/src/lib.rs +++ b/prqlc/prqlc/src/lib.rs @@ -367,14 +367,14 @@ pub fn format_prql(prql: &str) -> Result { #[test] fn test_format_prql() { - use insta::assert_display_snapshot; + use insta::assert_snapshot; - assert_display_snapshot!(format_prql( "from db.employees | select {name, age}").unwrap(), @r###" + assert_snapshot!(format_prql( "from db.employees | select {name, age}").unwrap(), @r###" from db.employees select {name, age} "###); - assert_display_snapshot!(format_prql( r#" + assert_snapshot!(format_prql( r#" # test comment from db.employees # inline comment # another test comment From 1a4858d026f51f5ba8bef3bd4f2cd2ce04fe79d1 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 20 Apr 2024 14:26:20 -0700 Subject: [PATCH 03/10] . --- prqlc/prqlc/src/codegen/ast.rs | 92 +++++++++++++++++++++++--------- prqlc/prqlc/src/codegen/mod.rs | 7 ++- prqlc/prqlc/src/codegen/types.rs | 1 + prqlc/prqlc/src/lib.rs | 31 ++++++----- 4 files changed, 90 insertions(+), 41 deletions(-) diff --git a/prqlc/prqlc/src/codegen/ast.rs b/prqlc/prqlc/src/codegen/ast.rs index 253551455d1f..210ce28bd123 100644 --- a/prqlc/prqlc/src/codegen/ast.rs +++ b/prqlc/prqlc/src/codegen/ast.rs @@ -15,25 +15,29 @@ pub(crate) fn write_expr(expr: &Expr) -> String { } fn write_within(node: &T, parent: &ExprKind, mut opt: WriteOpt) -> Option { + // dbg!(&node, &parent); let parent_strength = binding_strength(parent); opt.context_strength = opt.context_strength.max(parent_strength); - node.write(opt) + // FIXME: this is extremely hacky. Our issue is that in: + // + // from a.b # comment + // + // ...we're writing both `from a.b` and `a.b`, so we need to know which of + // these to write comments for. I'm sure there are better ways to do it. + let comments = opt.enable_comments; + opt.enable_comments = false; + let out = node.write(opt.clone()); + opt.enable_comments = comments; + out } impl WriteSource for Expr { fn write(&self, mut opt: WriteOpt) -> Option { - // let span = self.span; - // let tokens = opt.tokens; - // let comment = let mut r = String::new(); if let Some(span) = self.span { if let Some(comment) = find_comment_before(span, &opt.tokens) { - // dbg!(&span); - // dbg!(&self); - // dbg!(&comment); - r += &comment.to_string(); } } @@ -58,14 +62,29 @@ impl WriteSource for Expr { } }; - if let Some(span) = self.span { - let comments = find_comments_after(span, &opt.tokens); - // dbg!(&span); - // dbg!(&self); - // dbg!(&comments); + if opt.enable_comments { + if let Some(span) = self.span { + // TODO: change underlying function so we can remove this + if opt.tokens.0.is_empty() { + return Some(r); + } - for c in comments { - r += &c.kind.to_string(); + let comments = find_comments_after(span, &opt.tokens); + // if !comments.is_empty() { + // dbg!(&self, &span, &opt.tokens, &comments); + // } + + for c in comments { + match c.kind { + // TODO: these are defined here since the debug + // representations aren't quite right (NewLine is `new + // line` as is used in error messages). But we should + // probably move them onto the Struct. + TokenKind::Comment(s) => r += format!(" #{}", s).as_str(), + TokenKind::NewLine => r += "\n", + _ => unreachable!(), + } + } } } Some(r) @@ -345,13 +364,6 @@ pub fn write_ident_part(s: &str) -> String { } } -// impl WriteSource for ModuleDef { -// fn write(&self, mut opt: WriteOpt) -> Option { -// codegen::WriteSource::write(&pl.stmts, codegen::WriteOpt::default()).unwrap() -// }} - -// impl WriteSource for ModuleDef { - /// Find a comment before a span. If there's exactly one newline, then the /// comment is included; otherwise it's included after the current line. fn find_comment_before(span: Span, tokens: &TokenVec) -> Option { @@ -372,7 +384,7 @@ fn find_comment_before(span: Span, tokens: &TokenVec) -> Option { } } -/// Find a comment before a span. If there's exactly one newline, then the +/// Find a comment after a span. If there's exactly one newline, then the /// comment is included; otherwise it's included after the current line. fn find_comments_after(span: Span, tokens: &TokenVec) -> Vec { let mut out = vec![]; @@ -382,11 +394,13 @@ fn find_comments_after(span: Span, tokens: &TokenVec) -> Vec { .iter() // FIXME: why isn't this working? // .position(|t| t.1.start == span.start && t.1.end == span.end) - .position(|t| t.span.start == span.start) + .position(|t| t.span.end == span.end) .unwrap_or_else(|| panic!("{:?}, {:?}", &tokens, &span)); - for token in tokens.0.iter().skip(index) { + // dbg!(index, span, &tokens); + for token in tokens.0.iter().skip(index + 1) { + // match dbg!(token).kind { match token.kind { - TokenKind::NewLine | TokenKind::Comment(_) => out.push(dbg!(token.clone())), + TokenKind::NewLine | TokenKind::Comment(_) => out.push(token.clone()), _ => break, } } @@ -562,6 +576,32 @@ mod test { stmt.write(WriteOpt::default()).unwrap() } + #[test] + fn test_find_comments_after() { + use insta::assert_debug_snapshot; + let tokens = prqlc_parser::lex_source( + r#" + let a = 5 # comment + "#, + ) + .unwrap(); + + // This is the `5` + let span = Span { + start: 17, + end: 18, + source_id: 0, + }; + + let comment = find_comments_after(span, &tokens); + assert_debug_snapshot!(comment, @r###" + [ + 20..29: Comment(" comment"), + 29..30: NewLine, + ] + "###); + } + #[test] fn test_pipeline() { let short = Expr::new(ExprKind::Ident("short".to_string())); diff --git a/prqlc/prqlc/src/codegen/mod.rs b/prqlc/prqlc/src/codegen/mod.rs index f73570077352..93ab940f3d38 100644 --- a/prqlc/prqlc/src/codegen/mod.rs +++ b/prqlc/prqlc/src/codegen/mod.rs @@ -6,7 +6,7 @@ pub(crate) use types::{write_ty, write_ty_kind}; use prqlc_parser::TokenVec; -pub trait WriteSource { +pub trait WriteSource: std::fmt::Debug { /// Converts self to its source representation according to specified /// options. /// @@ -78,6 +78,9 @@ pub struct WriteOpt { /// The lexer tokens that were used to produce this source; used for /// comments. pub tokens: TokenVec, + + // TODO: remove + pub enable_comments: bool, } impl Default for WriteOpt { @@ -91,6 +94,7 @@ impl Default for WriteOpt { context_strength: 0, unbound_expr: false, tokens: TokenVec(vec![]), + enable_comments: true, } } } @@ -131,6 +135,7 @@ impl WriteOpt { } } +#[derive(Debug, Clone)] struct SeparatedExprs<'a, T: WriteSource> { exprs: &'a [T], inline: &'static str, diff --git a/prqlc/prqlc/src/codegen/types.rs b/prqlc/prqlc/src/codegen/types.rs index 318e7593873c..49772f20cfce 100644 --- a/prqlc/prqlc/src/codegen/types.rs +++ b/prqlc/prqlc/src/codegen/types.rs @@ -113,6 +113,7 @@ impl WriteSource for TyTupleField { } } +#[derive(Debug, Clone)] struct UnionVariant<'a>(&'a Option, &'a Ty); impl WriteSource for UnionVariant<'_> { diff --git a/prqlc/prqlc/src/lib.rs b/prqlc/prqlc/src/lib.rs index f9bd7efe6569..181ceb882c74 100644 --- a/prqlc/prqlc/src/lib.rs +++ b/prqlc/prqlc/src/lib.rs @@ -311,15 +311,6 @@ pub fn prql_to_pl(prql: &str) -> Result { prql_to_pl_tree(&source_tree) } -// pub fn fmt(prql: &str) -> Result { -// let source_tree = SourceTree::from(prql); -// let pl = prql_to_pl_tree(&source_tree)?; - -// for x in pl.stmts { -// x. - -// } - /// Parse PRQL into a PL AST pub fn prql_to_pl_tree(prql: &SourceTree) -> Result { parser::parse(prql).map_err(|e| ErrorMessages::from(e).composed(prql)) @@ -364,6 +355,16 @@ pub fn format_prql(prql: &str) -> Result { ) .unwrap()) } +#[test] +fn test_format_comment_basic() { + use insta::assert_snapshot; + assert_snapshot!(format_prql( r#" + from db.employees # inline comment + "# +).unwrap(), @r###" + from db.employees # inline comment + "###); +} #[test] fn test_format_prql() { @@ -375,13 +376,15 @@ fn test_format_prql() { "###); assert_snapshot!(format_prql( r#" + # test comment + from db.employees # inline comment + # another test comment + select {name, age}"# + ).unwrap(), @r###" # test comment from db.employees # inline comment - # another test comment - select {name, age}"# -).unwrap(), @r###" - # test comment - from db.employees # in + # another test comment + # another test comment select {name, age} "###); From 73c0504ca2a639a4f15859724968aaf2f30f01e2 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sun, 12 May 2024 15:44:00 -0700 Subject: [PATCH 04/10] . --- prqlc/prqlc-ast/src/span.rs | 9 ++++ prqlc/prqlc-parser/src/lib.rs | 3 +- prqlc/prqlc/src/codegen/ast.rs | 87 ++++++++++++++++++++++++---------- prqlc/prqlc/src/lib.rs | 22 +++++++-- 4 files changed, 88 insertions(+), 33 deletions(-) diff --git a/prqlc/prqlc-ast/src/span.rs b/prqlc/prqlc-ast/src/span.rs index 1fb025a8a126..9227c79c8f2e 100644 --- a/prqlc/prqlc-ast/src/span.rs +++ b/prqlc/prqlc-ast/src/span.rs @@ -17,6 +17,15 @@ impl From for Range { a.start..a.end } } +impl From> for Span { + fn from(range: Range) -> Self { + Span { + start: range.start, + end: range.end, + source_id: 0, // Default value as Range does not provide a source_id + } + } +} impl Debug for Span { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { diff --git a/prqlc/prqlc-parser/src/lib.rs b/prqlc/prqlc-parser/src/lib.rs index 6a150c9f12cf..1b9887ee8eeb 100644 --- a/prqlc/prqlc-parser/src/lib.rs +++ b/prqlc/prqlc-parser/src/lib.rs @@ -14,8 +14,7 @@ use prqlc_ast::error::{Error, Reason, WithErrorInfo}; use prqlc_ast::stmt::*; use prqlc_ast::Span; -use lexer::Token; -pub use lexer::{TokenKind, TokenVec}; +pub use lexer::{Token, TokenKind, TokenVec}; use span::ParserSpan; /// Build PRQL AST from a PRQL query string. diff --git a/prqlc/prqlc/src/codegen/ast.rs b/prqlc/prqlc/src/codegen/ast.rs index 210ce28bd123..f02d3cb94552 100644 --- a/prqlc/prqlc/src/codegen/ast.rs +++ b/prqlc/prqlc/src/codegen/ast.rs @@ -2,10 +2,9 @@ use std::collections::HashSet; use once_cell::sync::Lazy; use prqlc_parser::{Token, TokenKind, TokenVec}; - -use crate::ast::*; use regex::Regex; +use crate::ast::*; use crate::codegen::SeparatedExprs; use super::{WriteOpt, WriteSource}; @@ -364,8 +363,13 @@ pub fn write_ident_part(s: &str) -> String { } } -/// Find a comment before a span. If there's exactly one newline, then the -/// comment is included; otherwise it's included after the current line. +// impl WriteSource for ModuleDef { +// fn write(&self, mut opt: WriteOpt) -> Option { +// codegen::WriteSource::write(&pl.stmts, codegen::WriteOpt::default()).unwrap() +// }} + +/// Find a comment before a span. If there's exactly one newline prior, then the +/// comment is included here. Any furthur above are included with the prior token. fn find_comment_before(span: Span, tokens: &TokenVec) -> Option { // index of the span in the token vec let index = tokens @@ -387,7 +391,6 @@ fn find_comment_before(span: Span, tokens: &TokenVec) -> Option { /// Find a comment after a span. If there's exactly one newline, then the /// comment is included; otherwise it's included after the current line. fn find_comments_after(span: Span, tokens: &TokenVec) -> Vec { - let mut out = vec![]; // index of the span in the token vec let index = tokens .0 @@ -396,9 +399,9 @@ fn find_comments_after(span: Span, tokens: &TokenVec) -> Vec { // .position(|t| t.1.start == span.start && t.1.end == span.end) .position(|t| t.span.end == span.end) .unwrap_or_else(|| panic!("{:?}, {:?}", &tokens, &span)); - // dbg!(index, span, &tokens); + + let mut out = vec![]; for token in tokens.0.iter().skip(index + 1) { - // match dbg!(token).kind { match token.kind { TokenKind::NewLine | TokenKind::Comment(_) => out.push(token.clone()), _ => break, @@ -555,7 +558,8 @@ impl WriteSource for SwitchCase { #[cfg(test)] mod test { - use insta::assert_snapshot; + use insta::{assert_debug_snapshot, assert_snapshot}; + use prqlc_parser::lex_source; use super::*; @@ -577,29 +581,60 @@ mod test { } #[test] - fn test_find_comments_after() { - use insta::assert_debug_snapshot; - let tokens = prqlc_parser::lex_source( + fn test_find_comment_before() { + let tokens = lex_source( r#" - let a = 5 # comment - "#, + # comment + let a = 5 + "#, ) .unwrap(); + let span = tokens + .clone() + .0 + .iter() + .find(|t| t.kind == TokenKind::Keyword("let".to_string())) + .unwrap() + .span + .clone(); + let comment = find_comment_before(span.into(), &tokens); + assert_debug_snapshot!(comment, @r###" + Some( + Comment( + " comment", + ), + ) + "###); + } - // This is the `5` - let span = Span { - start: 17, - end: 18, - source_id: 0, - }; - - let comment = find_comments_after(span, &tokens); + #[test] + fn test_find_comments_after() { + let tokens = lex_source( + r#" + let a = 5 # on side + # below + # and another + "#, + ) + .unwrap(); + let span = dbg!(tokens.clone()) + .0 + .iter() + .find(|t| t.kind == TokenKind::Literal(Literal::Integer(5))) + .unwrap() + .span + .clone(); + let comment = find_comments_after(span.into(), &tokens); assert_debug_snapshot!(comment, @r###" - [ - 20..29: Comment(" comment"), - 29..30: NewLine, - ] - "###); + [ + 23..32: Comment(" on side"), + 32..33: NewLine, + 45..52: Comment(" below"), + 52..53: NewLine, + 65..78: Comment(" and another"), + 78..79: NewLine, + ] + "###); } #[test] diff --git a/prqlc/prqlc/src/lib.rs b/prqlc/prqlc/src/lib.rs index cd05cdab79cf..ac40247a186e 100644 --- a/prqlc/prqlc/src/lib.rs +++ b/prqlc/prqlc/src/lib.rs @@ -389,17 +389,29 @@ fn test_format_prql() { "###); assert_snapshot!(format_prql( r#" + from employees # test comment - from db.employees # inline comment + select {name} + "# + ).unwrap(), @r###" + from employees + # test comment + + # test comment + select {name} + "###); + + assert_snapshot!(format_prql( r#" + # test comment + from employees # inline comment # another test comment - select {name, age}"# + select {name}"# ).unwrap(), @r###" # test comment - from db.employees # inline comment - # another test comment + from employees # inline comment # another test comment - select {name, age} + select {name} "###); } From 51a85b3920f8c4be8d0258e379d432f09c2a6c2d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 12 May 2024 22:44:50 +0000 Subject: [PATCH 05/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- prqlc/prqlc/src/codegen/ast.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prqlc/prqlc/src/codegen/ast.rs b/prqlc/prqlc/src/codegen/ast.rs index f02d3cb94552..a6efd67e2a88 100644 --- a/prqlc/prqlc/src/codegen/ast.rs +++ b/prqlc/prqlc/src/codegen/ast.rs @@ -369,7 +369,7 @@ pub fn write_ident_part(s: &str) -> String { // }} /// Find a comment before a span. If there's exactly one newline prior, then the -/// comment is included here. Any furthur above are included with the prior token. +/// comment is included here. Any further above are included with the prior token. fn find_comment_before(span: Span, tokens: &TokenVec) -> Option { // index of the span in the token vec let index = tokens From 7021c49a1df9155f25111104996fb9028d1b7d93 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sun, 12 May 2024 16:06:41 -0700 Subject: [PATCH 06/10] --- prqlc/prqlc/src/codegen/ast.rs | 127 ++++++++++++++++++--------------- prqlc/prqlc/src/lib.rs | 7 +- 2 files changed, 73 insertions(+), 61 deletions(-) diff --git a/prqlc/prqlc/src/codegen/ast.rs b/prqlc/prqlc/src/codegen/ast.rs index a6efd67e2a88..24c23b1e0b01 100644 --- a/prqlc/prqlc/src/codegen/ast.rs +++ b/prqlc/prqlc/src/codegen/ast.rs @@ -24,10 +24,10 @@ fn write_within(node: &T, parent: &ExprKind, mut opt: WriteOpt) // // ...we're writing both `from a.b` and `a.b`, so we need to know which of // these to write comments for. I'm sure there are better ways to do it. - let comments = opt.enable_comments; + let enable_comments = opt.enable_comments; opt.enable_comments = false; - let out = node.write(opt.clone()); - opt.enable_comments = comments; + let out = dbg!(node.write(opt.clone())); + opt.enable_comments = enable_comments; out } @@ -35,11 +35,11 @@ impl WriteSource for Expr { fn write(&self, mut opt: WriteOpt) -> Option { let mut r = String::new(); - if let Some(span) = self.span { - if let Some(comment) = find_comment_before(span, &opt.tokens) { - r += &comment.to_string(); - } - } + // if let Some(span) = self.span { + // if let Some(comment) = find_comment_before(span, &opt.tokens) { + // r += &comment.to_string(); + // } + // } if let Some(alias) = &self.alias { r += opt.consume(alias)?; r += opt.consume(" = ")?; @@ -73,13 +73,25 @@ impl WriteSource for Expr { // dbg!(&self, &span, &opt.tokens, &comments); // } + // If the first item is a comment, it's an inline comment, and + // so add two spaces + if matches!( + comments.first(), + Some(Token { + kind: TokenKind::Comment(_), + .. + }) + ) { + r += " "; + } + for c in comments { - match c.kind { + match dbg!(c.kind) { // TODO: these are defined here since the debug // representations aren't quite right (NewLine is `new // line` as is used in error messages). But we should // probably move them onto the Struct. - TokenKind::Comment(s) => r += format!(" #{}", s).as_str(), + TokenKind::Comment(s) => r += format!("#{}", s).as_str(), TokenKind::NewLine => r += "\n", _ => unreachable!(), } @@ -368,28 +380,27 @@ pub fn write_ident_part(s: &str) -> String { // codegen::WriteSource::write(&pl.stmts, codegen::WriteOpt::default()).unwrap() // }} -/// Find a comment before a span. If there's exactly one newline prior, then the -/// comment is included here. Any further above are included with the prior token. -fn find_comment_before(span: Span, tokens: &TokenVec) -> Option { - // index of the span in the token vec - let index = tokens - .0 - .iter() - .position(|t| t.span.start == span.start && t.span.end == span.end)?; - if index <= 1 { - return None; - } - let prior_token = &tokens.0[index - 1].kind; - let prior_2_token = &tokens.0[index - 2].kind; - if matches!(prior_token, TokenKind::NewLine) && matches!(prior_2_token, TokenKind::Comment(_)) { - Some(prior_2_token.clone()) - } else { - None - } -} - -/// Find a comment after a span. If there's exactly one newline, then the -/// comment is included; otherwise it's included after the current line. +// /// Find a comment before a span. If there's exactly one newline prior, then the +// /// comment is included here. Any furthur above are included with the prior token. +// fn find_comment_before(span: Span, tokens: &TokenVec) -> Option { +// // index of the span in the token vec +// let index = tokens +// .0 +// .iter() +// .position(|t| t.span.start == span.start && t.span.end == span.end)?; +// if index <= 1 { +// return None; +// } +// let prior_token = &tokens.0[index - 1].kind; +// let prior_2_token = &tokens.0[index - 2].kind; +// if matches!(prior_token, TokenKind::NewLine) && matches!(prior_2_token, TokenKind::Comment(_)) { +// Some(prior_2_token.clone()) +// } else { +// None +// } +// } + +/// Find comments after a given span. fn find_comments_after(span: Span, tokens: &TokenVec) -> Vec { // index of the span in the token vec let index = tokens @@ -580,32 +591,32 @@ mod test { stmt.write(WriteOpt::default()).unwrap() } - #[test] - fn test_find_comment_before() { - let tokens = lex_source( - r#" - # comment - let a = 5 - "#, - ) - .unwrap(); - let span = tokens - .clone() - .0 - .iter() - .find(|t| t.kind == TokenKind::Keyword("let".to_string())) - .unwrap() - .span - .clone(); - let comment = find_comment_before(span.into(), &tokens); - assert_debug_snapshot!(comment, @r###" - Some( - Comment( - " comment", - ), - ) - "###); - } + // #[test] + // fn test_find_comment_before() { + // let tokens = lex_source( + // r#" + // # comment + // let a = 5 + // "#, + // ) + // .unwrap(); + // let span = tokens + // .clone() + // .0 + // .iter() + // .find(|t| t.kind == TokenKind::Keyword("let".to_string())) + // .unwrap() + // .span + // .clone(); + // let comment = find_comment_before(span.into(), &tokens); + // assert_debug_snapshot!(comment, @r###" + // Some( + // Comment( + // " comment", + // ), + // ) + // "###); + // } #[test] fn test_find_comments_after() { diff --git a/prqlc/prqlc/src/lib.rs b/prqlc/prqlc/src/lib.rs index ac40247a186e..a117eb3dba62 100644 --- a/prqlc/prqlc/src/lib.rs +++ b/prqlc/prqlc/src/lib.rs @@ -375,7 +375,8 @@ fn test_format_comment_basic() { from db.employees # inline comment "# ).unwrap(), @r###" - from db.employees # inline comment + from db.employees # inline comment + "###); } @@ -395,10 +396,10 @@ fn test_format_prql() { "# ).unwrap(), @r###" from employees - # test comment - # test comment + select {name} + "###); assert_snapshot!(format_prql( r#" From 99328e81e08df7a947d176784ca9a2868a0fff19 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 12 May 2024 23:07:01 +0000 Subject: [PATCH 07/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- prqlc/prqlc/src/codegen/ast.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prqlc/prqlc/src/codegen/ast.rs b/prqlc/prqlc/src/codegen/ast.rs index 24c23b1e0b01..e83c69d7404d 100644 --- a/prqlc/prqlc/src/codegen/ast.rs +++ b/prqlc/prqlc/src/codegen/ast.rs @@ -381,7 +381,7 @@ pub fn write_ident_part(s: &str) -> String { // }} // /// Find a comment before a span. If there's exactly one newline prior, then the -// /// comment is included here. Any furthur above are included with the prior token. +// /// comment is included here. Any further above are included with the prior token. // fn find_comment_before(span: Span, tokens: &TokenVec) -> Option { // // index of the span in the token vec // let index = tokens From 47f4fb226081276dc1b9c9e642908c2112f4986c Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sun, 12 May 2024 16:10:01 -0700 Subject: [PATCH 08/10] . --- prqlc/prqlc/src/codegen/ast.rs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/prqlc/prqlc/src/codegen/ast.rs b/prqlc/prqlc/src/codegen/ast.rs index e83c69d7404d..885f15915cd1 100644 --- a/prqlc/prqlc/src/codegen/ast.rs +++ b/prqlc/prqlc/src/codegen/ast.rs @@ -14,7 +14,6 @@ pub(crate) fn write_expr(expr: &Expr) -> String { } fn write_within(node: &T, parent: &ExprKind, mut opt: WriteOpt) -> Option { - // dbg!(&node, &parent); let parent_strength = binding_strength(parent); opt.context_strength = opt.context_strength.max(parent_strength); @@ -26,7 +25,7 @@ fn write_within(node: &T, parent: &ExprKind, mut opt: WriteOpt) // these to write comments for. I'm sure there are better ways to do it. let enable_comments = opt.enable_comments; opt.enable_comments = false; - let out = dbg!(node.write(opt.clone())); + let out = node.write(opt.clone()); opt.enable_comments = enable_comments; out } @@ -69,9 +68,6 @@ impl WriteSource for Expr { } let comments = find_comments_after(span, &opt.tokens); - // if !comments.is_empty() { - // dbg!(&self, &span, &opt.tokens, &comments); - // } // If the first item is a comment, it's an inline comment, and // so add two spaces @@ -86,7 +82,7 @@ impl WriteSource for Expr { } for c in comments { - match dbg!(c.kind) { + match c.kind { // TODO: these are defined here since the debug // representations aren't quite right (NewLine is `new // line` as is used in error messages). But we should @@ -628,7 +624,8 @@ mod test { "#, ) .unwrap(); - let span = dbg!(tokens.clone()) + let span = tokens + .clone() .0 .iter() .find(|t| t.kind == TokenKind::Literal(Literal::Integer(5))) From e0c02e7bcb809cfd1a01f746b029fd3f3edcd469 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sun, 12 May 2024 16:20:42 -0700 Subject: [PATCH 09/10] --- prqlc/prqlc/src/lib.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/prqlc/prqlc/src/lib.rs b/prqlc/prqlc/src/lib.rs index a117eb3dba62..80b822440d71 100644 --- a/prqlc/prqlc/src/lib.rs +++ b/prqlc/prqlc/src/lib.rs @@ -408,10 +408,9 @@ fn test_format_prql() { # another test comment select {name}"# ).unwrap(), @r###" - # test comment - from employees # inline comment - + from employees # inline comment # another test comment + select {name} "###); } From 4e47f09975be19c3812c642c40f239032fa9d626 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Thu, 16 May 2024 06:56:22 -0700 Subject: [PATCH 10/10] --- prqlc/prqlc/src/codegen/mod.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/prqlc/prqlc/src/codegen/mod.rs b/prqlc/prqlc/src/codegen/mod.rs index 93ab940f3d38..68459772711b 100644 --- a/prqlc/prqlc/src/codegen/mod.rs +++ b/prqlc/prqlc/src/codegen/mod.rs @@ -31,11 +31,14 @@ pub trait WriteSource: std::fmt::Debug { Some(r) } + /// Attempts to write the current item, expanding the maximum width where necessary. fn write_or_expand(&self, mut opt: WriteOpt) -> String { loop { if let Some(s) = self.write(opt.clone()) { return s; } else { + // TODO: could we just set the max width rather than increasing + // it in a loop? opt.max_width += opt.max_width / 2; opt.reset_line(); } @@ -113,6 +116,8 @@ impl WriteOpt { Some(()) } + /// Sets [WriteOpt::rem_width] to (max_width - indent_width), returning + /// `Some(())`, or `None` if there's not enough space. fn reset_line(&mut self) -> Option<()> { let ident = self.tab.len() as u16 * self.indent; self.rem_width = self.max_width.checked_sub(ident)?;