diff --git a/prqlc/prqlc-ast/src/span.rs b/prqlc/prqlc-ast/src/span.rs
index 1fb025a8a126..9227c79c8f2e 100644
--- a/prqlc/prqlc-ast/src/span.rs
+++ b/prqlc/prqlc-ast/src/span.rs
@@ -17,6 +17,15 @@ impl From<Span> for Range<usize> {
         a.start..a.end
     }
 }
+impl From<Range<usize>> for Span {
+    fn from(range: Range<usize>) -> Self {
+        Span {
+            start: range.start,
+            end: range.end,
+            source_id: 0, // Default value as Range does not provide a source_id
+        }
+    }
+}
 
 impl Debug for Span {
     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
diff --git a/prqlc/prqlc-parser/src/lexer.rs b/prqlc/prqlc-parser/src/lexer.rs
index 32022af83aee..72db8c459511 100644
--- a/prqlc/prqlc-parser/src/lexer.rs
+++ b/prqlc/prqlc-parser/src/lexer.rs
@@ -7,7 +7,7 @@ use chumsky::{
 use prqlc_ast::expr::*;
 use serde::{Deserialize, Serialize};
 
-#[derive(Clone, PartialEq, Serialize, Deserialize)]
+#[derive(Clone, PartialEq, Serialize, Deserialize, Eq)]
 pub struct Token {
     pub kind: TokenKind,
     pub span: std::ops::Range<usize>,
@@ -579,7 +579,7 @@ impl std::fmt::Debug for Token {
     }
 }
 
-#[derive(Serialize, Deserialize, Debug)]
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
 pub struct TokenVec(pub Vec<Token>);
 
 // impl std::fmt::Debug for TokenVec {
diff --git a/prqlc/prqlc-parser/src/lib.rs b/prqlc/prqlc-parser/src/lib.rs
index 1a930961fe98..d5682fc0e14c 100644
--- a/prqlc/prqlc-parser/src/lib.rs
+++ b/prqlc/prqlc-parser/src/lib.rs
@@ -14,8 +14,7 @@ use prqlc_ast::error::{Error, Reason, WithErrorInfo};
 use prqlc_ast::stmt::*;
 use prqlc_ast::Span;
 
-use lexer::Token;
-pub use lexer::{TokenKind, TokenVec};
+pub use lexer::{Token, TokenKind, TokenVec};
 use span::ParserSpan;
 
 /// Build PRQL AST from a PRQL query string.
diff --git a/prqlc/prqlc/src/codegen/ast.rs b/prqlc/prqlc/src/codegen/ast.rs
index 72fd6b370ac2..885f15915cd1 100644
--- a/prqlc/prqlc/src/codegen/ast.rs
+++ b/prqlc/prqlc/src/codegen/ast.rs
@@ -1,10 +1,10 @@
 use std::collections::HashSet;
 
 use once_cell::sync::Lazy;
-
-use crate::ast::*;
+use prqlc_parser::{Token, TokenKind, TokenVec};
 use regex::Regex;
 
+use crate::ast::*;
 use crate::codegen::SeparatedExprs;
 
 use super::{WriteOpt, WriteSource};
@@ -17,13 +17,28 @@ fn write_within<T: WriteSource>(node: &T, parent: &ExprKind, mut opt: WriteOpt) 
     let parent_strength = binding_strength(parent);
     opt.context_strength = opt.context_strength.max(parent_strength);
 
-    node.write(opt)
+    // FIXME: this is extremely hacky. Our issue is that in:
+    //
+    // from a.b # comment
+    //
+    // ...we're writing both `from a.b` and `a.b`, so we need to know which of
+    // these to write comments for. I'm sure there are better ways to do it.
+    let enable_comments = opt.enable_comments;
+    opt.enable_comments = false;
+    let out = node.write(opt.clone());
+    opt.enable_comments = enable_comments;
+    out
 }
 
 impl WriteSource for Expr {
     fn write(&self, mut opt: WriteOpt) -> Option<String> {
         let mut r = String::new();
 
+        // if let Some(span) = self.span {
+        //     if let Some(comment) = find_comment_before(span, &opt.tokens) {
+        //         r += &comment.to_string();
+        //     }
+        // }
         if let Some(alias) = &self.alias {
             r += opt.consume(alias)?;
             r += opt.consume(" = ")?;
@@ -41,9 +56,44 @@ impl WriteSource for Expr {
             if let Some(value) = value {
                 r += &value;
             } else {
-                r += &break_line_within_parenthesis(&self.kind, opt)?;
+                r += &break_line_within_parenthesis(&self.kind, &mut opt)?;
             }
         };
+
+        if opt.enable_comments {
+            if let Some(span) = self.span {
+                // TODO: change underlying function so we can remove this
+                if opt.tokens.0.is_empty() {
+                    return Some(r);
+                }
+
+                let comments = find_comments_after(span, &opt.tokens);
+
+                // If the first item is a comment, it's an inline comment, and
+                // so add two spaces
+                if matches!(
+                    comments.first(),
+                    Some(Token {
+                        kind: TokenKind::Comment(_),
+                        ..
+                    })
+                ) {
+                    r += " ";
+                }
+
+                for c in comments {
+                    match c.kind {
+                        // TODO: these are defined here since the debug
+                        // representations aren't quite right (NewLine is `new
+                        // line` as is used in error messages). But we should
+                        // probably move them onto the Struct.
+                        TokenKind::Comment(s) => r += format!("#{}", s).as_str(),
+                        TokenKind::NewLine => r += "\n",
+                        _ => unreachable!(),
+                    }
+                }
+            }
+        }
         Some(r)
     }
 }
@@ -197,7 +247,7 @@ impl WriteSource for ExprKind {
                 if let Some(body) = c.body.write(opt.clone()) {
                     r += &body;
                 } else {
-                    r += &break_line_within_parenthesis(c.body.as_ref(), opt)?;
+                    r += &break_line_within_parenthesis(c.body.as_ref(), &mut opt)?;
                 }
 
                 Some(r)
@@ -222,7 +272,7 @@ impl WriteSource for ExprKind {
     }
 }
 
-fn break_line_within_parenthesis<T: WriteSource>(expr: &T, mut opt: WriteOpt) -> Option<String> {
+fn break_line_within_parenthesis<T: WriteSource>(expr: &T, opt: &mut WriteOpt) -> Option<String> {
     let mut r = "(\n".to_string();
     opt.indent += 1;
     r += &opt.write_indent();
@@ -321,6 +371,52 @@ pub fn write_ident_part(s: &str) -> String {
     }
 }
 
+// impl WriteSource for ModuleDef {
+//     fn write(&self, mut opt: WriteOpt) -> Option<String> {
+//         codegen::WriteSource::write(&pl.stmts, codegen::WriteOpt::default()).unwrap()
+// }}
+
+// /// Find a comment before a span. If there's exactly one newline prior, then the
+// /// comment is included here. Any further above are included with the prior token.
+// fn find_comment_before(span: Span, tokens: &TokenVec) -> Option<TokenKind> {
+//     // index of the span in the token vec
+//     let index = tokens
+//         .0
+//         .iter()
+//         .position(|t| t.span.start == span.start && t.span.end == span.end)?;
+//     if index <= 1 {
+//         return None;
+//     }
+//     let prior_token = &tokens.0[index - 1].kind;
+//     let prior_2_token = &tokens.0[index - 2].kind;
+//     if matches!(prior_token, TokenKind::NewLine) && matches!(prior_2_token, TokenKind::Comment(_)) {
+//         Some(prior_2_token.clone())
+//     } else {
+//         None
+//     }
+// }
+
+/// Find comments after a given span.
+fn find_comments_after(span: Span, tokens: &TokenVec) -> Vec<Token> {
+    // index of the span in the token vec
+    let index = tokens
+        .0
+        .iter()
+        // FIXME: why isn't this working?
+        // .position(|t| t.1.start == span.start && t.1.end == span.end)
+        .position(|t| t.span.end == span.end)
+        .unwrap_or_else(|| panic!("{:?}, {:?}", &tokens, &span));
+
+    let mut out = vec![];
+    for token in tokens.0.iter().skip(index + 1) {
+        match token.kind {
+            TokenKind::NewLine | TokenKind::Comment(_) => out.push(token.clone()),
+            _ => break,
+        }
+    }
+    out
+}
+
 impl WriteSource for Vec<Stmt> {
     fn write(&self, mut opt: WriteOpt) -> Option<String> {
         opt.reset_line()?;
@@ -469,7 +565,8 @@ impl WriteSource for SwitchCase {
 
 #[cfg(test)]
 mod test {
-    use insta::assert_snapshot;
+    use insta::{assert_debug_snapshot, assert_snapshot};
+    use prqlc_parser::lex_source;
 
     use super::*;
 
@@ -490,6 +587,64 @@ mod test {
         stmt.write(WriteOpt::default()).unwrap()
     }
 
+    // #[test]
+    // fn test_find_comment_before() {
+    //     let tokens = lex_source(
+    //         r#"
+    //         # comment
+    //         let a = 5
+    //         "#,
+    //     )
+    //     .unwrap();
+    //     let span = tokens
+    //         .clone()
+    //         .0
+    //         .iter()
+    //         .find(|t| t.kind == TokenKind::Keyword("let".to_string()))
+    //         .unwrap()
+    //         .span
+    //         .clone();
+    //     let comment = find_comment_before(span.into(), &tokens);
+    //     assert_debug_snapshot!(comment, @r###"
+    //     Some(
+    //         Comment(
+    //             " comment",
+    //         ),
+    //     )
+    //     "###);
+    // }
+
+    #[test]
+    fn test_find_comments_after() {
+        let tokens = lex_source(
+            r#"
+            let a = 5 # on side
+            # below
+            # and another
+            "#,
+        )
+        .unwrap();
+        let span = tokens
+            .clone()
+            .0
+            .iter()
+            .find(|t| t.kind == TokenKind::Literal(Literal::Integer(5)))
+            .unwrap()
+            .span
+            .clone();
+        let comment = find_comments_after(span.into(), &tokens);
+        assert_debug_snapshot!(comment, @r###"
+        [
+            23..32: Comment(" on side"),
+            32..33: NewLine,
+            45..52: Comment(" below"),
+            52..53: NewLine,
+            65..78: Comment(" and another"),
+            78..79: NewLine,
+        ]
+        "###);
+    }
+
     #[test]
     fn test_pipeline() {
         let short = Expr::new(ExprKind::Ident("short".to_string()));
diff --git a/prqlc/prqlc/src/codegen/mod.rs b/prqlc/prqlc/src/codegen/mod.rs
index 2e04280a4e78..0253c711aa0b 100644
--- a/prqlc/prqlc/src/codegen/mod.rs
+++ b/prqlc/prqlc/src/codegen/mod.rs
@@ -4,7 +4,9 @@ mod types;
 pub(crate) use ast::write_expr;
 pub(crate) use types::{write_ty, write_ty_kind};
 
-pub trait WriteSource {
+use prqlc_parser::TokenVec;
+
+pub trait WriteSource: std::fmt::Debug {
     /// Converts self to its source representation according to specified
     /// options.
     ///
@@ -29,11 +31,14 @@ pub trait WriteSource {
         Some(r)
     }
 
+    /// Attempts to write the current item, expanding the maximum width where necessary.
     fn write_or_expand(&self, mut opt: WriteOpt) -> String {
         loop {
             if let Some(s) = self.write(opt.clone()) {
                 return s;
             } else {
+                // TODO: could we just set the max width rather than increasing
+                // it in a loop?
                 opt.max_width += opt.max_width / 2;
                 opt.reset_line();
             }
@@ -72,6 +77,13 @@ pub struct WriteOpt {
     /// For example:
     /// `join foo` has an unbound expr, since `join foo ==bar` produced a binary op.
     pub unbound_expr: bool,
+
+    /// The lexer tokens that were used to produce this source; used for
+    /// comments.
+    pub tokens: TokenVec,
+
+    // TODO: remove
+    pub enable_comments: bool,
 }
 
 impl Default for WriteOpt {
@@ -84,6 +96,8 @@ impl Default for WriteOpt {
             rem_width: 50,
             context_strength: 0,
             unbound_expr: false,
+            tokens: TokenVec(vec![]),
+            enable_comments: true,
         }
     }
 }
@@ -102,6 +116,8 @@ impl WriteOpt {
         Some(())
     }
 
+    /// Sets [WriteOpt::rem_width] to (max_width - indent_width), returning
+    /// `Some(())`, or `None` if there's not enough space.
     fn reset_line(&mut self) -> Option<()> {
         let ident = self.tab.len() as u16 * self.indent;
         self.rem_width = self.max_width.checked_sub(ident)?;
diff --git a/prqlc/prqlc/src/codegen/types.rs b/prqlc/prqlc/src/codegen/types.rs
index 318e7593873c..49772f20cfce 100644
--- a/prqlc/prqlc/src/codegen/types.rs
+++ b/prqlc/prqlc/src/codegen/types.rs
@@ -113,6 +113,7 @@ impl WriteSource for TyTupleField {
     }
 }
 
+#[derive(Debug, Clone)]
 struct UnionVariant<'a>(&'a Option<String>, &'a Ty);
 
 impl WriteSource for UnionVariant<'_> {
diff --git a/prqlc/prqlc/src/lib.rs b/prqlc/prqlc/src/lib.rs
index 6f38a6d1f317..80b822440d71 100644
--- a/prqlc/prqlc/src/lib.rs
+++ b/prqlc/prqlc/src/lib.rs
@@ -355,6 +355,66 @@ pub fn pl_to_prql(pl: &ast::ModuleDef) -> Result<String, ErrorMessages> {
     Ok(codegen::WriteSource::write(&pl.stmts, codegen::WriteOpt::default()).unwrap())
 }
 
+pub fn format_prql(prql: &str) -> Result<String, ErrorMessages> {
+    // TODO: convert errors
+    let tokens = prqlc_parser::lex_source(prql).unwrap();
+    let pl = prql_to_pl(prql)?;
+    Ok(codegen::WriteSource::write(
+        &pl.stmts,
+        codegen::WriteOpt {
+            tokens,
+            ..Default::default()
+        },
+    )
+    .unwrap())
+}
+#[test]
+fn test_format_comment_basic() {
+    use insta::assert_snapshot;
+    assert_snapshot!(format_prql( r#"
+    from db.employees # inline comment
+    "#
+).unwrap(), @r###"
+    from db.employees # inline comment
+
+    "###);
+}
+
+#[test]
+fn test_format_prql() {
+    use insta::assert_snapshot;
+
+    assert_snapshot!(format_prql( "from db.employees | select {name, age}").unwrap(), @r###"
+    from db.employees
+    select {name, age}
+    "###);
+
+    assert_snapshot!(format_prql( r#"
+    from employees
+    # test comment
+    select {name}
+    "#
+    ).unwrap(), @r###"
+    from employees
+    # test comment
+
+    select {name}
+
+    "###);
+
+    assert_snapshot!(format_prql( r#"
+    # test comment
+    from employees # inline comment
+    # another test comment
+    select {name}"#
+    ).unwrap(), @r###"
+    from employees # inline comment
+    # another test comment
+
+    select {name}
+    "###);
+}
+
 /// JSON serialization and deserialization functions
 pub mod json {
     use super::*;