PRQL · max-sixty · Feb 18, 2024 · Feb 18, 2024 · Mar 3, 2024 · Apr 20, 2024
diff --git a/prqlc/prqlc-ast/src/span.rs b/prqlc/prqlc-ast/src/span.rs
@@ -17,6 +17,15 @@ impl From<Span> for Range<usize> {
         a.start..a.end
     }
 }
+impl From<Range<usize>> for Span {
+    fn from(range: Range<usize>) -> Self {
+        Span {
+            start: range.start,
+            end: range.end,
+            source_id: 0, // Default value as Range<usize> does not provide a source_id
+        }
+    }
+}
 
 impl Debug for Span {
     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {

diff --git a/prqlc/prqlc-parser/src/lexer.rs b/prqlc/prqlc-parser/src/lexer.rs
@@ -7,7 +7,7 @@
 use prqlc_ast::expr::*;
 use serde::{Deserialize, Serialize};
 
-#[derive(Clone, PartialEq, Serialize, Deserialize)]
+#[derive(Clone, PartialEq, Serialize, Deserialize, Eq)]
 pub struct Token {
     pub kind: TokenKind,
     pub span: std::ops::Range<usize>,
@@ -579,7 +579,7 @@
     }
 }
 
-#[derive(Serialize, Deserialize, Debug)]
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
 pub struct TokenVec(pub Vec<Token>);
 
 // impl std::fmt::Debug for TokenVec {

diff --git a/prqlc/prqlc-parser/src/lib.rs b/prqlc/prqlc-parser/src/lib.rs
@@ -14,8 +14,7 @@ use prqlc_ast::error::{Error, Reason, WithErrorInfo};
 use prqlc_ast::stmt::*;
 use prqlc_ast::Span;
 
-use lexer::Token;
-pub use lexer::{TokenKind, TokenVec};
+pub use lexer::{Token, TokenKind, TokenVec};
 use span::ParserSpan;
 
 /// Build PRQL AST from a PRQL query string.

diff --git a/prqlc/prqlc/src/codegen/ast.rs b/prqlc/prqlc/src/codegen/ast.rs
@@ -1,10 +1,10 @@
 use std::collections::HashSet;
 
 use once_cell::sync::Lazy;
-
-use crate::ast::*;
+use prqlc_parser::{Token, TokenKind, TokenVec};
 use regex::Regex;
 
+use crate::ast::*;
 use crate::codegen::SeparatedExprs;
 
 use super::{WriteOpt, WriteSource};
@@ -17,13 +17,28 @@
     let parent_strength = binding_strength(parent);
     opt.context_strength = opt.context_strength.max(parent_strength);
 
-    node.write(opt)
+    // FIXME: this is extremely hacky. Our issue is that in:
+    //
+    //   from a.b  # comment
+    //
+    // ...we're writing both `from a.b` and `a.b`, so we need to know which of
+    // these to write comments for. I'm sure there are better ways to do it.
+    let enable_comments = opt.enable_comments;
+    opt.enable_comments = false;
+    let out = node.write(opt.clone());
+    opt.enable_comments = enable_comments;
+    out
 }
 
 impl WriteSource for Expr {
     fn write(&self, mut opt: WriteOpt) -> Option<String> {
         let mut r = String::new();
 
+        // if let Some(span) = self.span {
+        //     if let Some(comment) = find_comment_before(span, &opt.tokens) {
+        //         r += &comment.to_string();
+        //     }
+        // }
         if let Some(alias) = &self.alias {
             r += opt.consume(alias)?;
             r += opt.consume(" = ")?;
@@ -41,9 +56,44 @@
             if let Some(value) = value {
                 r += &value;
             } else {
-                r += &break_line_within_parenthesis(&self.kind, opt)?;
+                r += &break_line_within_parenthesis(&self.kind, &mut opt)?;
             }
         };
+
+        if opt.enable_comments {
+            if let Some(span) = self.span {
+                // TODO: change underlying function so we can remove this
+                if opt.tokens.0.is_empty() {
+                    return Some(r);
+                }
+
+                let comments = find_comments_after(span, &opt.tokens);
+
+                // If the first item is a comment, it's an inline comment, and
+                // so add two spaces
+                if matches!(
+                    comments.first(),
+                    Some(Token {
+                        kind: TokenKind::Comment(_),
+                        ..
+                    })
+                ) {
+                    r += "  ";
+                }
+
+                for c in comments {
+                    match c.kind {
+                        // TODO: these are defined here since the debug
+                        // representations aren't quite right (NewLine is `new
+                        // line` as is used in error messages). But we should
+                        // probably move them onto the Struct.
+                        TokenKind::Comment(s) => r += format!("#{}", s).as_str(),
+                        TokenKind::NewLine => r += "\n",
+                        _ => unreachable!(),
+                    }
+                }
+            }
+        }
         Some(r)
     }
 }
@@ -197,7 +247,7 @@
                 if let Some(body) = c.body.write(opt.clone()) {
                     r += &body;
                 } else {
-                    r += &break_line_within_parenthesis(c.body.as_ref(), opt)?;
+                    r += &break_line_within_parenthesis(c.body.as_ref(), &mut opt)?;
                 }
 
                 Some(r)
@@ -222,7 +272,7 @@
     }
 }
 
-fn break_line_within_parenthesis<T: WriteSource>(expr: &T, mut opt: WriteOpt) -> Option<String> {
+fn break_line_within_parenthesis<T: WriteSource>(expr: &T, opt: &mut WriteOpt) -> Option<String> {
     let mut r = "(\n".to_string();
     opt.indent += 1;
     r += &opt.write_indent();
@@ -321,6 +371,52 @@
     }
 }
 
+// impl WriteSource for ModuleDef {
+//     fn write(&self, mut opt: WriteOpt) -> Option<String> {
+//         codegen::WriteSource::write(&pl.stmts, codegen::WriteOpt::default()).unwrap()
+//     }}
+
+// /// Find a comment before a span. If there's exactly one newline prior, then the
+// /// comment is included here. Any further above are included with the prior token.
+// fn find_comment_before(span: Span, tokens: &TokenVec) -> Option<TokenKind> {
+//     // index of the span in the token vec
+//     let index = tokens
+//         .0
+//         .iter()
+//         .position(|t| t.span.start == span.start && t.span.end == span.end)?;
+//     if index <= 1 {
+//         return None;
+//     }
+//     let prior_token = &tokens.0[index - 1].kind;
+//     let prior_2_token = &tokens.0[index - 2].kind;
+//     if matches!(prior_token, TokenKind::NewLine) && matches!(prior_2_token, TokenKind::Comment(_)) {
+//         Some(prior_2_token.clone())
+//     } else {
+//         None
+//     }
+// }
+
+/// Find comments after a given span.
+fn find_comments_after(span: Span, tokens: &TokenVec) -> Vec<Token> {
+    // index of the span in the token vec
+    let index = tokens
+        .0
+        .iter()
+        // FIXME: why isn't this working?
+        // .position(|t| t.1.start == span.start && t.1.end == span.end)
+        .position(|t| t.span.end == span.end)
+        .unwrap_or_else(|| panic!("{:?}, {:?}", &tokens, &span));
+
+    let mut out = vec![];
+    for token in tokens.0.iter().skip(index + 1) {
+        match token.kind {
+            TokenKind::NewLine | TokenKind::Comment(_) => out.push(token.clone()),
+            _ => break,
+        }
+    }
+    out
+}
+
 impl WriteSource for Vec<Stmt> {
     fn write(&self, mut opt: WriteOpt) -> Option<String> {
         opt.reset_line()?;
@@ -469,7 +565,8 @@
 
 #[cfg(test)]
 mod test {
-    use insta::assert_snapshot;
+    use insta::{assert_debug_snapshot, assert_snapshot};
+    use prqlc_parser::lex_source;
 
     use super::*;
 
@@ -490,6 +587,64 @@
         stmt.write(WriteOpt::default()).unwrap()
     }
 
+    // #[test]
+    // fn test_find_comment_before() {
+    //     let tokens = lex_source(
+    //         r#"
+    //         # comment
+    //         let a = 5
+    //         "#,
+    //     )
+    //     .unwrap();
+    //     let span = tokens
+    //         .clone()
+    //         .0
+    //         .iter()
+    //         .find(|t| t.kind == TokenKind::Keyword("let".to_string()))
+    //         .unwrap()
+    //         .span
+    //         .clone();
+    //     let comment = find_comment_before(span.into(), &tokens);
+    //     assert_debug_snapshot!(comment, @r###"
+    //     Some(
+    //         Comment(
+    //             " comment",
+    //         ),
+    //     )
+    //     "###);
+    // }
+
+    #[test]
+    fn test_find_comments_after() {
+        let tokens = lex_source(
+            r#"
+            let a = 5 # on side
+            # below
+            # and another
+            "#,
+        )
+        .unwrap();
+        let span = tokens
+            .clone()
+            .0
+            .iter()
+            .find(|t| t.kind == TokenKind::Literal(Literal::Integer(5)))
+            .unwrap()
+            .span
+            .clone();
+        let comment = find_comments_after(span.into(), &tokens);
+        assert_debug_snapshot!(comment, @r###"
+        [
+            23..32: Comment(" on side"),
+            32..33: NewLine,
+            45..52: Comment(" below"),
+            52..53: NewLine,
+            65..78: Comment(" and another"),
+            78..79: NewLine,
+        ]
+        "###);
+    }
+
     #[test]
     fn test_pipeline() {
         let short = Expr::new(ExprKind::Ident("short".to_string()));

diff --git a/prqlc/prqlc/src/codegen/mod.rs b/prqlc/prqlc/src/codegen/mod.rs
@@ -4,7 +4,9 @@ mod types;
 pub(crate) use ast::write_expr;
 pub(crate) use types::{write_ty, write_ty_kind};
 
-pub trait WriteSource {
+use prqlc_parser::TokenVec;
+
+pub trait WriteSource: std::fmt::Debug {
     /// Converts self to its source representation according to specified
     /// options.
     ///
@@ -29,11 +31,14 @@ pub trait WriteSource {
         Some(r)
     }
 
+    /// Attempts to write the current item, expanding the maximum width where necessary.
     fn write_or_expand(&self, mut opt: WriteOpt) -> String {
         loop {
             if let Some(s) = self.write(opt.clone()) {
                 return s;
             } else {
+                // TODO: could we just set the max width rather than increasing
+                // it in a loop?
                 opt.max_width += opt.max_width / 2;
                 opt.reset_line();
             }
@@ -72,6 +77,13 @@ pub struct WriteOpt {
     /// For example:
     /// `join foo` has an unbound expr, since `join foo ==bar` produced a binary op.
     pub unbound_expr: bool,
+
+    /// The lexer tokens that were used to produce this source; used for
+    /// comments.
+    pub tokens: TokenVec,
+
+    // TODO: remove
+    pub enable_comments: bool,
 }
 
 impl Default for WriteOpt {
@@ -84,6 +96,8 @@ impl Default for WriteOpt {
             rem_width: 50,
             context_strength: 0,
             unbound_expr: false,
+            tokens: TokenVec(vec![]),
+            enable_comments: true,
         }
     }
 }
@@ -102,6 +116,8 @@ impl WriteOpt {
         Some(())
     }
 
+    /// Sets [WriteOpt::rem_width] to (max_width - indent_width), returning
+    /// `Some(())`, or `None` if there's not enough space.
     fn reset_line(&mut self) -> Option<()> {
         let ident = self.tab.len() as u16 * self.indent;
         self.rem_width = self.max_width.checked_sub(ident)?;

diff --git a/prqlc/prqlc/src/codegen/types.rs b/prqlc/prqlc/src/codegen/types.rs
@@ -113,6 +113,7 @@ impl WriteSource for TyTupleField {
     }
 }
 
+#[derive(Debug, Clone)]
 struct UnionVariant<'a>(&'a Option<String>, &'a Ty);
 
 impl WriteSource for UnionVariant<'_> {