From a45287c04dfade147adea83ccc7238fe21c91d09 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <m@maxroos.com>
Date: Sat, 17 Feb 2024 21:32:39 -0800
Subject: [PATCH 01/10] feat: Add comments to format output

---
 prqlc/prqlc-parser/src/lexer.rs |  1 +
 prqlc/prqlc-parser/src/lib.rs   |  4 +-
 prqlc/prqlc/src/codegen/ast.rs  | 78 +++++++++++++++++++++++++++++++--
 prqlc/prqlc/src/codegen/mod.rs  |  7 +++
 prqlc/prqlc/src/lib.rs          | 45 +++++++++++++++++++
 5 files changed, 130 insertions(+), 5 deletions(-)
diff --git a/prqlc/prqlc-parser/src/lexer.rs b/prqlc/prqlc-parser/src/lexer.rs
index af05ab0c82ab..8c1b8aed7276 100644
--- a/prqlc/prqlc-parser/src/lexer.rs
+++ b/prqlc/prqlc-parser/src/lexer.rs
@@ -564,6 +564,7 @@ impl std::fmt::Debug for TokenSpan {
     }
 }
 
+#[derive(Clone, PartialEq, Eq)]
 pub struct TokenVec(pub Vec<TokenSpan>);
 
 impl std::fmt::Debug for TokenVec {
diff --git a/prqlc/prqlc-parser/src/lib.rs b/prqlc/prqlc-parser/src/lib.rs
index a7c459d64962..ce57af2ab4a0 100644
--- a/prqlc/prqlc-parser/src/lib.rs
+++ b/prqlc/prqlc-parser/src/lib.rs
@@ -15,8 +15,8 @@ use prqlc_ast::error::{Error, WithErrorInfo};
 use prqlc_ast::stmt::*;
 use prqlc_ast::Span;
 
-use lexer::Token;
-use lexer::{TokenSpan, TokenVec};
+pub use lexer::Token;
+pub use lexer::{TokenSpan, TokenVec};
 use span::ParserSpan;
 
 /// Build PRQL AST from a PRQL query string.
diff --git a/prqlc/prqlc/src/codegen/ast.rs b/prqlc/prqlc/src/codegen/ast.rs
index 444368022591..5aad89c48d58 100644
--- a/prqlc/prqlc/src/codegen/ast.rs
+++ b/prqlc/prqlc/src/codegen/ast.rs
@@ -1,6 +1,7 @@
 use std::collections::HashSet;
 
 use once_cell::sync::Lazy;
+use prqlc_parser::{Token, TokenSpan, TokenVec};
 
 use crate::ast::*;
 use regex::Regex;
@@ -22,8 +23,20 @@ fn write_within<T: WriteSource>(node: &T, parent: &ExprKind, mut opt: WriteOpt)
 
 impl WriteSource for Expr {
     fn write(&self, mut opt: WriteOpt) -> Option<String> {
+        // let span = self.span;
+        // let tokens = opt.tokens;
+        // let comment =
         let mut r = String::new();
 
+        if let Some(span) = self.span {
+            if let Some(comment) = find_comment_before(span, &opt.tokens) {
+                // dbg!(&span);
+                // dbg!(&self);
+                // dbg!(&comment);
+
+                r += &comment.to_string();
+            }
+        }
         if let Some(alias) = &self.alias {
             r += opt.consume(alias)?;
             r += opt.consume(" = ")?;
@@ -41,9 +54,20 @@ impl WriteSource for Expr {
             if let Some(value) = value {
                 r += &value;
             } else {
-                r += &break_line_within_parenthesis(&self.kind, opt)?;
+                r += &break_line_within_parenthesis(&self.kind, &mut opt)?;
             }
         };
+
+        if let Some(span) = self.span {
+            let comments = find_comments_after(span, &opt.tokens);
+            // dbg!(&span);
+            // dbg!(&self);
+            // dbg!(&comments);
+
+            for c in comments {
+                r += &c.0.to_string();
+            }
+        }
         Some(r)
     }
 }
@@ -180,7 +204,7 @@ impl WriteSource for ExprKind {
                 if let Some(body) = c.body.write(opt.clone()) {
                     r += &body;
                 } else {
-                    r += &break_line_within_parenthesis(c.body.as_ref(), opt)?;
+                    r += &break_line_within_parenthesis(c.body.as_ref(), &mut opt)?;
                 }
 
                 Some(r)
@@ -205,7 +229,7 @@ impl WriteSource for ExprKind {
     }
 }
 
-fn break_line_within_parenthesis<T: WriteSource>(expr: &T, mut opt: WriteOpt) -> Option<String> {
+fn break_line_within_parenthesis<T: WriteSource>(expr: &T, opt: &mut WriteOpt) -> Option<String> {
     let mut r = "(\n".to_string();
     opt.indent += 1;
     r += &opt.write_indent();
@@ -304,6 +328,54 @@ pub fn write_ident_part(s: &str) -> String {
     }
 }
 
+// impl WriteSource for ModuleDef {
+//     fn write(&self, mut opt: WriteOpt) -> Option<String> {
+//         codegen::WriteSource::write(&pl.stmts, codegen::WriteOpt::default()).unwrap()
+//     }}
+
+// impl WriteSource for ModuleDef {
+
+/// Find a comment before a span. If there's exactly one newline, then the
+/// comment is included; otherwise it's included after the current line.
+fn find_comment_before(span: Span, tokens: &TokenVec) -> Option<Token> {
+    // index of the span in the token vec
+    let index = tokens
+        .0
+        .iter()
+        .position(|t| t.1.start == span.start && t.1.end == span.end)?;
+    if index <= 1 {
+        return None;
+    }
+    let prior_token = &tokens.0[index - 1].0;
+    let prior_2_token = &tokens.0[index - 2].0;
+    if matches!(prior_token, Token::NewLine) && matches!(prior_2_token, Token::Comment(_)) {
+        Some(prior_2_token.clone())
+    } else {
+        None
+    }
+}
+
+/// Find a comment before a span. If there's exactly one newline, then the
+/// comment is included; otherwise it's included after the current line.
+fn find_comments_after(span: Span, tokens: &TokenVec) -> Vec<TokenSpan> {
+    let mut out = vec![];
+    // index of the span in the token vec
+    let index = tokens
+        .0
+        .iter()
+        // FIXME: why isn't this working?
+        // .position(|t| t.1.start == span.start && t.1.end == span.end)
+        .position(|t| t.1.start == span.start)
+        .unwrap_or_else(|| panic!("{:?}, {:?}", &tokens, &span));
+    for token in tokens.0.iter().skip(index) {
+        match token.0 {
+            Token::NewLine | Token::Comment(_) => out.push(dbg!(token.clone())),
+            _ => break,
+        }
+    }
+    out
+}
+
 impl WriteSource for Vec<Stmt> {
     fn write(&self, mut opt: WriteOpt) -> Option<String> {
         opt.reset_line()?;
diff --git a/prqlc/prqlc/src/codegen/mod.rs b/prqlc/prqlc/src/codegen/mod.rs
index e14dcee74e49..f73570077352 100644
--- a/prqlc/prqlc/src/codegen/mod.rs
+++ b/prqlc/prqlc/src/codegen/mod.rs
@@ -4,6 +4,8 @@ mod types;
 pub(crate) use ast::write_expr;
 pub(crate) use types::{write_ty, write_ty_kind};
 
+use prqlc_parser::TokenVec;
+
 pub trait WriteSource {
     /// Converts self to its source representation according to specified
     /// options.
@@ -72,6 +74,10 @@ pub struct WriteOpt {
     /// For example:
     /// `join foo` has an unbound expr, since `join foo ==bar` produced a binary op.
     pub unbound_expr: bool,
+
+    /// The lexer tokens that were used to produce this source; used for
+    /// comments.
+    pub tokens: TokenVec,
 }
 
 impl Default for WriteOpt {
@@ -84,6 +90,7 @@ impl Default for WriteOpt {
             rem_width: 50,
             context_strength: 0,
             unbound_expr: false,
+            tokens: TokenVec(vec![]),
         }
     }
 }
diff --git a/prqlc/prqlc/src/lib.rs b/prqlc/prqlc/src/lib.rs
index 3aee35a58a15..afa00a7044ef 100644
--- a/prqlc/prqlc/src/lib.rs
+++ b/prqlc/prqlc/src/lib.rs
@@ -276,6 +276,15 @@ pub fn prql_to_pl(prql: &str) -> Result<ast::ModuleDef, ErrorMessages> {
     prql_to_pl_tree(&source_tree)
 }
 
+// pub fn fmt(prql: &str) -> Result<String> {
+//     let source_tree = SourceTree::from(prql);
+//     let pl = prql_to_pl_tree(&source_tree)?;
+
+//     for x in pl.stmts {
+//         x.
+
+// }
+
 /// Parse PRQL into a PL AST
 pub fn prql_to_pl_tree(prql: &SourceTree) -> Result<ast::ModuleDef, ErrorMessages> {
     parser::parse(prql).map_err(|e| ErrorMessages::from(e).composed(prql))
@@ -307,6 +316,42 @@ pub fn pl_to_prql(pl: &ast::ModuleDef) -> Result<String, ErrorMessages> {
     Ok(codegen::WriteSource::write(&pl.stmts, codegen::WriteOpt::default()).unwrap())
 }
 
+pub fn format_prql(prql: &str) -> Result<String, ErrorMessages> {
+    // TODO: convert errors
+    let tokens = prqlc_parser::lex_source(prql).unwrap();
+    let pl = prql_to_pl(prql)?;
+    Ok(codegen::WriteSource::write(
+        &pl.stmts,
+        codegen::WriteOpt {
+            tokens,
+            ..Default::default()
+        },
+    )
+    .unwrap())
+}
+
+#[test]
+fn test_format_prql() {
+    use insta::assert_display_snapshot;
+
+    assert_display_snapshot!(format_prql( "from db.employees | select {name, age}").unwrap(), @r###"
+    from db.employees
+    select {name, age}
+    "###);
+
+    assert_display_snapshot!(format_prql( r#"
+    # test comment
+    from db.employees # inline comment
+    # another test comment
+    select {name, age}"#
+).unwrap(), @r###"
+    # test comment
+    from db.employees # in
+    # another test comment
+    select {name, age}
+    "###);
+}
+
 /// JSON serialization and deserialization functions
 pub mod json {
     use super::*;

From 4e8a29f6cefe5366220077971f34914786653c28 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <m@maxroos.com>
Date: Sat, 20 Apr 2024 12:00:28 -0700
Subject: [PATCH 02/10]

---
 prqlc/prqlc/src/lib.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/prqlc/prqlc/src/lib.rs b/prqlc/prqlc/src/lib.rs
index 7eb3c5343127..f9bd7efe6569 100644
--- a/prqlc/prqlc/src/lib.rs
+++ b/prqlc/prqlc/src/lib.rs
@@ -367,14 +367,14 @@ pub fn format_prql(prql: &str) -> Result<String, ErrorMessages> {
 
 #[test]
 fn test_format_prql() {
-    use insta::assert_display_snapshot;
+    use insta::assert_snapshot;
 
-    assert_display_snapshot!(format_prql( "from db.employees | select {name, age}").unwrap(), @r###"
+    assert_snapshot!(format_prql( "from db.employees | select {name, age}").unwrap(), @r###"
     from db.employees
     select {name, age}
     "###);
 
-    assert_display_snapshot!(format_prql( r#"
+    assert_snapshot!(format_prql( r#"
     # test comment
     from db.employees # inline comment
     # another test comment

From 1a4858d026f51f5ba8bef3bd4f2cd2ce04fe79d1 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <m@maxroos.com>
Date: Sat, 20 Apr 2024 14:26:20 -0700
Subject: [PATCH 03/10] .

---
 prqlc/prqlc/src/codegen/ast.rs   | 92 +++++++++++++++++++++++---------
 prqlc/prqlc/src/codegen/mod.rs   |  7 ++-
 prqlc/prqlc/src/codegen/types.rs |  1 +
 prqlc/prqlc/src/lib.rs           | 31 ++++++-----
 4 files changed, 90 insertions(+), 41 deletions(-)

diff --git a/prqlc/prqlc/src/codegen/ast.rs b/prqlc/prqlc/src/codegen/ast.rs
index 253551455d1f..210ce28bd123 100644
--- a/prqlc/prqlc/src/codegen/ast.rs
+++ b/prqlc/prqlc/src/codegen/ast.rs
@@ -15,25 +15,29 @@ pub(crate) fn write_expr(expr: &Expr) -> String {
 }
 
 fn write_within<T: WriteSource>(node: &T, parent: &ExprKind, mut opt: WriteOpt) -> Option<String> {
+    // dbg!(&node, &parent);
     let parent_strength = binding_strength(parent);
     opt.context_strength = opt.context_strength.max(parent_strength);
 
-    node.write(opt)
+    // FIXME: this is extremely hacky. Our issue is that in:
+    //
+    //   from a.b  # comment
+    //
+    // ...we're writing both `from a.b` and `a.b`, so we need to know which of
+    // these to write comments for. I'm sure there are better ways to do it.
+    let comments = opt.enable_comments;
+    opt.enable_comments = false;
+    let out = node.write(opt.clone());
+    opt.enable_comments = comments;
+    out
 }
 
 impl WriteSource for Expr {
     fn write(&self, mut opt: WriteOpt) -> Option<String> {
-        // let span = self.span;
-        // let tokens = opt.tokens;
-        // let comment =
         let mut r = String::new();
 
         if let Some(span) = self.span {
             if let Some(comment) = find_comment_before(span, &opt.tokens) {
-                // dbg!(&span);
-                // dbg!(&self);
-                // dbg!(&comment);
-
                 r += &comment.to_string();
             }
         }
@@ -58,14 +62,29 @@ impl WriteSource for Expr {
             }
         };
 
-        if let Some(span) = self.span {
-            let comments = find_comments_after(span, &opt.tokens);
-            // dbg!(&span);
-            // dbg!(&self);
-            // dbg!(&comments);
+        if opt.enable_comments {
+            if let Some(span) = self.span {
+                // TODO: change underlying function so we can remove this
+                if opt.tokens.0.is_empty() {
+                    return Some(r);
+                }
 
-            for c in comments {
-                r += &c.kind.to_string();
+                let comments = find_comments_after(span, &opt.tokens);
+                // if !comments.is_empty() {
+                //     dbg!(&self, &span, &opt.tokens, &comments);
+                // }
+
+                for c in comments {
+                    match c.kind {
+                        // TODO: these are defined here since the debug
+                        // representations aren't quite right (NewLine is `new
+                        // line` as is used in error messages). But we should
+                        // probably move them onto the Struct.
+                        TokenKind::Comment(s) => r += format!(" #{}", s).as_str(),
+                        TokenKind::NewLine => r += "\n",
+                        _ => unreachable!(),
+                    }
+                }
             }
         }
         Some(r)
@@ -345,13 +364,6 @@ pub fn write_ident_part(s: &str) -> String {
     }
 }
 
-// impl WriteSource for ModuleDef {
-//     fn write(&self, mut opt: WriteOpt) -> Option<String> {
-//         codegen::WriteSource::write(&pl.stmts, codegen::WriteOpt::default()).unwrap()
-//     }}
-
-// impl WriteSource for ModuleDef {
-
 /// Find a comment before a span. If there's exactly one newline, then the
 /// comment is included; otherwise it's included after the current line.
 fn find_comment_before(span: Span, tokens: &TokenVec) -> Option<TokenKind> {
@@ -372,7 +384,7 @@ fn find_comment_before(span: Span, tokens: &TokenVec) -> Option<TokenKind> {
     }
 }
 
-/// Find a comment before a span. If there's exactly one newline, then the
+/// Find a comment after a span. If there's exactly one newline, then the
 /// comment is included; otherwise it's included after the current line.
 fn find_comments_after(span: Span, tokens: &TokenVec) -> Vec<Token> {
     let mut out = vec![];
@@ -382,11 +394,13 @@ fn find_comments_after(span: Span, tokens: &TokenVec) -> Vec<Token> {
         .iter()
         // FIXME: why isn't this working?
         // .position(|t| t.1.start == span.start && t.1.end == span.end)
-        .position(|t| t.span.start == span.start)
+        .position(|t| t.span.end == span.end)
         .unwrap_or_else(|| panic!("{:?}, {:?}", &tokens, &span));
-    for token in tokens.0.iter().skip(index) {
+    // dbg!(index, span, &tokens);
+    for token in tokens.0.iter().skip(index + 1) {
+        // match dbg!(token).kind {
         match token.kind {
-            TokenKind::NewLine | TokenKind::Comment(_) => out.push(dbg!(token.clone())),
+            TokenKind::NewLine | TokenKind::Comment(_) => out.push(token.clone()),
             _ => break,
         }
     }
@@ -562,6 +576,32 @@ mod test {
         stmt.write(WriteOpt::default()).unwrap()
     }
 
+    #[test]
+    fn test_find_comments_after() {
+        use insta::assert_debug_snapshot;
+        let tokens = prqlc_parser::lex_source(
+            r#"
+        let a = 5  # comment
+        "#,
+        )
+        .unwrap();
+
+        // This is the `5`
+        let span = Span {
+            start: 17,
+            end: 18,
+            source_id: 0,
+        };
+
+        let comment = find_comments_after(span, &tokens);
+        assert_debug_snapshot!(comment, @r###"
+    [
+        20..29: Comment(" comment"),
+        29..30: NewLine,
+    ]
+    "###);
+    }
+
     #[test]
     fn test_pipeline() {
         let short = Expr::new(ExprKind::Ident("short".to_string()));
diff --git a/prqlc/prqlc/src/codegen/mod.rs b/prqlc/prqlc/src/codegen/mod.rs
index f73570077352..93ab940f3d38 100644
--- a/prqlc/prqlc/src/codegen/mod.rs
+++ b/prqlc/prqlc/src/codegen/mod.rs
@@ -6,7 +6,7 @@ pub(crate) use types::{write_ty, write_ty_kind};
 
 use prqlc_parser::TokenVec;
 
-pub trait WriteSource {
+pub trait WriteSource: std::fmt::Debug {
     /// Converts self to its source representation according to specified
     /// options.
     ///
@@ -78,6 +78,9 @@ pub struct WriteOpt {
     /// The lexer tokens that were used to produce this source; used for
     /// comments.
     pub tokens: TokenVec,
+
+    // TODO: remove
+    pub enable_comments: bool,
 }
 
 impl Default for WriteOpt {
@@ -91,6 +94,7 @@ impl Default for WriteOpt {
             context_strength: 0,
             unbound_expr: false,
             tokens: TokenVec(vec![]),
+            enable_comments: true,
         }
     }
 }
@@ -131,6 +135,7 @@ impl WriteOpt {
     }
 }
 
+#[derive(Debug, Clone)]
 struct SeparatedExprs<'a, T: WriteSource> {
     exprs: &'a [T],
     inline: &'static str,
diff --git a/prqlc/prqlc/src/codegen/types.rs b/prqlc/prqlc/src/codegen/types.rs
index 318e7593873c..49772f20cfce 100644
--- a/prqlc/prqlc/src/codegen/types.rs
+++ b/prqlc/prqlc/src/codegen/types.rs
@@ -113,6 +113,7 @@ impl WriteSource for TyTupleField {
     }
 }
 
+#[derive(Debug, Clone)]
 struct UnionVariant<'a>(&'a Option<String>, &'a Ty);
 
 impl WriteSource for UnionVariant<'_> {
diff --git a/prqlc/prqlc/src/lib.rs b/prqlc/prqlc/src/lib.rs
index f9bd7efe6569..181ceb882c74 100644
--- a/prqlc/prqlc/src/lib.rs
+++ b/prqlc/prqlc/src/lib.rs
@@ -311,15 +311,6 @@ pub fn prql_to_pl(prql: &str) -> Result<ast::ModuleDef, ErrorMessages> {
     prql_to_pl_tree(&source_tree)
 }
 
-// pub fn fmt(prql: &str) -> Result<String> {
-//     let source_tree = SourceTree::from(prql);
-//     let pl = prql_to_pl_tree(&source_tree)?;
-
-//     for x in pl.stmts {
-//         x.
-
-// }
-
 /// Parse PRQL into a PL AST
 pub fn prql_to_pl_tree(prql: &SourceTree) -> Result<ast::ModuleDef, ErrorMessages> {
     parser::parse(prql).map_err(|e| ErrorMessages::from(e).composed(prql))
@@ -364,6 +355,16 @@ pub fn format_prql(prql: &str) -> Result<String, ErrorMessages> {
     )
     .unwrap())
 }
+#[test]
+fn test_format_comment_basic() {
+    use insta::assert_snapshot;
+    assert_snapshot!(format_prql( r#"
+    from db.employees # inline comment
+    "#
+).unwrap(), @r###"
+    from db.employees # inline comment
+    "###);
+}
 
 #[test]
 fn test_format_prql() {
@@ -375,13 +376,15 @@ fn test_format_prql() {
     "###);
 
     assert_snapshot!(format_prql( r#"
+        # test comment
+        from db.employees # inline comment
+        # another test comment
+        select {name, age}"#
+    ).unwrap(), @r###"
     # test comment
     from db.employees # inline comment
-    # another test comment
-    select {name, age}"#
-).unwrap(), @r###"
-    # test comment
-    from db.employees # in
+     # another test comment
+
     # another test comment
     select {name, age}
     "###);

From 73c0504ca2a639a4f15859724968aaf2f30f01e2 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <m@maxroos.com>
Date: Sun, 12 May 2024 15:44:00 -0700
Subject: [PATCH 04/10] .

---
 prqlc/prqlc-ast/src/span.rs    |  9 ++++
 prqlc/prqlc-parser/src/lib.rs  |  3 +-
 prqlc/prqlc/src/codegen/ast.rs | 87 ++++++++++++++++++++++++----------
 prqlc/prqlc/src/lib.rs         | 22 +++++++--
 4 files changed, 88 insertions(+), 33 deletions(-)

diff --git a/prqlc/prqlc-ast/src/span.rs b/prqlc/prqlc-ast/src/span.rs
index 1fb025a8a126..9227c79c8f2e 100644
--- a/prqlc/prqlc-ast/src/span.rs
+++ b/prqlc/prqlc-ast/src/span.rs
@@ -17,6 +17,15 @@ impl From<Span> for Range<usize> {
         a.start..a.end
     }
 }
+impl From<Range<usize>> for Span {
+    fn from(range: Range<usize>) -> Self {
+        Span {
+            start: range.start,
+            end: range.end,
+            source_id: 0, // Default value as Range<usize> does not provide a source_id
+        }
+    }
+}
 
 impl Debug for Span {
     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
diff --git a/prqlc/prqlc-parser/src/lib.rs b/prqlc/prqlc-parser/src/lib.rs
index 6a150c9f12cf..1b9887ee8eeb 100644
--- a/prqlc/prqlc-parser/src/lib.rs
+++ b/prqlc/prqlc-parser/src/lib.rs
@@ -14,8 +14,7 @@ use prqlc_ast::error::{Error, Reason, WithErrorInfo};
 use prqlc_ast::stmt::*;
 use prqlc_ast::Span;
 
-use lexer::Token;
-pub use lexer::{TokenKind, TokenVec};
+pub use lexer::{Token, TokenKind, TokenVec};
 use span::ParserSpan;
 
 /// Build PRQL AST from a PRQL query string.
diff --git a/prqlc/prqlc/src/codegen/ast.rs b/prqlc/prqlc/src/codegen/ast.rs
index 210ce28bd123..f02d3cb94552 100644
--- a/prqlc/prqlc/src/codegen/ast.rs
+++ b/prqlc/prqlc/src/codegen/ast.rs
@@ -2,10 +2,9 @@ use std::collections::HashSet;
 
 use once_cell::sync::Lazy;
 use prqlc_parser::{Token, TokenKind, TokenVec};
-
-use crate::ast::*;
 use regex::Regex;
 
+use crate::ast::*;
 use crate::codegen::SeparatedExprs;
 
 use super::{WriteOpt, WriteSource};
@@ -364,8 +363,13 @@ pub fn write_ident_part(s: &str) -> String {
     }
 }
 
-/// Find a comment before a span. If there's exactly one newline, then the
-/// comment is included; otherwise it's included after the current line.
+// impl WriteSource for ModuleDef {
+//     fn write(&self, mut opt: WriteOpt) -> Option<String> {
+//         codegen::WriteSource::write(&pl.stmts, codegen::WriteOpt::default()).unwrap()
+//     }}
+
+/// Find a comment before a span. If there's exactly one newline prior, then the
+/// comment is included here. Any furthur above are included with the prior token.
 fn find_comment_before(span: Span, tokens: &TokenVec) -> Option<TokenKind> {
     // index of the span in the token vec
     let index = tokens
@@ -387,7 +391,6 @@ fn find_comment_before(span: Span, tokens: &TokenVec) -> Option<TokenKind> {
 /// Find a comment after a span. If there's exactly one newline, then the
 /// comment is included; otherwise it's included after the current line.
 fn find_comments_after(span: Span, tokens: &TokenVec) -> Vec<Token> {
-    let mut out = vec![];
     // index of the span in the token vec
     let index = tokens
         .0
@@ -396,9 +399,9 @@ fn find_comments_after(span: Span, tokens: &TokenVec) -> Vec<Token> {
         // .position(|t| t.1.start == span.start && t.1.end == span.end)
         .position(|t| t.span.end == span.end)
         .unwrap_or_else(|| panic!("{:?}, {:?}", &tokens, &span));
-    // dbg!(index, span, &tokens);
+
+    let mut out = vec![];
     for token in tokens.0.iter().skip(index + 1) {
-        // match dbg!(token).kind {
         match token.kind {
             TokenKind::NewLine | TokenKind::Comment(_) => out.push(token.clone()),
             _ => break,
@@ -555,7 +558,8 @@ impl WriteSource for SwitchCase {
 
 #[cfg(test)]
 mod test {
-    use insta::assert_snapshot;
+    use insta::{assert_debug_snapshot, assert_snapshot};
+    use prqlc_parser::lex_source;
 
     use super::*;
 
@@ -577,29 +581,60 @@ mod test {
     }
 
     #[test]
-    fn test_find_comments_after() {
-        use insta::assert_debug_snapshot;
-        let tokens = prqlc_parser::lex_source(
+    fn test_find_comment_before() {
+        let tokens = lex_source(
             r#"
-        let a = 5  # comment
-        "#,
+            # comment
+            let a = 5
+            "#,
         )
         .unwrap();
+        let span = tokens
+            .clone()
+            .0
+            .iter()
+            .find(|t| t.kind == TokenKind::Keyword("let".to_string()))
+            .unwrap()
+            .span
+            .clone();
+        let comment = find_comment_before(span.into(), &tokens);
+        assert_debug_snapshot!(comment, @r###"
+        Some(
+            Comment(
+                " comment",
+            ),
+        )
+        "###);
+    }
 
-        // This is the `5`
-        let span = Span {
-            start: 17,
-            end: 18,
-            source_id: 0,
-        };
-
-        let comment = find_comments_after(span, &tokens);
+    #[test]
+    fn test_find_comments_after() {
+        let tokens = lex_source(
+            r#"
+            let a = 5 # on side
+            # below
+            # and another
+            "#,
+        )
+        .unwrap();
+        let span = dbg!(tokens.clone())
+            .0
+            .iter()
+            .find(|t| t.kind == TokenKind::Literal(Literal::Integer(5)))
+            .unwrap()
+            .span
+            .clone();
+        let comment = find_comments_after(span.into(), &tokens);
         assert_debug_snapshot!(comment, @r###"
-    [
-        20..29: Comment(" comment"),
-        29..30: NewLine,
-    ]
-    "###);
+        [
+            23..32: Comment(" on side"),
+            32..33: NewLine,
+            45..52: Comment(" below"),
+            52..53: NewLine,
+            65..78: Comment(" and another"),
+            78..79: NewLine,
+        ]
+        "###);
     }
 
     #[test]
diff --git a/prqlc/prqlc/src/lib.rs b/prqlc/prqlc/src/lib.rs
index cd05cdab79cf..ac40247a186e 100644
--- a/prqlc/prqlc/src/lib.rs
+++ b/prqlc/prqlc/src/lib.rs
@@ -389,17 +389,29 @@ fn test_format_prql() {
     "###);
 
     assert_snapshot!(format_prql( r#"
+        from employees 
         # test comment
-        from db.employees # inline comment
+        select {name}
+        "#
+    ).unwrap(), @r###"
+    from employees
+     # test comment
+
+    # test comment
+    select {name}
+    "###);
+
+    assert_snapshot!(format_prql( r#"
+        # test comment
+        from employees # inline comment
         # another test comment
-        select {name, age}"#
+        select {name}"#
     ).unwrap(), @r###"
     # test comment
-    from db.employees # inline comment
-     # another test comment
+    from employees # inline comment
 
     # another test comment
-    select {name, age}
+    select {name}
     "###);
 }
 

From 51a85b3920f8c4be8d0258e379d432f09c2a6c2d Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 12 May 2024 22:44:50 +0000
Subject: [PATCH 05/10] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 prqlc/prqlc/src/codegen/ast.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/prqlc/prqlc/src/codegen/ast.rs b/prqlc/prqlc/src/codegen/ast.rs
index f02d3cb94552..a6efd67e2a88 100644
--- a/prqlc/prqlc/src/codegen/ast.rs
+++ b/prqlc/prqlc/src/codegen/ast.rs
@@ -369,7 +369,7 @@ pub fn write_ident_part(s: &str) -> String {
 //     }}
 
 /// Find a comment before a span. If there's exactly one newline prior, then the
-/// comment is included here. Any furthur above are included with the prior token.
+/// comment is included here. Any further above are included with the prior token.
 fn find_comment_before(span: Span, tokens: &TokenVec) -> Option<TokenKind> {
     // index of the span in the token vec
     let index = tokens

From 7021c49a1df9155f25111104996fb9028d1b7d93 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <m@maxroos.com>
Date: Sun, 12 May 2024 16:06:41 -0700
Subject: [PATCH 06/10]

---
 prqlc/prqlc/src/codegen/ast.rs | 127 ++++++++++++++++++---------------
 prqlc/prqlc/src/lib.rs         |   7 +-
 2 files changed, 73 insertions(+), 61 deletions(-)

diff --git a/prqlc/prqlc/src/codegen/ast.rs b/prqlc/prqlc/src/codegen/ast.rs
index a6efd67e2a88..24c23b1e0b01 100644
--- a/prqlc/prqlc/src/codegen/ast.rs
+++ b/prqlc/prqlc/src/codegen/ast.rs
@@ -24,10 +24,10 @@ fn write_within<T: WriteSource>(node: &T, parent: &ExprKind, mut opt: WriteOpt)
     //
     // ...we're writing both `from a.b` and `a.b`, so we need to know which of
     // these to write comments for. I'm sure there are better ways to do it.
-    let comments = opt.enable_comments;
+    let enable_comments = opt.enable_comments;
     opt.enable_comments = false;
-    let out = node.write(opt.clone());
-    opt.enable_comments = comments;
+    let out = dbg!(node.write(opt.clone()));
+    opt.enable_comments = enable_comments;
     out
 }
 
@@ -35,11 +35,11 @@ impl WriteSource for Expr {
     fn write(&self, mut opt: WriteOpt) -> Option<String> {
         let mut r = String::new();
 
-        if let Some(span) = self.span {
-            if let Some(comment) = find_comment_before(span, &opt.tokens) {
-                r += &comment.to_string();
-            }
-        }
+        // if let Some(span) = self.span {
+        //     if let Some(comment) = find_comment_before(span, &opt.tokens) {
+        //         r += &comment.to_string();
+        //     }
+        // }
         if let Some(alias) = &self.alias {
             r += opt.consume(alias)?;
             r += opt.consume(" = ")?;
@@ -73,13 +73,25 @@ impl WriteSource for Expr {
                 //     dbg!(&self, &span, &opt.tokens, &comments);
                 // }
 
+                // If the first item is a comment, it's an inline comment, and
+                // so add two spaces
+                if matches!(
+                    comments.first(),
+                    Some(Token {
+                        kind: TokenKind::Comment(_),
+                        ..
+                    })
+                ) {
+                    r += "  ";
+                }
+
                 for c in comments {
-                    match c.kind {
+                    match dbg!(c.kind) {
                         // TODO: these are defined here since the debug
                         // representations aren't quite right (NewLine is `new
                         // line` as is used in error messages). But we should
                         // probably move them onto the Struct.
-                        TokenKind::Comment(s) => r += format!(" #{}", s).as_str(),
+                        TokenKind::Comment(s) => r += format!("#{}", s).as_str(),
                         TokenKind::NewLine => r += "\n",
                         _ => unreachable!(),
                     }
@@ -368,28 +380,27 @@ pub fn write_ident_part(s: &str) -> String {
 //         codegen::WriteSource::write(&pl.stmts, codegen::WriteOpt::default()).unwrap()
 //     }}
 
-/// Find a comment before a span. If there's exactly one newline prior, then the
-/// comment is included here. Any further above are included with the prior token.
-fn find_comment_before(span: Span, tokens: &TokenVec) -> Option<TokenKind> {
-    // index of the span in the token vec
-    let index = tokens
-        .0
-        .iter()
-        .position(|t| t.span.start == span.start && t.span.end == span.end)?;
-    if index <= 1 {
-        return None;
-    }
-    let prior_token = &tokens.0[index - 1].kind;
-    let prior_2_token = &tokens.0[index - 2].kind;
-    if matches!(prior_token, TokenKind::NewLine) && matches!(prior_2_token, TokenKind::Comment(_)) {
-        Some(prior_2_token.clone())
-    } else {
-        None
-    }
-}
-
-/// Find a comment after a span. If there's exactly one newline, then the
-/// comment is included; otherwise it's included after the current line.
+// /// Find a comment before a span. If there's exactly one newline prior, then the
+// /// comment is included here. Any furthur above are included with the prior token.
+// fn find_comment_before(span: Span, tokens: &TokenVec) -> Option<TokenKind> {
+//     // index of the span in the token vec
+//     let index = tokens
+//         .0
+//         .iter()
+//         .position(|t| t.span.start == span.start && t.span.end == span.end)?;
+//     if index <= 1 {
+//         return None;
+//     }
+//     let prior_token = &tokens.0[index - 1].kind;
+//     let prior_2_token = &tokens.0[index - 2].kind;
+//     if matches!(prior_token, TokenKind::NewLine) && matches!(prior_2_token, TokenKind::Comment(_)) {
+//         Some(prior_2_token.clone())
+//     } else {
+//         None
+//     }
+// }
+
+/// Find comments after a given span.
 fn find_comments_after(span: Span, tokens: &TokenVec) -> Vec<Token> {
     // index of the span in the token vec
     let index = tokens
@@ -580,32 +591,32 @@ mod test {
         stmt.write(WriteOpt::default()).unwrap()
     }
 
-    #[test]
-    fn test_find_comment_before() {
-        let tokens = lex_source(
-            r#"
-            # comment
-            let a = 5
-            "#,
-        )
-        .unwrap();
-        let span = tokens
-            .clone()
-            .0
-            .iter()
-            .find(|t| t.kind == TokenKind::Keyword("let".to_string()))
-            .unwrap()
-            .span
-            .clone();
-        let comment = find_comment_before(span.into(), &tokens);
-        assert_debug_snapshot!(comment, @r###"
-        Some(
-            Comment(
-                " comment",
-            ),
-        )
-        "###);
-    }
+    // #[test]
+    // fn test_find_comment_before() {
+    //     let tokens = lex_source(
+    //         r#"
+    //         # comment
+    //         let a = 5
+    //         "#,
+    //     )
+    //     .unwrap();
+    //     let span = tokens
+    //         .clone()
+    //         .0
+    //         .iter()
+    //         .find(|t| t.kind == TokenKind::Keyword("let".to_string()))
+    //         .unwrap()
+    //         .span
+    //         .clone();
+    //     let comment = find_comment_before(span.into(), &tokens);
+    //     assert_debug_snapshot!(comment, @r###"
+    //     Some(
+    //         Comment(
+    //             " comment",
+    //         ),
+    //     )
+    //     "###);
+    // }
 
     #[test]
     fn test_find_comments_after() {
diff --git a/prqlc/prqlc/src/lib.rs b/prqlc/prqlc/src/lib.rs
index ac40247a186e..a117eb3dba62 100644
--- a/prqlc/prqlc/src/lib.rs
+++ b/prqlc/prqlc/src/lib.rs
@@ -375,7 +375,8 @@ fn test_format_comment_basic() {
     from db.employees # inline comment
     "#
 ).unwrap(), @r###"
-    from db.employees # inline comment
+    from db.employees  # inline comment
+
     "###);
 }
 
@@ -395,10 +396,10 @@ fn test_format_prql() {
         "#
     ).unwrap(), @r###"
     from employees
-     # test comment
-
     # test comment
+
     select {name}
+
     "###);
 
     assert_snapshot!(format_prql( r#"

From 99328e81e08df7a947d176784ca9a2868a0fff19 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 12 May 2024 23:07:01 +0000
Subject: [PATCH 07/10] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 prqlc/prqlc/src/codegen/ast.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/prqlc/prqlc/src/codegen/ast.rs b/prqlc/prqlc/src/codegen/ast.rs
index 24c23b1e0b01..e83c69d7404d 100644
--- a/prqlc/prqlc/src/codegen/ast.rs
+++ b/prqlc/prqlc/src/codegen/ast.rs
@@ -381,7 +381,7 @@ pub fn write_ident_part(s: &str) -> String {
 //     }}
 
 // /// Find a comment before a span. If there's exactly one newline prior, then the
-// /// comment is included here. Any furthur above are included with the prior token.
+// /// comment is included here. Any further above are included with the prior token.
 // fn find_comment_before(span: Span, tokens: &TokenVec) -> Option<TokenKind> {
 //     // index of the span in the token vec
 //     let index = tokens

From 47f4fb226081276dc1b9c9e642908c2112f4986c Mon Sep 17 00:00:00 2001
From: Maximilian Roos <m@maxroos.com>
Date: Sun, 12 May 2024 16:10:01 -0700
Subject: [PATCH 08/10] .

---
 prqlc/prqlc/src/codegen/ast.rs | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/prqlc/prqlc/src/codegen/ast.rs b/prqlc/prqlc/src/codegen/ast.rs
index e83c69d7404d..885f15915cd1 100644
--- a/prqlc/prqlc/src/codegen/ast.rs
+++ b/prqlc/prqlc/src/codegen/ast.rs
@@ -14,7 +14,6 @@ pub(crate) fn write_expr(expr: &Expr) -> String {
 }
 
 fn write_within<T: WriteSource>(node: &T, parent: &ExprKind, mut opt: WriteOpt) -> Option<String> {
-    // dbg!(&node, &parent);
     let parent_strength = binding_strength(parent);
     opt.context_strength = opt.context_strength.max(parent_strength);
 
@@ -26,7 +25,7 @@ fn write_within<T: WriteSource>(node: &T, parent: &ExprKind, mut opt: WriteOpt)
     // these to write comments for. I'm sure there are better ways to do it.
     let enable_comments = opt.enable_comments;
     opt.enable_comments = false;
-    let out = dbg!(node.write(opt.clone()));
+    let out = node.write(opt.clone());
     opt.enable_comments = enable_comments;
     out
 }
@@ -69,9 +68,6 @@ impl WriteSource for Expr {
                 }
 
                 let comments = find_comments_after(span, &opt.tokens);
-                // if !comments.is_empty() {
-                //     dbg!(&self, &span, &opt.tokens, &comments);
-                // }
 
                 // If the first item is a comment, it's an inline comment, and
                 // so add two spaces
@@ -86,7 +82,7 @@ impl WriteSource for Expr {
                 }
 
                 for c in comments {
-                    match dbg!(c.kind) {
+                    match c.kind {
                         // TODO: these are defined here since the debug
                         // representations aren't quite right (NewLine is `new
                         // line` as is used in error messages). But we should
@@ -628,7 +624,8 @@ mod test {
             "#,
         )
         .unwrap();
-        let span = dbg!(tokens.clone())
+        let span = tokens
+            .clone()
             .0
             .iter()
             .find(|t| t.kind == TokenKind::Literal(Literal::Integer(5)))

From e0c02e7bcb809cfd1a01f746b029fd3f3edcd469 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <m@maxroos.com>
Date: Sun, 12 May 2024 16:20:42 -0700
Subject: [PATCH 09/10]

---
 prqlc/prqlc/src/lib.rs | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/prqlc/prqlc/src/lib.rs b/prqlc/prqlc/src/lib.rs
index a117eb3dba62..80b822440d71 100644
--- a/prqlc/prqlc/src/lib.rs
+++ b/prqlc/prqlc/src/lib.rs
@@ -408,10 +408,9 @@ fn test_format_prql() {
         # another test comment
         select {name}"#
     ).unwrap(), @r###"
-    # test comment
-    from employees # inline comment
-
+    from employees  # inline comment
     # another test comment
+
     select {name}
     "###);
 }

From 4e47f09975be19c3812c642c40f239032fa9d626 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <m@maxroos.com>
Date: Thu, 16 May 2024 06:56:22 -0700
Subject: [PATCH 10/10]

---
 prqlc/prqlc/src/codegen/mod.rs | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/prqlc/prqlc/src/codegen/mod.rs b/prqlc/prqlc/src/codegen/mod.rs
index 93ab940f3d38..68459772711b 100644
--- a/prqlc/prqlc/src/codegen/mod.rs
+++ b/prqlc/prqlc/src/codegen/mod.rs
@@ -31,11 +31,14 @@ pub trait WriteSource: std::fmt::Debug {
         Some(r)
     }
 
+    /// Attempts to write the current item, expanding the maximum width where necessary.
     fn write_or_expand(&self, mut opt: WriteOpt) -> String {
         loop {
             if let Some(s) = self.write(opt.clone()) {
                 return s;
             } else {
+                // TODO: could we just set the max width rather than increasing
+                // it in a loop?
                 opt.max_width += opt.max_width / 2;
                 opt.reset_line();
             }
@@ -113,6 +116,8 @@ impl WriteOpt {
         Some(())
     }
 
+    /// Sets [WriteOpt::rem_width] to (max_width - indent_width), returning
+    /// `Some(())`, or `None` if there's not enough space.
     fn reset_line(&mut self) -> Option<()> {
         let ident = self.tab.len() as u16 * self.indent;
         self.rem_width = self.max_width.checked_sub(ident)?;