Some potential work on typespecs and representation

Additionally some initial work on string escape processing.
vcfxb · Nov 29, 2023 · 6a0d597 · 6a0d597
1 parent f55b0e2
commit 6a0d597
Show file tree

Hide file tree

Showing 13 changed files with 285 additions and 96 deletions.
diff --git a/examples/represent.wr b/examples/represent.wr
@@ -0,0 +1,53 @@
+
+
+import wright::box::Box;
+import wright::box::NullableBox;
+
+type Option<type T> {
+    func some(t: T) -> Self;
+    func none() -> Self;
+    func is_some(&self) -> bool;
+    func is_none(&self) -> bool;
+    # ... etc
+}
+
+union DefaultOptionRepresentation<type T> { some: T | none: void };
+
+implement Option<T> as DefaultOptionRepresentation<T> {
+    const func some(t: T) -> Self {
+        DefaultOptionRepresentation { some: t }
+    }
+
+    const func none() -> Self {
+        DefaultOptionRepresentation { none: void }
+    }
+
+    const func is_some(&self) -> bool {
+        self is DefaultOptionRepresentation.some
+    }
+
+    const func is_none(&self) -> bool {
+        self is DefaultOptionRepresentation.none
+    }
+
+    # ... etc
+}
+
+# Specialized representation: an `Option<Box<T>>` is stored as a `NullableBox<T>`,
+# with the null box standing in for `none`.
+implement Option<Box<T>> as NullableBox<T> {
+    # Box the value and reinterpret the (non-null) box as a nullable one.
+    func some(t: T) -> Self {
+        Box::new(t) as NullableBox
+    }
+
+    # `none` is represented by the null box.
+    const func none() -> Self {
+        NullableBox::null()
+    }
+
+    # A value is present exactly when the box is not null.
+    const func is_some(&self) -> bool {
+        !self.is_null()
+    }
+
+    # A value is absent exactly when the box is null.
+    const func is_none(&self) -> bool {
+        self.is_null()
+    }
+}
+
diff --git a/pages/book/src/SUMMARY.md b/pages/book/src/SUMMARY.md
@@ -7,3 +7,4 @@
 
 - [Language Constructs](./design-notes/language-constructs.md)
 - [User Defined Optimizations](./design-notes/user-defined-optimizations.md)
+- [Threads](./design-notes/threads.md)
diff --git a/pages/book/src/design-notes/threads.md b/pages/book/src/design-notes/threads.md
@@ -0,0 +1,6 @@
+
+For many languages, threading can be a point of tension. When to use it (especially now that single-threaded async is more common),
+how to use it, and how to optimize it are all common issues. 
+
+In building wright, I decided it would be best to separate async and synchronous code/threads to avoid unnecessarily 
+compiling/linking/running an async runtime to manage futures. 
diff --git a/wright/src/parser/ast/expression/literal.rs b/wright/src/parser/ast/expression/literal.rs
@@ -9,14 +9,15 @@ use crate::parser::{
 };
 
 use self::{
-    boolean::{parse_boolean_literal, BooleanLiteral},
-    integer::{parse_integer_literal, IntegerLiteral},
+    boolean::BooleanLiteral,
+    integer::IntegerLiteral,
 };
 
 pub mod boolean;
 pub mod integer;
-// pub mod string;
-// pub mod character;
+pub mod string;
+pub mod character;
+pub(self) mod escapes;
 
 #[derive(Debug)]
 pub enum Literal<'src> {
@@ -39,16 +40,16 @@ where
     erase(discard_errors(map_node_type(parser_function, literal_conversion)))
 }
 
-/// Parse a literal from source code.
-pub fn parse_literal<'src>(
-    parser_state: &mut ParserState<'src>,
-) -> NodeParserOption<Literal<'src>> {
-    // Make a parser that finds the first successfull literal parse.
-    let parser = first_sucessful(vec![
-        convert_to_literal_parser(parse_integer_literal, Literal::Integer),
-        convert_to_literal_parser(parse_boolean_literal, Literal::Boolean),
-    ]);
-
-    // Call that parser.
-    (parser)(parser_state)
+impl<'src> Literal<'src> {
+    /// Parse a literal value in source code, trying each kind of literal parser in turn.
+    pub fn parse(parser_state: &mut ParserState<'src>) -> NodeParserOption<Self> {
+        // Make a parser that finds the first successful literal parse (integer first, then boolean).
+        let parser = first_sucessful(vec![
+            convert_to_literal_parser(IntegerLiteral::parse, Literal::Integer),
+            convert_to_literal_parser(BooleanLiteral::parse, Literal::Boolean),
+        ]);
+
+        // Call that parser on the current parser state and return its result.
+        (parser)(parser_state)
+    }
 }
diff --git a/wright/src/parser/ast/expression/literal/boolean.rs b/wright/src/parser/ast/expression/literal/boolean.rs
@@ -20,30 +20,30 @@ pub struct BooleanLiteral<'src> {
     pub value: bool,
 }
 
-/// Attempt to parse a boolean literal from the lexer held by the parser state.
-/// Do not mutate parser state if a viable [`TokenTy`] is not available from the lexer. 
-pub fn parse_boolean_literal<'src>(
-    parser_state: &mut ParserState<'src>,
-) -> NodeParserResult<BooleanLiteral<'src>> {
-    // Try to parse a `true` token and a `false` token.
-    for (token_ty, value) in [(TokenTy::True, true), (TokenTy::False, false)] {
-        // Try to take the appropriate token from the parser state.
-        if let Some(IndexedToken {
-            index,
-            token: Token { length, .. },
-        }) = parser_state.next_token_if_ty_eq(token_ty)
-        {
-            // On success, return the popped token's appropriate AST node.
-            return Ok(BooleanLiteral {
-                meta: parser_state.make_ast_node_meta(index, length),
-                value,
-            });
+impl<'src> BooleanLiteral<'src> {
+    /// Attempt to parse a boolean literal from the lexer held by the parser state.
+    /// Do not mutate parser state if a viable [`TokenTy`] is not available from the lexer. 
+    pub fn parse(parser_state: &mut ParserState<'src>) -> NodeParserResult<Self> {
+        // Try to parse a `true` token and a `false` token.
+        for (token_ty, value) in [(TokenTy::True, true), (TokenTy::False, false)] {
+            // Try to take the appropriate token from the parser state.
+            if let Some(IndexedToken {
+                index,
+                token: Token { length, .. },
+            }) = parser_state.next_token_if_ty_eq(token_ty)
+            {
+                // On success, return the popped token's appropriate AST node.
+                return Ok(BooleanLiteral {
+                    meta: parser_state.make_ast_node_meta(index, length),
+                    value,
+                });
+            }
         }
+
+        // If neither parse succeeds, return an error.
+        Err(ParserError {
+            byte_range: parser_state.peek_byte_range(),
+            ty: ParserErrorVariant::Expected("boolean literal"),
+        })   
     }
-
-    // If neither parse succeeds, return an error.
-    Err(ParserError {
-        byte_range: parser_state.peek_byte_range(),
-        ty: ParserErrorVariant::Expected("boolean literal"),
-    })
 }
diff --git a/wright/src/parser/ast/expression/literal/escapes.rs b/wright/src/parser/ast/expression/literal/escapes.rs
@@ -0,0 +1,50 @@
+//! Utilities for dealing with escaped characters in string and char literals. 
+
+use std::{borrow::Cow, iter::Peekable, str::CharIndices};
+
+pub fn unescape(source_str_lit_body: &str) -> Cow<'_, str> {
+    unimplemented!()
+}
+
+#[derive(Debug)]
+struct StringLiteralPartsIterator<'str_lit> {
+    /// The body of the string literal being unescaped.
+    str_lit_body: &'str_lit str,
+
+    /// A peekable iterator over `(byte index, char)` pairs; presumably those of `str_lit_body` (TODO confirm once construction is implemented).
+    iter: Peekable<CharIndices<'str_lit>>,
+}
+
+enum StringLiteralPart<'str_lit> {
+    /// A sequence of unescaped characters.
+    UnescapedCharacters(&'str_lit str),
+
+    UnicodeEscape {
+        /// The part of the string literal that contains this escape sequence. 
+        matching_source: &'str_lit str,
+        /// The result of attempting to parse the escaped value into a unicode codepoint. 
+        parsed: Option<char>,
+    },
+}
+
+enum UnicodeEscapeError {
+    /// There were too many digits in the escape sequence.
+    TooManyDigits,
+    /// The escape sequence was empty.
+    Empty,
+    /// The escaped digits do not represent a valid unicode codepoint.
+    InvalidCodepoint,
+}
+
+impl<'str_lit> StringLiteralPartsIterator<'str_lit> {
+
+}
+
+impl<'str_lit> Iterator for StringLiteralPartsIterator<'str_lit> {
+    type Item = StringLiteralPart<'str_lit>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        unimplemented!()
+
+    }
+}
diff --git a/wright/src/parser/ast/expression/literal/integer.rs b/wright/src/parser/ast/expression/literal/integer.rs
@@ -22,52 +22,52 @@ pub struct IntegerLiteral<'src> {
     pub value: BigUint,
 }
 
-/// Parse an [`IntegerLiteral`] from source code.
-/// Do not mutate parser state if there is not a [`TokenTy::IntegerLit`] next. 
-pub fn parse_integer_literal<'src>(
-    parser_state: &mut ParserState<'src>,
-) -> NodeParserResult<IntegerLiteral<'src>> {
-    // Read and destructure an integer literal token from the lexer.
-    let IndexedToken {
-        index,
-        token: Token { length, .. },
-    } = parser_state
-        // All integer literals should be of this token type.
-        .next_token_if_ty_eq(TokenTy::IntegerLit)
-        // Error out if the next token is not an integer literal.
-        .ok_or_else(|| ParserError {
-            byte_range: parser_state.peek_byte_range(),
-            ty: ParserErrorVariant::Expected("integer literal"),
-        })?;
+impl<'src> IntegerLiteral<'src> {
+    /// Parse a literal integer from source code.
+    /// Do not mutate parser state if there is not a [`TokenTy::IntegerLit`] next.
+    ///
+    /// # Errors
+    /// Returns a [`ParserError`] with [`ParserErrorVariant::Expected`] if the next token is not an
+    /// integer literal.
+    ///
+    /// # Panics
+    /// Panics if the token body cannot be parsed in the detected radix; the lexer is expected to
+    /// have validated the literal's format already.
+    pub fn parse(parser_state: &mut ParserState<'src>) -> NodeParserResult<Self> {
+        // Read and destructure an integer literal token from the lexer.
+        let IndexedToken {
+            index,
+            token: Token { length, .. },
+        } = parser_state
+            // All integer literals should be of this token type.
+            .next_token_if_ty_eq(TokenTy::IntegerLit)
+            // Error out if the next token is not an integer literal.
+            .ok_or_else(|| ParserError {
+                byte_range: parser_state.peek_byte_range(),
+                ty: ParserErrorVariant::Expected("integer literal"),
+            })?;
 
-    // Get the matching source of this token.
-    let matching_source = &parser_state.source[index..index + length];
-    // Check for a prefix
-    let prefix = &matching_source[..cmp::max(2, matching_source.len())];
+        // Get the matching source of this token.
+        let matching_source = &parser_state.source[index..index + length];
+        // Check for a prefix: take at most the first two bytes of the literal. This must be `min`
+        // (not `max`) so that one-character literals like `5` are not sliced out of bounds and so
+        // that `0x1F` yields the prefix `0x` rather than the whole token.
+        let prefix = &matching_source[..cmp::min(2, matching_source.len())];
 
-    // Get a radix off the prefix
-    let radix = match prefix {
-        "0x" | "0X" => 16,
-        "0b" | "0B" => 2,
-        "0o" => 8,
-        _ => 10,
-    };
+        // Get a radix off the prefix
+        let radix = match prefix {
+            "0x" | "0X" => 16,
+            "0b" | "0B" => 2,
+            "0o" => 8,
+            _ => 10,
+        };
 
-    // Strip the prefix from the string to get the body of it to parse.
-    let body = if radix != 10 {
-        &matching_source[2..]
-    } else {
-        matching_source
-    };
+        // Strip the prefix from the string to get the body of it to parse.
+        let body = if radix != 10 {
+            &matching_source[2..]
+        } else {
+            matching_source
+        };
 
-    // Parse it.
-    let value = BigUint::from_str_radix(body, radix)
-        // Panic here as the lexer should check for this.
-        .expect("lexer checks integer literal format");
+        // Parse it.
+        let value = BigUint::from_str_radix(body, radix)
+            // Panic here as the lexer should check for this.
+            .expect("lexer checks integer literal format");
 
-    // Return ok.
-    Ok(IntegerLiteral {
-        meta: parser_state.make_ast_node_meta(index, length),
-        value,
-    })
+        // Return ok.
+        Ok(IntegerLiteral {
+            meta: parser_state.make_ast_node_meta(index, length),
+            value,
+        })
+    }
 }
diff --git a/wright/src/parser/ast/expression/literal/string.rs b/wright/src/parser/ast/expression/literal/string.rs
@@ -0,0 +1,69 @@
+//! AST node representation and parsing implementation for string literals. 
+
+use std::rc::Rc;
+
+use crate::parser::{ast::metadata::AstNodeMeta, state::ParserState, util::NodeParserResult, lexer::{tokens::{TokenTy, Token}, IndexedToken}, error::{ParserError, ParserErrorVariant}};
+
+/// The value of a string literal in source code.
+#[derive(Debug, Clone)]
+pub enum StringLiteralValue<'src> {
+    /// A string literal in source code without any escapes can be represented directly
+    /// using a reference into the source code. This will refer to the string literal without the
+    /// opening and closing quotation marks.
+    WithoutEscapes(&'src str),
+
+    /// A string literal in source code with escapes must be represented using an owned string, as
+    /// we have to do some processing to resolve all the escapes into the actual unescaped unicode string.
+    /// We store this in an [`Rc`] to make cloning less expensive, as we will not need to mutate this string
+    /// while it's in the AST.
+    WithEscapes(Rc<str>)
+}
+
+/// A string literal in source code.
+#[derive(Debug)]
+pub struct StringLit<'src> {
+    /// The metadata about this node.
+    pub meta: AstNodeMeta<'src>,
+    /// The parsed value: a reference into the source when there are no escapes, otherwise a reference counted owned string (see [`StringLiteralValue`]).
+    pub value: StringLiteralValue<'src>,
+    /// Format strings are denoted using '`' instead of '"'. Treat these similarly to string literals.
+    pub is_format_string: bool,
+}
+
+impl<'src> StringLit<'src> {
+    /// Parse a string literal from source code. If there is not a [`TokenTy::StringLit`]
+    /// available from the parser state's lexer, then this will not mutate the parser state.
+    pub fn parse(parser_state: &mut ParserState<'src>) -> NodeParserResult<Self> {
+        // Peek the type of the next token or error out if there is not one.
+        let peeked_token_ty = parser_state
+            .peek_token_ty()
+            // Dereferencing map here to prevent complaining about ref after mut borrow.
+            .map(|token_ty: &TokenTy| *token_ty)
+            // If there is not a next token, error out.
+            .ok_or(ParserError { byte_range: parser_state.peek_byte_range(), ty: ParserErrorVariant::Expected("string literal") })?;
+
+        // Match on the next token type available from the lexer.
+        match peeked_token_ty {
+            // Unterminated string literals produce an error.
+            TokenTy::StringLit { is_terminated: false, .. } => Err(parser_state.peek_byte_range_into_error(ParserErrorVariant::UnterminatedStringLiteral)),
+
+            // Terminated string literals produce a value (not yet implemented: this arm currently ends in `unimplemented!()`).
+            TokenTy::StringLit { is_format, .. } => {
+                // Peek the important parts of the token; the unwrap relies on the successful peek above.
+                let IndexedToken { index, token: Token { length, .. } } = *parser_state.peek_token().unwrap();
+                // Get the associated part of source code, making an immutable reference into the parser state.
+                let full_matching_source: &str = &parser_state.source[index..index+length];
+                // Get a reference to the body of the string literal itself (without the quotes or backticks for format
+                // strings).
+                let string_lit_body: &str = &full_matching_source[1..(full_matching_source.len()-1)];
+
+
+
+                unimplemented!()
+            }
+
+            // All other token types produce an error.
+            _ => Err(parser_state.peek_byte_range_into_error(ParserErrorVariant::Expected("string literal"))),
+        }
+    }
+}
diff --git a/wright/src/parser/error.rs b/wright/src/parser/error.rs
@@ -19,4 +19,7 @@ pub enum ParserErrorVariant {
 
     /// Encountered unterminated multi-line comment.
     UnterminatedMultilineComment,
+
+    /// Encountered unterminated string literal.
+    UnterminatedStringLiteral,
 }
Original file line number	Diff line number	Diff line change
Expand Up		@@ -7,3 +7,4 @@

		- [Language Constructs](./design-notes/language-constructs.md)
		- [User Defined Optimizations](./design-notes/user-defined-optimizations.md)
		- [Threads](./design-notes/threads.md)