# Example: declaring an abstract `type` (interface) and giving it multiple
# concrete memory representations via `implement ... as ...`.
#
# NOTE(review): generic parameters below are reconstructed — the source was
# mangled and `<T>` annotations were stripped; confirm against the language spec.

import wright::box::Box;
import wright::box::NullableBox;

type Option<T> {
    func some(t: T) -> Self;
    func none() -> Self;
    func is_some(&self) -> bool;
    func is_none(&self) -> bool;
    # ... etc
}

union DefaultOptionRepresentation<T> { some: T | none: void };

implement Option<T> as DefaultOptionRepresentation<T> {
    const func some(t: T) -> Self {
        DefaultOptionRepresentation { some: t }
    }

    const func none() -> Self {
        DefaultOptionRepresentation { none: void }
    }

    const func is_some(&self) -> bool {
        self is DefaultOptionRepresentation.some
    }

    const func is_none(&self) -> bool {
        self is DefaultOptionRepresentation.none
    }

    # ... etc
}

# Null-pointer optimization: Option of a boxed value needs no discriminant.
implement Option<Box<T>> as NullableBox<T> {
    func some(t: T) -> Self {
        Box::new(t) as NullableBox<T>
    }

    const func none() -> Self {
        NullableBox::null()
    }

    # FIX: these two used the `fn` keyword, which this same change set removes
    # from the lexer in favor of `func`.
    const func is_some(&self) -> bool {
        !self.is_null()
    }

    const func is_none(&self) -> bool {
        self.is_null()
    }
}
impl<'src> Literal<'src> {
    /// Parse a literal value in source code.
    ///
    /// Tries each known literal form in order (integer, then boolean) and
    /// returns the first successful parse, or `None` if nothing matched.
    /// On `None`, parser state is left where the sub-parsers left it
    /// (each sub-parser promises not to consume on failure).
    pub fn parse(parser_state: &mut ParserState<'src>) -> NodeParserOption<Self> {
        // Make a parser that finds the first successfull literal parse.
        // NOTE(review): `first_sucessful` is misspelled at its definition
        // site; rename there (with all call sites), not here.
        let parser = first_sucessful(vec![
            convert_to_literal_parser(IntegerLiteral::parse, Literal::Integer),
            convert_to_literal_parser(BooleanLiteral::parse, Literal::Boolean),
        ]);

        // Call that parser.
        (parser)(parser_state)
    }
}
impl<'src> BooleanLiteral<'src> {
    /// Attempt to parse a boolean literal (`true` or `false`) from the lexer
    /// held by the parser state.
    /// Do not mutate parser state if a viable [`TokenTy`] is not available from the lexer.
    ///
    /// # Errors
    /// Returns a [`ParserErrorVariant::Expected`] error pointing at the next
    /// token's byte range when neither keyword is next.
    pub fn parse(parser_state: &mut ParserState<'src>) -> NodeParserResult<Self> {
        // Try to parse a `true` token and a `false` token; each keyword token
        // maps directly to its boolean value.
        for (token_ty, value) in [(TokenTy::True, true), (TokenTy::False, false)] {
            // Take the token from the parser state only if its type matches —
            // `next_token_if_ty_eq` is what guarantees the no-mutation promise.
            if let Some(IndexedToken {
                index,
                token: Token { length, .. },
            }) = parser_state.next_token_if_ty_eq(token_ty)
            {
                // On success, return the popped token's appropriate AST node.
                return Ok(BooleanLiteral {
                    meta: parser_state.make_ast_node_meta(index, length),
                    value,
                });
            }
        }

        // If neither parse succeeds, return an error.
        Err(ParserError {
            byte_range: parser_state.peek_byte_range(),
            ty: ParserErrorVariant::Expected("boolean literal"),
        })
    }
}
//! Utilities for dealing with escaped characters in string and char literals.

use std::{borrow::Cow, iter::Peekable, str::CharIndices};

/// Resolve all escape sequences in the body of a string literal.
///
/// Intended to return `Cow::Borrowed` when the body contains no escapes and
/// an owned string otherwise. Currently unimplemented.
pub fn unescape(source_str_lit_body: &str) -> Cow<'_, str> {
    unimplemented!()
}

/// Splits a string literal body into alternating runs of plain characters
/// and escape sequences.
#[derive(Debug)]
struct StringLiteralPartsIterator<'str_lit> {
    /// The body of the string literal being unescaped.
    str_lit_body: &'str_lit str,

    /// A peekable iterator over the characters of the body, paired with
    /// their byte indices (used to slice `str_lit_body` into parts).
    iter: Peekable<CharIndices<'str_lit>>,
}

/// One segment of a string literal body.
enum StringLiteralPart<'str_lit> {
    /// A sequence of unescaped characters.
    UnescapedCharacters(&'str_lit str),

    /// A unicode escape sequence (e.g. `\u{..}` — exact syntax TBD).
    UnicodeEscape {
        /// The part of the string literal that contains this escape sequence.
        matching_source: &'str_lit str,
        /// The result of attempting to parse the escaped value into a unicode codepoint.
        /// NOTE(review): type reconstructed as `Option<char>` from mangled source —
        /// confirm; if errors should be reported, `Result<char, UnicodeEscapeError>`
        /// would make `UnicodeEscapeError` (currently unused) meaningful.
        parsed: Option<char>,
    },
}

/// Ways a unicode escape sequence can be malformed.
enum UnicodeEscapeError {
    /// There were too many digits in the escape sequence.
    TooManyDigits,
    /// Empty escape sequence,
    Empty,
    /// The escaped digits do not represent a valid unicode codepoint.
    InvalidCodepoint,
}

impl<'str_lit> StringLiteralPartsIterator<'str_lit> {

}

impl<'str_lit> Iterator for StringLiteralPartsIterator<'str_lit> {
    type Item = StringLiteralPart<'str_lit>;

    fn next(&mut self) -> Option<Self::Item> {
        unimplemented!()

    }
}
+ InvalidCodepoint, +} + +impl<'str_lit> StringLiteralPartsIterator<'str_lit> { + +} + +impl<'str_lit> Iterator for StringLiteralPartsIterator<'str_lit> { + type Item = StringLiteralPart<'str_lit>; + + fn next(&mut self) -> Option { + unimplemented!() + + } +} diff --git a/wright/src/parser/ast/expression/literal/integer.rs b/wright/src/parser/ast/expression/literal/integer.rs index 48394a2f..1ec60334 100644 --- a/wright/src/parser/ast/expression/literal/integer.rs +++ b/wright/src/parser/ast/expression/literal/integer.rs @@ -22,52 +22,52 @@ pub struct IntegerLiteral<'src> { pub value: BigUint, } -/// Parse an [`IntegerLiteral`] from source code. -/// Do not mutate parser state if there is not a [`TokenTy::IntegerLit`] next. -pub fn parse_integer_literal<'src>( - parser_state: &mut ParserState<'src>, -) -> NodeParserResult> { - // Read and destructure an integer literal token from the lexer. - let IndexedToken { - index, - token: Token { length, .. }, - } = parser_state - // All integer literals should be of this token type. - .next_token_if_ty_eq(TokenTy::IntegerLit) - // Error out if the next token is not an integer literal. - .ok_or_else(|| ParserError { - byte_range: parser_state.peek_byte_range(), - ty: ParserErrorVariant::Expected("integer literal"), - })?; +impl<'src> IntegerLiteral<'src> { + /// Parse a literal integer from source code. + /// Do not mutate parser state if there is not a [`TokenTy::IntegerLit`] next. + pub fn parse(parser_state: &mut ParserState<'src>) -> NodeParserResult { + // Read and destructure an integer literal token from the lexer. + let IndexedToken { + index, + token: Token { length, .. }, + } = parser_state + // All integer literals should be of this token type. + .next_token_if_ty_eq(TokenTy::IntegerLit) + // Error out if the next token is not an integer literal. 
+ .ok_or_else(|| ParserError { + byte_range: parser_state.peek_byte_range(), + ty: ParserErrorVariant::Expected("integer literal"), + })?; - // Get the matching source of this token. - let matching_source = &parser_state.source[index..index + length]; - // Check for a prefix - let prefix = &matching_source[..cmp::max(2, matching_source.len())]; + // Get the matching source of this token. + let matching_source = &parser_state.source[index..index + length]; + // Check for a prefix + let prefix = &matching_source[..cmp::max(2, matching_source.len())]; - // Get a radix off the prefix - let radix = match prefix { - "0x" | "0X" => 16, - "0b" | "0B" => 2, - "0o" => 8, - _ => 10, - }; + // Get a radix off the prefix + let radix = match prefix { + "0x" | "0X" => 16, + "0b" | "0B" => 2, + "0o" => 8, + _ => 10, + }; - // Strip the prefix from the string to get the body of it to parse. - let body = if radix != 10 { - &matching_source[2..] - } else { - matching_source - }; + // Strip the prefix from the string to get the body of it to parse. + let body = if radix != 10 { + &matching_source[2..] + } else { + matching_source + }; - // Parse it. - let value = BigUint::from_str_radix(body, radix) - // Panic here as the lexer should check for this. - .expect("lexer checks integer literal format"); + // Parse it. + let value = BigUint::from_str_radix(body, radix) + // Panic here as the lexer should check for this. + .expect("lexer checks integer literal format"); - // Return ok. - Ok(IntegerLiteral { - meta: parser_state.make_ast_node_meta(index, length), - value, - }) + // Return ok. + Ok(IntegerLiteral { + meta: parser_state.make_ast_node_meta(index, length), + value, + }) + } } diff --git a/wright/src/parser/ast/expression/literal/string.rs b/wright/src/parser/ast/expression/literal/string.rs index e69de29b..8b4413b3 100644 --- a/wright/src/parser/ast/expression/literal/string.rs +++ b/wright/src/parser/ast/expression/literal/string.rs @@ -0,0 +1,69 @@ +//! 
//! AST node representation and parsing implementation for string literals.

use std::rc::Rc;

use crate::parser::{ast::metadata::AstNodeMeta, state::ParserState, util::NodeParserResult, lexer::{tokens::{TokenTy, Token}, IndexedToken}, error::{ParserError, ParserErrorVariant}};

/// The value of a string literal in source code.
#[derive(Debug, Clone)]
pub enum StringLiteralValue<'src> {
    /// A string literal in source code without any escapes can be represented directly
    /// using a reference into the source code. This will refer to the string literal without the
    /// opening and closing quotation marks.
    WithoutEscapes(&'src str),

    /// A string literal in source code with escapes must be represented using an owned string, as
    /// we have to do some processing to resolve all the escapes into the actual unescaped unicode string.
    /// We store this in an [`Rc`] to make cloning less expensive, as we will not need to mutate this string
    /// while it's in the AST.
    WithEscapes(Rc<str>)
}

/// A string literal in source code.
#[derive(Debug)]
pub struct StringLit<'src> {
    /// The metadata about this node.
    pub meta: AstNodeMeta<'src>,
    /// A reference counted owned string representing the parsed value.
    pub value: StringLiteralValue<'src>,
    /// Format strings are denoted using '`' instead of '"'. Treat these similarly to string literals.
    pub is_format_string: bool,
}

impl<'src> StringLit<'src> {
    /// Parse a string literal from source code. If there is not a [`TokenTy::StringLit`]
    /// available from the parser state's lexer, then this will not mutate the parser state.
    ///
    /// # Errors
    /// - [`ParserErrorVariant::UnterminatedStringLiteral`] for a string with no closing quote.
    /// - [`ParserErrorVariant::Expected`] when the next token is not a string literal at all.
    pub fn parse(parser_state: &mut ParserState<'src>) -> NodeParserResult<Self> {
        // Peek the type of the next token or error out if there is not one.
        let peeked_token_ty = parser_state
            .peek_token_ty()
            // Dereferencing map here to prevent complaining about ref after mut borrow.
            .map(|token_ty: &TokenTy| *token_ty)
            // If there is not a next token, error out.
            .ok_or(ParserError { byte_range: parser_state.peek_byte_range(), ty: ParserErrorVariant::Expected("string literal") })?;

        // Match on the next token type available from the lexer.
        match peeked_token_ty {
            // Unterminated string literals produce an error.
            TokenTy::StringLit { is_terminated: false, .. } => Err(parser_state.peek_byte_range_into_error(ParserErrorVariant::UnterminatedStringLiteral)),

            // Terminated string literals produce a value.
            TokenTy::StringLit { is_format, .. } => {
                // Peek the important parts of the token.
                let IndexedToken { index, token: Token { length, .. } } = *parser_state.peek_token().unwrap();
                // Get the associated part of source code, making an immutable reference into the parser state.
                let full_matching_source: &str = &parser_state.source[index..index+length];
                // Get a reference to the body of the string literal itself (without the quotes or backticks for format
                // strings).
                // NOTE(review): byte-slicing off one byte per side assumes the
                // delimiters are single-byte (`"` / `` ` ``) — holds for ASCII delimiters.
                let string_lit_body: &str = &full_matching_source[1..(full_matching_source.len()-1)];

                // TODO: run the body through `escapes::unescape` and build the node
                // (`is_format` and `string_lit_body` are currently unused pending this).

                unimplemented!()
            }

            // All other token types produce an error.
            _ => Err(parser_state.peek_byte_range_into_error(ParserErrorVariant::Expected("string literal"))),
        }
    }
}
+ UnterminatedStringLiteral, } diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index 2d6e64b9..3b38b76d 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -201,11 +201,13 @@ impl<'a> Iterator for Lexer<'a> { "struct" => TokenTy::Struct, "record" => TokenTy::Record, "trait" => TokenTy::Trait, - "fn" => TokenTy::Fn, + "func" => TokenTy::Func, "enum" => TokenTy::Enum, "union" => TokenTy::Union, - "mod" => TokenTy::Module, + "module" => TokenTy::Module, "import" => TokenTy::Import, + "implement" => TokenTy::Implement, + "represent" => TokenTy::Represent, // Visibility keywords "public" => TokenTy::Public, @@ -222,7 +224,6 @@ impl<'a> Iterator for Lexer<'a> { "relation" => TokenTy::Relation, "unsafe" => TokenTy::Unsafe, "unchecked" => TokenTy::Unchecked, - "impl" => TokenTy::Impl, "Self" => TokenTy::SelfUpper, "self" => TokenTy::SelfLower, "type" => TokenTy::Type, diff --git a/wright/src/parser/lexer/tokens.rs b/wright/src/parser/lexer/tokens.rs index 986db3b4..cb59c661 100644 --- a/wright/src/parser/lexer/tokens.rs +++ b/wright/src/parser/lexer/tokens.rs @@ -72,8 +72,14 @@ pub enum TokenTy { Class, Struct, Record, + Enum, + Union, Trait, - Fn, + Type, + Func, + Module, + Implement, + Represent, /// Publicly visible. Public, /// Visible in the package only. @@ -84,14 +90,10 @@ pub enum TokenTy { Constrain, /// Used to constrain relations between variables. Relation, - Enum, - Union, Unsafe, /// May use similar to unsafe in Rust -- call a function or cast without checking any of the constraints. Unchecked, Import, - Impl, - Type, Const, Var, If, @@ -117,9 +119,6 @@ pub enum TokenTy { #[display(fmt = "self")] SelfLower, - /// `mod` in source code. - Module, - /// Whitespace of any kind and length. 
    /// Create a parser error by peeking the next byte range and combining it with the given variant.
    ///
    /// Convenience used by parsers that fail at the *next* token's location
    /// (e.g. string-literal parsing); takes `&mut self` because peeking the
    /// byte range may advance the underlying indexed lexer's lookahead.
    #[inline]
    pub(crate) fn peek_byte_range_into_error(&mut self, err_ty: ParserErrorVariant) -> ParserError {
        ParserError { byte_range: self.peek_byte_range(), ty: err_ty }
    }