Skip to content

Commit

Permalink
Some potential work on typespecs and representation
Browse files Browse the repository at this point in the history
Additionally some initial work on string escape processing.
  • Loading branch information
vcfxb committed Nov 29, 2023
1 parent f55b0e2 commit 6a0d597
Show file tree
Hide file tree
Showing 13 changed files with 285 additions and 96 deletions.
53 changes: 53 additions & 0 deletions examples/represent.wr
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@


import wright::box::Box;
import wright::box::NullableBox;

# Abstract interface of an optional value of type `T`: two constructors and two predicates.
# No representation is chosen here; see the `implement Option<...> as ...` blocks in this file.
type Option<type T> {
# Construct an option from a value.
func some(t: T) -> Self;
# Construct an option that holds no value.
func none() -> Self;
# Predicate paired with `some`.
func is_some(&self) -> bool;
# Predicate paired with `none`.
func is_none(&self) -> bool;
# ... etc
}

# A union with two alternatives: `some`, carrying a `T`, and `none`, carrying `void`.
# Used in this file as the representation behind the generic `Option<T>` implementation.
union DefaultOptionRepresentation<type T> { some: T | none: void };

# Implements `Option<T>` on top of the `DefaultOptionRepresentation<T>` union.
implement Option<T> as DefaultOptionRepresentation<T> {
# Build the union with its `some` alternative set to `t`.
const func some(t: T) -> Self {
DefaultOptionRepresentation { some: t }
}

# Build the union with its `none` alternative set to `void`.
const func none() -> Self {
DefaultOptionRepresentation { none: void }
}

# Test whether `self` is the `some` alternative of the union.
const func is_some(&self) -> bool {
self is DefaultOptionRepresentation.some
}

# Test whether `self` is the `none` alternative of the union.
const func is_none(&self) -> bool {
self is DefaultOptionRepresentation.none
}

# ... etc
}

# Implements `Option<Box<T>>` on top of `NullableBox<T>`: a null box stands for `none`
# and any non-null box stands for `some`.
implement Option<Box<T>> as NullableBox<T> {
    # Box `t` and treat the resulting box as a `NullableBox`.
    # This is the only function in this block that is not declared `const`.
    func some(t: T) -> Self {
        Box::new(t) as NullableBox
    }

    # The null box represents the absence of a value.
    const func none() -> Self {
        NullableBox::null()
    }

    # A value is present when the box is not null.
    const func is_some(&self) -> bool {
        !self.is_null()
    }

    # No value is present when the box is null.
    const func is_none(&self) -> bool {
        self.is_null()
    }
}

1 change: 1 addition & 0 deletions pages/book/src/SUMMARY.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@

- [Language Constructs](./design-notes/language-constructs.md)
- [User Defined Optimizations](./design-notes/user-defined-optimizations.md)
- [Threads](./design-notes/threads.md)
6 changes: 6 additions & 0 deletions pages/book/src/design-notes/threads.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

For many languages, threading can be a point of tension. When to use it (especially now that single-threaded async is more common),
how to use it, and how to optimize it are all common issues.

In building wright, I decided it would be best to separate async and synchronous code/threads to avoid unnecessarily
compiling/linking/running an async runtime to manage futures.
33 changes: 17 additions & 16 deletions wright/src/parser/ast/expression/literal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,15 @@ use crate::parser::{
};

use self::{
boolean::{parse_boolean_literal, BooleanLiteral},
integer::{parse_integer_literal, IntegerLiteral},
boolean::BooleanLiteral,
integer::IntegerLiteral,
};

pub mod boolean;
pub mod integer;
// pub mod string;
// pub mod character;
pub mod string;
pub mod character;
pub(self) mod escapes;

#[derive(Debug)]
pub enum Literal<'src> {
Expand All @@ -39,16 +40,16 @@ where
erase(discard_errors(map_node_type(parser_function, literal_conversion)))
}

/// Parse a literal from source code.
pub fn parse_literal<'src>(
parser_state: &mut ParserState<'src>,
) -> NodeParserOption<Literal<'src>> {
// Make a parser that finds the first successfull literal parse.
let parser = first_sucessful(vec![
convert_to_literal_parser(parse_integer_literal, Literal::Integer),
convert_to_literal_parser(parse_boolean_literal, Literal::Boolean),
]);

// Call that parser.
(parser)(parser_state)
impl<'src> Literal<'src> {
    /// Parse a literal value in source code.
    ///
    /// The candidate parsers are tried in the order listed: integer literals first,
    /// then boolean literals. Each candidate's output is wrapped in the matching
    /// [`Literal`] variant.
    pub fn parse(parser_state: &mut ParserState<'src>) -> NodeParserOption<Self> {
        // Combine the per-kind parsers into one that yields the first successful
        // literal parse, and run it immediately against the parser state.
        first_sucessful(vec![
            convert_to_literal_parser(IntegerLiteral::parse, Literal::Integer),
            convert_to_literal_parser(BooleanLiteral::parse, Literal::Boolean),
        ])(parser_state)
    }
}
48 changes: 24 additions & 24 deletions wright/src/parser/ast/expression/literal/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,30 +20,30 @@ pub struct BooleanLiteral<'src> {
pub value: bool,
}

/// Attempt to parse a boolean literal from the lexer held by the parser state.
/// Do not mutate parser state if a viable [`TokenTy`] is not available from the lexer.
pub fn parse_boolean_literal<'src>(
parser_state: &mut ParserState<'src>,
) -> NodeParserResult<BooleanLiteral<'src>> {
// Try to parse a `true` token and a `false` token.
for (token_ty, value) in [(TokenTy::True, true), (TokenTy::False, false)] {
// Try to take the appropriate token from the parser state.
if let Some(IndexedToken {
index,
token: Token { length, .. },
}) = parser_state.next_token_if_ty_eq(token_ty)
{
// On success, return the popped token's appropriate AST node.
return Ok(BooleanLiteral {
meta: parser_state.make_ast_node_meta(index, length),
value,
});
impl<'src> BooleanLiteral<'src> {
    /// Attempt to parse a boolean literal (`true` or `false`) from the lexer held by
    /// the parser state.
    ///
    /// Does not mutate the parser state if a viable [`TokenTy`] is not available from
    /// the lexer.
    ///
    /// # Errors
    /// Returns a [`ParserError`] of variant `Expected("boolean literal")`, located at
    /// the peeked byte range, when the next token is neither `true` nor `false`.
    pub fn parse(parser_state: &mut ParserState<'src>) -> NodeParserResult<Self> {
        // Each boolean keyword token paired with the value it denotes.
        let candidates = [(TokenTy::True, true), (TokenTy::False, false)];

        for (token_ty, value) in candidates {
            // Move on to the next candidate unless the upcoming token has this type.
            let Some(matched) = parser_state.next_token_if_ty_eq(token_ty) else {
                continue;
            };

            // Only the position and length of the consumed token are needed.
            let IndexedToken {
                index,
                token: Token { length, .. },
            } = matched;

            return Ok(BooleanLiteral {
                meta: parser_state.make_ast_node_meta(index, length),
                value,
            });
        }

        // Neither keyword was next: report what was expected.
        Err(ParserError {
            byte_range: parser_state.peek_byte_range(),
            ty: ParserErrorVariant::Expected("boolean literal"),
        })
    }
}
50 changes: 50 additions & 0 deletions wright/src/parser/ast/expression/literal/escapes.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
//! Utilities for dealing with escaped characters in string and char literals.
use std::{borrow::Cow, iter::Peekable, str::CharIndices};

/// Placeholder for resolving the escape sequences in the body of a string literal.
///
/// The [`Cow`] return type leaves room for either borrowing from the input or
/// returning a newly allocated string.
///
/// # Panics
/// Not implemented yet: every call panics via [`unimplemented!`].
pub fn unescape(source_str_lit_body: &str) -> Cow<'_, str> {

Check warning on line 5 in wright/src/parser/ast/expression/literal/escapes.rs

View workflow job for this annotation

GitHub Actions / coverage

unused variable: `source_str_lit_body`

Check warning on line 5 in wright/src/parser/ast/expression/literal/escapes.rs

View workflow job for this annotation

GitHub Actions / coverage

function `unescape` is never used
unimplemented!()
}

/// Iterator state for walking a string literal body and yielding [`StringLiteralPart`]s.
#[derive(Debug)]
struct StringLiteralPartsIterator<'str_lit> {
/// The body of the string literal being unescaped.
str_lit_body: &'str_lit str,

Check warning on line 12 in wright/src/parser/ast/expression/literal/escapes.rs

View workflow job for this annotation

GitHub Actions / coverage

fields `str_lit_body` and `iter` are never read

/// A peekable iterator over characters paired with their byte offsets.
/// NOTE(review): presumably iterates over `str_lit_body`; nothing constructs this
/// struct yet, so confirm once a constructor exists.
iter: Peekable<CharIndices<'str_lit>>,
}

/// One piece of a string literal body, as yielded by [`StringLiteralPartsIterator`].
enum StringLiteralPart<'str_lit> {
/// A sequence of unescaped characters.
UnescapedCharacters(&'str_lit str),

Check warning on line 20 in wright/src/parser/ast/expression/literal/escapes.rs

View workflow job for this annotation

GitHub Actions / coverage

variants `UnescapedCharacters` and `UnicodeEscape` are never constructed

/// A unicode escape sequence found in the string literal.
UnicodeEscape {
/// The part of the string literal that contains this escape sequence.
matching_source: &'str_lit str,
/// The result of attempting to parse the escaped value into a unicode codepoint.
parsed: Option<char>,
},
}

/// The ways in which a unicode escape sequence can be invalid.
enum UnicodeEscapeError {

Check warning on line 30 in wright/src/parser/ast/expression/literal/escapes.rs

View workflow job for this annotation

GitHub Actions / coverage

enum `UnicodeEscapeError` is never used
/// There were too many digits in the escape sequence.
TooManyDigits,
/// The escape sequence was empty.
Empty,
/// The escaped digits do not represent a valid unicode codepoint.
InvalidCodepoint,
}

impl<'str_lit> StringLiteralPartsIterator<'str_lit> {
// No inherent methods are defined yet.

}

/// Iteration yields one [`StringLiteralPart`] at a time.
impl<'str_lit> Iterator for StringLiteralPartsIterator<'str_lit> {
type Item = StringLiteralPart<'str_lit>;

/// Not implemented yet: every call panics via [`unimplemented!`].
fn next(&mut self) -> Option<Self::Item> {
unimplemented!()

}
}
86 changes: 43 additions & 43 deletions wright/src/parser/ast/expression/literal/integer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,52 +22,52 @@ pub struct IntegerLiteral<'src> {
pub value: BigUint,
}

/// Parse an [`IntegerLiteral`] from source code.
/// Do not mutate parser state if there is not a [`TokenTy::IntegerLit`] next.
pub fn parse_integer_literal<'src>(
parser_state: &mut ParserState<'src>,
) -> NodeParserResult<IntegerLiteral<'src>> {
// Read and destructure an integer literal token from the lexer.
let IndexedToken {
index,
token: Token { length, .. },
} = parser_state
// All integer literals should be of this token type.
.next_token_if_ty_eq(TokenTy::IntegerLit)
// Error out if the next token is not an integer literal.
.ok_or_else(|| ParserError {
byte_range: parser_state.peek_byte_range(),
ty: ParserErrorVariant::Expected("integer literal"),
})?;
impl<'src> IntegerLiteral<'src> {
    /// Parse a literal integer from source code.
    /// Do not mutate parser state if there is not a [`TokenTy::IntegerLit`] next.
    ///
    /// A leading `0x`/`0X` selects base 16, `0b`/`0B` base 2 and `0o` base 8; any other
    /// literal is read as base 10.
    ///
    /// # Errors
    /// Returns a [`ParserError`] of variant `Expected("integer literal")`, located at
    /// the peeked byte range, when the next token is not an integer literal.
    ///
    /// # Panics
    /// Panics if the digits cannot be parsed in the selected radix. The lexer is
    /// expected to have validated the literal's format already.
    pub fn parse(parser_state: &mut ParserState<'src>) -> NodeParserResult<Self> {
        // Read and destructure an integer literal token from the lexer.
        let IndexedToken {
            index,
            token: Token { length, .. },
        } = parser_state
            // All integer literals should be of this token type.
            .next_token_if_ty_eq(TokenTy::IntegerLit)
            // Error out if the next token is not an integer literal.
            .ok_or_else(|| ParserError {
                byte_range: parser_state.peek_byte_range(),
                ty: ParserErrorVariant::Expected("integer literal"),
            })?;

        // Get the matching source of this token.
        let matching_source = &parser_state.source[index..index + length];

        // Look at no more than the first two bytes for a radix prefix. This must be
        // `min`, not `max`: `max` slices past the end of one-character literals (a panic)
        // and otherwise selects the entire literal, so a prefix would never match.
        let prefix = &matching_source[..cmp::min(2, matching_source.len())];

        // Get a radix off the prefix.
        let radix = match prefix {
            "0x" | "0X" => 16,
            "0b" | "0B" => 2,
            "0o" => 8,
            _ => 10,
        };

        // Strip the prefix (present only for non-decimal radixes) to get the digits.
        let body = if radix != 10 {
            &matching_source[2..]
        } else {
            matching_source
        };

        // Parse it.
        let value = BigUint::from_str_radix(body, radix)
            // Panic here as the lexer should check for this.
            .expect("lexer checks integer literal format");

        // Return ok.
        Ok(IntegerLiteral {
            meta: parser_state.make_ast_node_meta(index, length),
            value,
        })
    }
}
69 changes: 69 additions & 0 deletions wright/src/parser/ast/expression/literal/string.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
//! AST node representation and parsing implementation for string literals.
use std::rc::Rc;

use crate::parser::{ast::metadata::AstNodeMeta, state::ParserState, util::NodeParserResult, lexer::{tokens::{TokenTy, Token}, IndexedToken}, error::{ParserError, ParserErrorVariant}};

/// The value of a string literal in source code.
#[derive(Debug, Clone)]
pub enum StringLiteralValue<'src> {
/// A string literal in source code without any escapes can be represented directly
/// using a reference into the source code. This will refer to the string literal without the
/// opening and closing quotation marks.
WithoutEscapes(&'src str),

/// A string literal in source code with escapes must be represented using an owned string, as
/// we have to do some processing to resolve all the escapes into the actual unescaped unicode string.
/// We store this in an [`Rc`] to make cloning less expensive, as we will not need to mutate this string
/// while it's in the AST.
WithEscapes(Rc<str>)
}

/// A string literal in source code.
#[derive(Debug)]
pub struct StringLit<'src> {
/// The metadata about this node.
pub meta: AstNodeMeta<'src>,
/// The parsed value: either a slice borrowed from the source code or a
/// reference-counted owned string (see [`StringLiteralValue`]).
pub value: StringLiteralValue<'src>,
/// Format strings are denoted using '`' instead of '"'. Treat these similarly to string literals.
pub is_format_string: bool,
}

impl<'src> StringLit<'src> {
/// Parse a string literal from source code. If there is not a [`TokenTy::StringLit`]
/// available from the parser state's lexer, then this will not mutate the parser state.
pub fn parse(parser_state: &mut ParserState<'src>) -> NodeParserResult<Self> {
// Peek the type of the next token or error out if there is not one.
let peeked_token_ty = parser_state
.peek_token_ty()
// Dereferencing map here to prevent complaining about ref after mut borrow.
.map(|token_ty: &TokenTy| *token_ty)
// If there is not a next token, error out.
.ok_or(ParserError { byte_range: parser_state.peek_byte_range(), ty: ParserErrorVariant::Expected("string literal") })?;

// Mathc on the next token type available from the lexer.
match peeked_token_ty {
// Unterminated string literals produce an error.
TokenTy::StringLit { is_terminated: false, .. } => Err(parser_state.peek_byte_range_into_error(ParserErrorVariant::UnterminatedStringLiteral)),

// Terminated string literals produce a value.
TokenTy::StringLit { is_format, .. } => {

Check warning on line 51 in wright/src/parser/ast/expression/literal/string.rs

View workflow job for this annotation

GitHub Actions / coverage

unused variable: `is_format`
// Peek the important parts of the token.
let IndexedToken { index, token: Token { length, .. } } = *parser_state.peek_token().unwrap();
// Get the associated part of source code, making an immutable reference into the parser state.
let full_matching_source: &str = &parser_state.source[index..index+length];
// Get a reference to the body of the string literal itself (without the quotes or backticks for format
// strings).
let string_lit_body: &str = &full_matching_source[1..(full_matching_source.len()-1)];

Check warning on line 58 in wright/src/parser/ast/expression/literal/string.rs

View workflow job for this annotation

GitHub Actions / coverage

unused variable: `string_lit_body`



unimplemented!()
}

// All other token types produce an error.
_ => Err(parser_state.peek_byte_range_into_error(ParserErrorVariant::Expected("string literal"))),
}
}
}
3 changes: 3 additions & 0 deletions wright/src/parser/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,7 @@ pub enum ParserErrorVariant {

/// Encountered unterminated multi-line comment.
UnterminatedMultilineComment,

/// Encountered unterminated string literal.
UnterminatedStringLiteral,
}
Loading

0 comments on commit 6a0d597

Please sign in to comment.