Skip to content

Commit

Permalink
Parser struct with lookahead.
Browse files Browse the repository at this point in the history
  • Loading branch information
vcfxb committed Jul 19, 2024
1 parent bb0dfcc commit cbba2a1
Show file tree
Hide file tree
Showing 6 changed files with 138 additions and 98 deletions.
73 changes: 70 additions & 3 deletions wright/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,82 @@
//! [AST]: crate::ast
//! [Token]: crate::lexer::token::Token
use super::lexer::Lexer;
use std::collections::VecDeque;
use error::ParserError;

use crate::{lexer::token::{Token, TokenTy}, source_tracking::fragment::Fragment};
use super::lexer::Lexer;

pub mod error;
mod identifier;
mod path;
pub mod whitespace;

/// The [Parser] struct wraps a [Lexer] and adds lookahead and functions that are useful for parsing.
///
/// [Token]s that have been pulled from the [Lexer] but not yet consumed are buffered in a queue,
/// so they can be inspected repeatedly before being taken.
#[derive(Debug)]
pub struct Parser {
/// The wrapped [Lexer]; new [Token]s are pulled from it on demand.
lexer: Lexer,
/// [Token]s already pulled from the [Lexer] but not yet consumed, oldest at the front.
lookahead: VecDeque<Token>,
}

impl Parser {
    /// Construct a new parser around a given [Lexer].
    ///
    /// The lookahead queue starts out empty; nothing is read from the [Lexer] until a
    /// [Token] is requested or peeked.
    pub fn new(lexer: Lexer) -> Self {
        Parser {
            lexer,
            lookahead: VecDeque::new(),
        }
    }

    /// Get the next [Token] from this [Parser], consuming it.
    ///
    /// If any [Token]s have already been peeked, the oldest one is moved out of the lookahead
    /// queue (it is not cloned). Otherwise the next [Token] is pulled straight from the [Lexer].
    /// Returns [None] when the queue is empty and the [Lexer] produces no further [Token].
    pub fn next(&mut self) -> Option<Token> {
        self.lookahead.pop_front().or_else(|| self.lexer.next_token())
    }

    /// Peek at the next [Token] without consuming it.
    ///
    /// The [Token] is pulled from the [Lexer] and cached in the lookahead queue if it has not
    /// been peeked before. Returns [None] if there is no next [Token].
    pub fn peek(&mut self) -> Option<&Token> {
        // Peeking is exactly a lookahead of zero tokens; share the buffering logic.
        self.lookahead(0)
    }

    /// Peek the [Fragment] of the next [Token].
    ///
    /// Returns [None] if there is no next [Token].
    pub fn peek_fragment(&mut self) -> Option<&Fragment> {
        self.peek().map(|token| &token.fragment)
    }

    /// Get the [Lexer] that's wrapped.
    ///
    /// Note that any [Token]s currently held in the lookahead queue have already been
    /// taken out of this [Lexer].
    pub fn lexer(&self) -> &Lexer {
        &self.lexer
    }

    /// Lookahead `k` [Token]s.
    ///
    /// If `k == 0` then this is effectively peeking at the next [Token] from the wrapped [Lexer].
    ///
    /// Returns [None] if the [Lexer] runs out before `k + 1` [Token]s are buffered. Any [Token]s
    /// pulled before that point stay in the lookahead queue and are not lost.
    pub fn lookahead(&mut self, k: usize) -> Option<&Token> {
        // Fill the queue until index `k` exists, bailing out if the lexer is exhausted.
        while self.lookahead.len() <= k {
            self.lookahead.push_back(self.lexer.next_token()?);
        }

        self.lookahead.get(k)
    }

    /// Get the next [Token] from this parser if its [Token::variant] is the given `token_ty`.
    ///
    /// Returns [None] (and consumes nothing) if there is no next [Token] or if its variant
    /// does not match.
    pub fn next_if_is(&mut self, token_ty: TokenTy) -> Option<Token> {
        // A successful peek guarantees the lookahead queue is non-empty, so the safe
        // `pop_front` below always yields the token that was just inspected.
        if self.peek()?.variant == token_ty {
            self.lookahead.pop_front()
        } else {
            None
        }
    }

    /// Peek at the next [Token], remove it if it's a [TokenTy::Whitespace].
    ///
    /// At most one [Token] is removed by a single call.
    pub fn ignore_whitespace(&mut self) {
        self.next_if_is(TokenTy::Whitespace);
    }
}

/// Trait implemented by all AST nodes that can be parsed.
pub trait Parse: Sized {
/// Attempt to parse a tree node of this type from a given [Lexer].
fn parse(lexer: &mut Lexer) -> Result<Self, ParserError>;
/// Attempt to parse a tree node of this type from a given [Parser].
fn parse(parser: &mut Parser) -> Result<Self, ParserError>;
}
61 changes: 11 additions & 50 deletions wright/src/parser/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,60 +12,23 @@ use std::borrow::Cow;
#[allow(missing_docs)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ParserErrorKind {
UnternminatedStringLiteralEncountered,
UnterminatedStringLiteralEncountered,
UnterminatedMultilineCommentEncountered,
ExpectedIdentifier,
ExpectedPath,
}

/// Table of all the definition strings for v
pub const ERROR_VARIANT_DESCRIPTION_TABLE: &[(ParserErrorKind, &str)] = &[
(
ParserErrorKind::UnternminatedStringLiteralEncountered,
"encountered unterminated string literal while parsing",
),
(
ParserErrorKind::UnterminatedMultilineCommentEncountered,
"encountered unterminated multiline comment while parsing",
),
(ParserErrorKind::ExpectedIdentifier, "expected identifier"),
(ParserErrorKind::ExpectedPath, "expected path or identifier"),
];

impl ParserErrorKind {
/// Check (at compile time) if this [ParserErrorKind] has a descrition in the [ERROR_VARIANT_DESCRIPTION_TABLE].
pub const fn has_descrition(self) -> bool {
let mut i = 0;

while i < ERROR_VARIANT_DESCRIPTION_TABLE.len() {
if ERROR_VARIANT_DESCRIPTION_TABLE[i].0 as u64 == self as u64 {
return true;
}

i += 1;
}

false
}

/// Get the description string of this [ParserErrorKind], if one exists. Calls to this against literals
/// should be zero-cost since all the lookups are done at compile time. You can use a `const { }` block
/// to ensure this.
///
/// Calls against variables might be a bit more expensive, since this does an iterative lookup against the
/// [ERROR_VARIANT_DESCRIPTION_TABLE].
pub const fn find_description(self) -> Option<&'static str> {
let mut i = 0;

while i < ERROR_VARIANT_DESCRIPTION_TABLE.len() {
if ERROR_VARIANT_DESCRIPTION_TABLE[i].0 as u64 == self as u64 {
return Some(ERROR_VARIANT_DESCRIPTION_TABLE[i].1);
}

i += 1;
/// Get a short description of this kind of error.
pub const fn describe(self) -> &'static str {
use ParserErrorKind::*;

match self {
ExpectedIdentifier => "expected identifier",
ExpectedPath => "expected path or identifier",
UnterminatedMultilineCommentEncountered => "encountered unterminated multiline comment while parsing",
UnterminatedStringLiteralEncountered => "encountered unterminated string literal while parsing",
}

None
}

/// Return this [ParserErrorKind] cast to a [u64], adding 1, preceded by the letters "WPE" standing for "Wright Parser Error".
Expand Down Expand Up @@ -94,9 +57,7 @@ impl ParserError {
pub fn as_diagnostic(self) -> Diagnostic {
let description = self
.kind
.find_description()
.map(ToOwned::to_owned)
.unwrap_or(format!("parser error ({:?})", self.kind));
.describe();

let mut diagnostic = Diagnostic::error()
.with_code(self.kind.error_code_string())
Expand Down
64 changes: 27 additions & 37 deletions wright/src/parser/identifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,43 +2,33 @@
use super::{
error::{ParserError, ParserErrorKind},
Parse,
Parse, Parser,
};
use crate::{
ast::identifier::Identifier,
lexer::{
token::{Token, TokenTy},
Lexer,
},
lexer::token::{Token, TokenTy},
};

impl Parse for Identifier {
fn parse(lexer: &mut Lexer) -> Result<Self, ParserError> {
let next_token = lexer.next_token();

// Get the fragment from the next token if it's the right type (or produce an error).
let ident_fragment = match next_token {
Some(Token {
variant: TokenTy::Identifier,
fragment,
}) => Ok(fragment),

Some(Token { fragment, .. }) => Err(ParserError {
kind: ParserErrorKind::ExpectedIdentifier,
location: fragment,
help: None,
}),

None => Err(ParserError {
kind: ParserErrorKind::ExpectedIdentifier,
location: lexer.remaining.clone(),
help: Some("found end of source".into()),
}),
}?;

Ok(Identifier {
fragment: ident_fragment,
})
fn parse(parser: &mut Parser) -> Result<Self, ParserError> {
match parser.next_if_is(TokenTy::Identifier) {
Some(Token { fragment, .. }) => Ok(Identifier { fragment }),

None => match parser.peek_fragment() {
Some(next_frag) => Err(ParserError {
kind: ParserErrorKind::ExpectedIdentifier,
location: next_frag.clone(),
help: None,
}),

None => Err(ParserError {
kind: ParserErrorKind::ExpectedIdentifier,
location: parser.lexer.remaining.clone(),
help: Some("found end of source".into()),
})

}
}
}
}

Expand All @@ -47,22 +37,22 @@ mod tests {
use crate::{
ast::identifier::Identifier,
lexer::Lexer,
parser::{error::ParserErrorKind, Parse},
parser::{error::ParserErrorKind, Parse, Parser},
};

#[test]
fn test_parse_ident() {
let mut lexer = Lexer::new_test("source");
let ident = Identifier::parse(&mut lexer).unwrap();
let mut parser = Parser::new(Lexer::new_test("source"));
let ident = Identifier::parse(&mut parser).unwrap();
assert_eq!(ident.fragment.as_str(), "source");
assert_eq!(lexer.remaining.len(), 0);
assert_eq!(parser.lexer().remaining.len(), 0);
}

#[test]
fn test_parse_ident_fail() {
for fail in ["12", "+", " ", " test", "_", "record"] {
let mut lexer = Lexer::new_test(&fail);
let error = Identifier::parse(&mut lexer).unwrap_err();
let mut parser = Parser::new(Lexer::new_test(&fail));
let error = Identifier::parse(&mut parser).unwrap_err();
assert_eq!(error.kind, ParserErrorKind::ExpectedIdentifier);
}
}
Expand Down
17 changes: 12 additions & 5 deletions wright/src/parser/path.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,29 @@
//! [Parse] implementation for [Path].
use super::error::ParserErrorKind;
use super::whitespace::optional_whitespace;
use super::Parser;
use super::{error::ParserError, Parse};
use crate::ast::identifier::Identifier;
use crate::ast::path::Path;
use crate::lexer::Lexer;
use crate::lexer::{self, Lexer};

impl Parse for Path {
fn parse(_lexer: &mut Lexer) -> Result<Self, ParserError> {
fn parse(parser: &mut Parser) -> Result<Self, ParserError> {
unimplemented!()
}
}

/// Parse the first (and possibly only) [Identifier] in the [Path].
///
fn _parse_head() -> Result<Identifier, ParserError> {
unimplemented!()
fn parse_head(parser: &mut Parser) -> Result<Identifier, ParserError> {
Identifier::parse(parser)
.map_err(|mut err| {
err.kind = ParserErrorKind::ExpectedPath;
err
})
}


// /// Parse a path (`head::tail`) in source code.
// pub fn parse_path<'src>(parser_state: &mut ParserState<'src>) -> NodeParserResult<Path<'src>> {
// // Get the initial index to make metadata at the end.
Expand Down
15 changes: 15 additions & 0 deletions wright/src/parser/whitespace.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
//! Utilities for parsing through whitespace.
use crate::lexer::{token::{Token, TokenTy}, Lexer};
use std::mem;

/// Skip a single leading [TokenTy::Whitespace] token if the lexer has one next.
///
/// The check is made on a fork of `lexer`. The original is only overwritten, with the
/// already-advanced fork, when the token read from the fork is whitespace. In every other
/// case `lexer` is left untouched.
pub fn optional_whitespace(lexer: &mut Lexer) {
    let mut probe = lexer.fork();

    let starts_with_whitespace = matches!(
        probe.next_token(),
        Some(Token { variant: TokenTy::Whitespace, .. })
    );

    if starts_with_whitespace {
        // Commit the advance by swapping the fork in; the stale lexer state is discarded.
        drop(mem::replace(lexer, probe));
    }
}
6 changes: 3 additions & 3 deletions wright/tests/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,16 @@ use termcolor::Buffer;
use wright::{
ast::identifier::Identifier,
lexer::Lexer,
parser::Parse,
parser::{Parse, Parser},
source_tracking::{filename::FileName, source::Source, SourceMap, SourceRef},
};

#[test]
fn test_parse_fail_identifier_to_diagnostic() -> anyhow::Result<()> {
let map: SourceMap = SourceMap::new();
let source_ref: SourceRef = map.add(Source::new_from_static_str(FileName::None, "12345"));
let mut lexer = Lexer::new(source_ref);
let parse_error = Identifier::parse(&mut lexer).unwrap_err();
let mut parser = Parser::new(Lexer::new(source_ref));
let parse_error = Identifier::parse(&mut parser).unwrap_err();
let mut buffer = Buffer::no_color();

parse_error
Expand Down

0 comments on commit cbba2a1

Please sign in to comment.