Skip to content

Commit

Permalink
Integer literal AST node
Browse files Browse the repository at this point in the history
  • Loading branch information
vcfxb committed Jul 27, 2024
1 parent 62d8516 commit 447d69c
Show file tree
Hide file tree
Showing 14 changed files with 179 additions and 39 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/cargo-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
- source-tracking
- reporting
- file_memmap
- ast-model
- ast-models
- lexer
- parser
- wright_library_defaults
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/cargo-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
- source-tracking
- reporting
- file_memmap
- ast-model
- ast-models
- lexer
- parser
- wright_library_defaults
Expand Down
14 changes: 9 additions & 5 deletions wright/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,14 @@ wright_library_defaults = ["file_memmap", "parser"]
# Wright's parser depends on the ability to report parsing errors and construct AST models.
parser = [
"reporting",
"ast-model",
"ast-models",
"lexer"
]

# Wright's abstract syntax tree model is built on types from the "source_tracking" module.
ast-model = [
ast-models = [
"source-tracking",
"dep:num",
# "derive_more/from"
]

Expand Down Expand Up @@ -115,11 +116,14 @@ none = []
# SIMPLE DEPENDENCIES:
[dependencies]

# Big Integers
# num = "0.4"

# DEPENDENCIES:

# Num gives us integer types of unbound size/domain.
# Used in AST node representations for integer literals.
[dependencies.num]
version = "0.4"
optional = true

# Unicode identifier functions.
# Used by:
# - "parser"
Expand Down
15 changes: 5 additions & 10 deletions wright/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,12 @@
use rustc_version::{version_meta, Channel};

fn main() {
// Set cfg flags depending on release channel.
// See: https://stackoverflow.com/a/70914430.
let channel = match version_meta().unwrap().channel {
Channel::Stable => "CHANNEL_STABLE",
Channel::Beta => "CHANNEL_BETA",
Channel::Nightly => "CHANNEL_NIGHTLY",
Channel::Dev => "CHANNEL_DEV",
};
// Set a cfg flag if we're on the nightly channel.

println!("cargo:rustc-cfg={}", channel);
println!("cargo::rustc-check-cfg=cfg({})", channel);
println!("cargo::rustc-check-cfg=cfg(CHANNEL_NIGHTLY)");
if version_meta().unwrap().channel == Channel::Nightly {
println!("cargo:rustc-cfg=CHANNEL_NIGHTLY");
}

// Save build info.
// See https://docs.rs/built/0.7.4/built/index.html.
Expand Down
2 changes: 2 additions & 0 deletions wright/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@
pub mod identifier;
pub mod path;
// pub mod ty;
pub mod literal;
16 changes: 16 additions & 0 deletions wright/src/ast/literal.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
//! AST node models representing literal values in source code.
use num::BigUint;

use crate::source_tracking::fragment::Fragment;

/// An integer literal as written in source code.
///
/// Only unsigned values are represented here: a negative number in source is considered to be a combination of
/// an integer literal with a unary negation, not a literal of its own.
#[derive(Debug)]
pub struct IntegerLiteral {
/// The [Fragment] of source code containing this integer literal.
pub fragment: Fragment,

/// The value of the integer parsed from the matching source. Stored as a [BigUint] so the literal is not
/// limited to the range of any fixed-width integer type.
pub value: BigUint,
}
1 change: 1 addition & 0 deletions wright/src/ast/ty.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

15 changes: 15 additions & 0 deletions wright/src/lexer/integer_literal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ pub fn try_consume_integer_literal(lexer: &mut Lexer) -> Option<Token> {
}
}

// The first character after the optional prefix is required to be a digit, not underscore.
bytes_consumed += chars.next_if(|c| c.is_digit(radix))?.len_utf8();

// Add the rest of the integer literal.
bytes_consumed += chars
.take_while(|c| c.is_digit(radix) || *c == '_')
Expand All @@ -45,6 +48,8 @@ pub fn try_consume_integer_literal(lexer: &mut Lexer) -> Option<Token> {

#[cfg(test)]
mod tests {
use crate::lexer::integer_literal::try_consume_integer_literal;

use super::{Lexer, TokenTy};

#[test]
Expand All @@ -55,5 +60,15 @@ mod tests {

assert_eq!(token.fragment.as_str(), "123_456_789");
assert_eq!(token.variant, TokenTy::IntegerLiteral);
assert_eq!(lexer.remaining.as_str(), ".");
}

#[test]
fn cant_start_with_underscore() {
    // A radix prefix followed directly by an underscore is not a valid integer literal.
    let source = "0x__10";
    let mut lexer = Lexer::new_test(source);

    let consumed = try_consume_integer_literal(&mut lexer);

    // Nothing is produced, and the lexer has not advanced past any of the input.
    assert!(consumed.is_none());
    assert_eq!(lexer.remaining.as_str(), source);
}
}
4 changes: 3 additions & 1 deletion wright/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
#![deny(missing_copy_implementations, missing_debug_implementations)]
#![deny(rustdoc::broken_intra_doc_links)]
#![warn(missing_docs)]


// Compiler directive to get docs.rs (which uses the nightly version of the rust compiler) to show
// info about features required for various modules and functionality.
//
Expand Down Expand Up @@ -45,7 +47,7 @@ pub mod reporting;
#[cfg(feature = "lexer")]
pub mod lexer;

#[cfg(feature = "ast-model")]
#[cfg(feature = "ast-models")]
pub mod ast;

#[cfg(feature = "parser")]
Expand Down
24 changes: 20 additions & 4 deletions wright/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
//! [AST]: crate::ast
//! [Token]: crate::lexer::token::Token
use error::{ParserError, ParserErrorKind};

use super::lexer::Lexer;
use crate::{
lexer::token::{Token, TokenTy},
Expand All @@ -13,6 +15,7 @@ use std::collections::VecDeque;
pub mod error;
mod identifier;
mod path;
mod literal;
pub mod whitespace;

/// The [Parser] struct wraps a [Lexer] and adds lookahead and functions that are useful for parsing.
Expand All @@ -31,11 +34,23 @@ impl Parser {
}
}

/// Get the next [Token] from this [Parser]. This may be a clone of a token that's already been peeked.
pub fn next_token(&mut self) -> Option<Token> {
self.lookahead
/// Get the next [Token] from this [Parser]. This may be a token that's already been peeked.
/// Return an error if a [Token] with [TokenTy::Unknown] is encountered.
pub fn next_token(&mut self) -> Result<Option<Token>, ParserError> {
let token = self.lookahead
.pop_front()
.or_else(|| self.lexer.next_token())
.or_else(|| self.lexer.next_token());

// Check for unknown tokens, which should always convert to an error.
if let Some(Token { variant: TokenTy::Unknown, fragment }) = token {
Err(ParserError {
kind: ParserErrorKind::EncounteredUnknownToken,
location: fragment,
help: None,
})
} else {
Ok(token)
}
}

/// Advance this [Parser] by `n` [Token]s. If this [Parser] runs out of [Token]s, panic.
Expand Down Expand Up @@ -103,6 +118,7 @@ impl Parser {
pub fn next_if_is(&mut self, token_ty: TokenTy) -> Option<Token> {
    // Peek first: bail out with `None` when there is no next token or when it is not the requested variant.
    if self.peek()?.variant != token_ty {
        return None;
    }

    // Peeking successfully means the lookahead queue is not empty here, so this yields the token we just
    // inspected. `pop_front` already returns an `Option`, so no `unsafe` unwrap is needed to produce the
    // return value.
    self.lookahead.pop_front()
}

Expand Down
22 changes: 8 additions & 14 deletions wright/src/parser/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@ use std::borrow::Cow;
#[allow(missing_docs)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ParserErrorKind {
UnterminatedStringLiteralEncountered,
UnterminatedMultilineCommentEncountered,
EncounteredUnknownToken,
EncounteredUnterminatedComment,
EncounteredUnterminatedString,
ExpectedIdentifier,
ExpectedPath,
ExpectedWhitespace,
ExpectedIntegerLiteral,
}

impl ParserErrorKind {
Expand All @@ -25,22 +27,15 @@ impl ParserErrorKind {
use ParserErrorKind::*;

match self {
EncounteredUnknownToken => "encountered unknown token",
EncounteredUnterminatedComment => "encountered unterminated multiline comment while parsing",
EncounteredUnterminatedString => "encountered unterminated string literal while parsing",
ExpectedIdentifier => "expected identifier",
ExpectedIntegerLiteral => "expected integer literal",
ExpectedPath => "expected path or identifier",
ExpectedWhitespace => "expected whitespace character(s)",
UnterminatedMultilineCommentEncountered => {
"encountered unterminated multiline comment while parsing"
}
UnterminatedStringLiteralEncountered => {
"encountered unterminated string literal while parsing"
}
}
}

/// Return this [ParserErrorKind] cast to a [u64], adding 1, preceded by the letters "WPE" standing for "Wright Parser Error".
pub fn error_code_string(self) -> String {
format!("WPE{}", self as u64 + 1)
}
}

/// An error that occurred while parsing.
Expand All @@ -64,7 +59,6 @@ impl ParserError {
let description = self.kind.describe();

let mut diagnostic = Diagnostic::error()
.with_code(self.kind.error_code_string())
.with_message(description)
.with_highlights([Highlight::primary(self.location, "")]);

Expand Down
95 changes: 95 additions & 0 deletions wright/src/parser/literal.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
//! Literal parsing implementation.
use num::{BigUint, Num};

use crate::{ast::literal::IntegerLiteral, lexer::token::TokenTy};

use super::{error::{ParserError, ParserErrorKind}, Parser};


impl IntegerLiteral {
    /// Parse an [IntegerLiteral] from the front of the given [Parser].
    ///
    /// # Errors
    /// Returns a [ParserError] of kind [ParserErrorKind::ExpectedIntegerLiteral] when the next token is not an
    /// integer literal. The error points at the offending token, or at the remaining (empty) source with a help
    /// message when the parser has run out of tokens.
    ///
    /// # Panics
    /// Panics if the integer literal token is empty or if `num` rejects its digits. The lexer is expected to
    /// rule both of these out.
    pub fn parse(parser: &mut Parser) -> Result<Self, ParserError> {
        // Take the integer literal token off the parser, or report what was found in its place.
        let token = match parser.next_if_is(TokenTy::IntegerLiteral) {
            Some(token) => token,

            None => {
                let (location, help) = match parser.peek_fragment() {
                    // Some other token is next: point the error at it.
                    Some(found) => (found.clone(), None),
                    // No tokens left: point at the end of the source.
                    None => (
                        parser.lexer.remaining.clone(),
                        Some("found end of source".into()),
                    ),
                };

                return Err(ParserError {
                    kind: ParserErrorKind::ExpectedIntegerLiteral,
                    location,
                    help,
                });
            }
        };

        let text: &str = token.fragment.as_str();
        let mut leading = text.chars();

        // Unwrap: integer literals must be at minimum 1 character, enforced by the lexer.
        let first = leading.next().unwrap();
        // A null character stands in for a missing second character; it can never match a radix prefix.
        let second = leading.next().unwrap_or('\0');

        // Determine the radix and strip the two-character prefix (if any) from the digits handed to num.
        let (radix, digits): (u32, &str) = match (first, second) {
            // Hexadecimal.
            ('0', 'x' | 'X') => (16, &text[2..]),
            // Binary.
            ('0', 'b' | 'B') => (2, &text[2..]),
            // Octal.
            ('0', 'o') => (8, &text[2..]),
            // Anything else is a plain decimal literal with no prefix.
            _ => (10, text),
        };

        // The format of the digits has already been validated before this point, so a parse failure here
        // would be a bug rather than a user error.
        let value = BigUint::from_str_radix(digits, radix).expect("num should successfully parse");

        Ok(IntegerLiteral {
            fragment: token.fragment,
            value,
        })
    }
}

#[cfg(test)]
mod tests {
    use num::BigUint;

    use crate::{ast::literal::IntegerLiteral, lexer::Lexer, parser::Parser};

    /// A plain decimal literal parses to its value and consumes the whole input.
    #[test]
    fn normal() {
        let mut parser = Parser::new(Lexer::new_test("1000"));

        let int_lit = IntegerLiteral::parse(&mut parser).unwrap();

        assert_eq!(int_lit.value, BigUint::new(vec![1000]));
        assert_eq!(parser.lexer.remaining.as_str(), "");
        assert_eq!(int_lit.fragment.as_str(), "1000");
    }

    /// Underscore separators stay in the fragment but do not affect the parsed value.
    #[test]
    fn ignore_underscores() {
        let mut parser = Parser::new(Lexer::new_test("1_000_000"));

        let int_lit = IntegerLiteral::parse(&mut parser).unwrap();

        assert_eq!(int_lit.value, BigUint::from(1_000_000u32));
        assert_eq!(parser.lexer.remaining.as_str(), "");
        assert_eq!(int_lit.fragment.as_str(), "1_000_000");
    }

    /// A radix prefix selects the base and is excluded from the digits that are parsed.
    #[test]
    fn hex_prefix() {
        let mut parser = Parser::new(Lexer::new_test("0xFF"));

        let int_lit = IntegerLiteral::parse(&mut parser).unwrap();

        assert_eq!(int_lit.value, BigUint::from(255u32));
        assert_eq!(parser.lexer.remaining.as_str(), "");
        assert_eq!(int_lit.fragment.as_str(), "0xFF");
    }
}
4 changes: 2 additions & 2 deletions wright/src/parser/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ fn parse_segment(parser: &mut Parser) -> Option<Identifier> {
for sep_token_sequence in VALID_SEGMENT_SEQUENCES {
if parser.matches(sep_token_sequence) {
parser.advance(sep_token_sequence.len() - 1);
// We can unwrap here because we just checked/matched that this parser ends with an identifier.
return Some(Identifier::parse(parser).unwrap());
// SAFETY: We just checked/matched that this parser ends with an identifier.
return Some(unsafe { Identifier::parse(parser).unwrap_unchecked() });
}
}

Expand Down
2 changes: 1 addition & 1 deletion wright/tests/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ fn test_parse_fail_identifier_to_diagnostic() -> anyhow::Result<()> {
assert_eq!(
std::str::from_utf8(buffer.as_slice())?,
"\
error[WPE3]: expected identifier
error: expected identifier
┌─ <NO_NAME>:1:1
1 │ 12345
Expand Down

0 comments on commit 447d69c

Please sign in to comment.