Skip to content

Commit

Permalink
Add rule modifiers and rule tags support
Browse files Browse the repository at this point in the history
  • Loading branch information
TommYDeeee committed Feb 28, 2024
1 parent c203e25 commit 9ee13b5
Show file tree
Hide file tree
Showing 18 changed files with 303 additions and 41 deletions.
2 changes: 1 addition & 1 deletion example.yar
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//Global comment

//Rule comment
rule test
rule test : bla test
{
//Rule block comment

Expand Down
6 changes: 6 additions & 0 deletions src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ pub(crate) enum LogosToken {
// Keywords
#[token("rule")]
Rule,
#[token("private")]
Private,
#[token("global")]
Global,
#[token("meta")]
Meta,
#[token("strings")]
Expand Down Expand Up @@ -149,6 +153,8 @@ pub fn tokenize(text: &str) -> (Vec<Token>, Vec<SyntaxError>) {
fn logos_tokenkind_to_syntaxkind(token: LogosToken) -> SyntaxKind {
match token {
LogosToken::Rule => SyntaxKind::RULE_KW,
LogosToken::Private => SyntaxKind::PRIVATE_KW,
LogosToken::Global => SyntaxKind::GLOBAL_KW,
LogosToken::Meta => SyntaxKind::META_KW,
LogosToken::Strings => SyntaxKind::STRINGS_KW,
LogosToken::Condition => SyntaxKind::CONDITION_KW,
Expand Down
21 changes: 4 additions & 17 deletions src/parser/grammar/expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,6 @@ mod atom;

use super::*;

/// Recovery set for `strings` block. This also should be adjusted and tweaked to
/// better represents recovery set later on
const STRINGS_RECOVERY_SET: TokenSet = TokenSet::new(&[T![strings]]);

const META_RECOVERY_SET: TokenSet = TokenSet::new(&[T![identifier]]);

/// Parse a rule body
/// A rule body consists of `{`, rule_body and `}`
/// This can probably be later simplified to not have both
Expand All @@ -32,6 +26,7 @@ pub(super) fn rule_body(p: &mut Parser) {
let mut has_strings = false;
let mut has_condition = false;
let mut has_meta = false;

while !p.at(EOF) && !p.at(T!['}']) {
match p.current() {
T![meta] => {
Expand Down Expand Up @@ -66,15 +61,7 @@ pub(super) fn rule_body(p: &mut Parser) {
// but we can still try to parse their body and throw an error for parent
// for now it just looks at the next 2 tokens to differentiate between valid strings
// body or condition body. This should probably be adjusted later
p.err_and_bump("expected strings or condition");
if p.current() == T![:] {
p.eat(T![:]);
if p.current() == T![variable] && p.nth(1) == T![=] {
strings_body(p)
} else if let Some(_) = expression(p, None, 1) {
condition_body(p);
}
}
p.err_and_bump("expected meta, strings or condition keyword");
}
}
}
Expand Down Expand Up @@ -121,7 +108,7 @@ pub(super) fn meta_body(p: &mut Parser) {
if p.at(T![identifier]) {
p.bump(T![identifier]);
} else {
p.err_recover("expected an identifier", META_RECOVERY_SET);
p.err_and_bump("expected an identifier");
}
p.expect(T![=]);
match p.current() {
Expand All @@ -145,7 +132,7 @@ pub(super) fn strings_body(p: &mut Parser) {
if p.at(T![variable]) {
p.bump(T![variable]);
} else {
p.err_recover("expected a variable", STRINGS_RECOVERY_SET);
p.err_and_bump("expected a variable");
}
p.expect(T![=]);
// so far only strings are supported, later add match for hex strings and regex
Expand Down
21 changes: 18 additions & 3 deletions src/parser/grammar/items.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,23 +48,38 @@ pub(super) fn process_top_level(p: &mut Parser, stop_on_r_brace: bool) {
// In the future, also imports and includes will be supported here
// Try to parse one top-level item at the current position.
// The only item handled here is a rule, optionally preceded by modifiers.
// `m` is a marker supplied by the caller: it is handed over to `rule` on success,
// and given back as `Err(m)` when, after any modifiers, the current token is not
// the `rule` keyword.
pub(super) fn opt_rule_import_include(p: &mut Parser, m: Marker) -> Result<(), Marker> {
// NOTE(review): this is a diff view - the three `match` lines below are the removed
// version of the dispatch; the `while` / `if` code after them is its replacement
match p.current() {
T![rule] => rule(p, m),
_ => return Err(m),
// Consume every leading `private` / `global` keyword, wrapping each one in its own
// MODIFIER node. The loop does not restrict their count or order.
while p.at_ts(TokenSet::new(&[T![private], T![global]])) {
// this inner `m` shadows the parameter only for the duration of the loop body
let m = p.start();
p.bump_any();
m.complete(p, MODIFIER);
}
// Only a `rule` keyword may follow; anything else returns the marker to the caller
if p.at(T![rule]) {
rule(p, m);
} else {
return Err(m);
}
Ok(())
}

// Parse a rule
// It consists of the `rule` keyword, a rule name [`IDENTIFIER`], an optional tag
// list (`:` followed by one or more [`IDENTIFIER`]s) and a body [`block_expr`]
//
// `m` is the marker passed in by the caller; it is completed here as a `RULE` node.
// Panics if the parser is not positioned at the `rule` keyword.
fn rule(p: &mut Parser, m: Marker) {
    assert!(p.at(T![rule]));
    p.bump(T![rule]);
    if p.at(IDENTIFIER) {
        p.bump(IDENTIFIER);
    } else {
        p.err_recover("expected a name", RULE_RECOVERY_SET);
    }
    // optional rule tags: `: tag1 tag2 ...`
    if p.at(T![:]) {
        p.bump(T![:]);
        // A colon announces a tag list, so a colon that is not followed by any tag
        // is reported instead of being silently accepted. The same recovery call as
        // for a missing rule name is used, so both cases recover the same way.
        if !p.at(IDENTIFIER) {
            p.err_recover("expected a tag", RULE_RECOVERY_SET);
        }
        while p.at(IDENTIFIER) {
            // every tag gets its own `TAG` node wrapping the identifier token
            let tag = p.start();
            p.bump(IDENTIFIER);
            tag.complete(p, TAG);
        }
    }
    expressions::block_expr(p);
    m.complete(p, RULE);
}
10 changes: 9 additions & 1 deletion src/parser/syntax_kind/generated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ pub enum SyntaxKind {
STRINGS_KW,
CONDITION_KW,
META_KW,
PRIVATE_KW,
GLOBAL_KW,
STRING_LIT,
INT_LIT,
FLOAT_LIT,
Expand All @@ -34,6 +36,8 @@ pub enum SyntaxKind {
COMMENT,
ERROR,
RULE,
MODIFIER,
TAG,
STRINGS,
META,
CONDITION,
Expand Down Expand Up @@ -63,6 +67,8 @@ impl SyntaxKind {
| STRINGS_KW
| CONDITION_KW
| META_KW
| PRIVATE_KW
| GLOBAL_KW
)
}
pub fn is_punct(self) -> bool {
Expand All @@ -82,6 +88,8 @@ impl SyntaxKind {
"strings" => STRINGS_KW,
"condition" => CONDITION_KW,
"meta" => META_KW,
"private" => PRIVATE_KW,
"global" => GLOBAL_KW,
_ => return None,
};
Some(kw)
Expand All @@ -101,5 +109,5 @@ impl SyntaxKind {
}
}
// `T![...]` maps the spelling of a punctuation mark, keyword or token name to the
// matching `SyntaxKind` variant, e.g. `T![rule]` expands to `$crate::SyntaxKind::RULE_KW`.
// NOTE(review): this is a diff view - the first definition below is the removed one,
// the second is its replacement, which adds the `private` and `global` arms.
#[macro_export]
macro_rules ! T { [:] => { $ crate :: SyntaxKind :: COLON } ; ['('] => { $ crate :: SyntaxKind :: L_PAREN } ; [')'] => { $ crate :: SyntaxKind :: R_PAREN } ; ['{'] => { $ crate :: SyntaxKind :: L_BRACE } ; ['}'] => { $ crate :: SyntaxKind :: R_BRACE } ; [,] => { $ crate :: SyntaxKind :: COMMA } ; [=] => { $ crate :: SyntaxKind :: ASSIGN } ; [and] => { $ crate :: SyntaxKind :: AND_KW } ; [or] => { $ crate :: SyntaxKind :: OR_KW } ; [not] => { $ crate :: SyntaxKind :: NOT_KW } ; [true] => { $ crate :: SyntaxKind :: TRUE_KW } ; [false] => { $ crate :: SyntaxKind :: FALSE_KW } ; [rule] => { $ crate :: SyntaxKind :: RULE_KW } ; [strings] => { $ crate :: SyntaxKind :: STRINGS_KW } ; [condition] => { $ crate :: SyntaxKind :: CONDITION_KW } ; [meta] => { $ crate :: SyntaxKind :: META_KW } ; [identifier] => { $ crate :: SyntaxKind :: IDENTIFIER } ; [variable] => { $ crate :: SyntaxKind :: VARIABLE } ; [string_lit] => { $ crate :: SyntaxKind :: STRING_LIT } ; [int_lit] => { $ crate :: SyntaxKind :: INT_LIT } ; [float_lit] => { $ crate :: SyntaxKind :: FLOAT_LIT } ; }
macro_rules ! T { [:] => { $ crate :: SyntaxKind :: COLON } ; ['('] => { $ crate :: SyntaxKind :: L_PAREN } ; [')'] => { $ crate :: SyntaxKind :: R_PAREN } ; ['{'] => { $ crate :: SyntaxKind :: L_BRACE } ; ['}'] => { $ crate :: SyntaxKind :: R_BRACE } ; [,] => { $ crate :: SyntaxKind :: COMMA } ; [=] => { $ crate :: SyntaxKind :: ASSIGN } ; [and] => { $ crate :: SyntaxKind :: AND_KW } ; [or] => { $ crate :: SyntaxKind :: OR_KW } ; [not] => { $ crate :: SyntaxKind :: NOT_KW } ; [true] => { $ crate :: SyntaxKind :: TRUE_KW } ; [false] => { $ crate :: SyntaxKind :: FALSE_KW } ; [rule] => { $ crate :: SyntaxKind :: RULE_KW } ; [strings] => { $ crate :: SyntaxKind :: STRINGS_KW } ; [condition] => { $ crate :: SyntaxKind :: CONDITION_KW } ; [meta] => { $ crate :: SyntaxKind :: META_KW } ; [private] => { $ crate :: SyntaxKind :: PRIVATE_KW } ; [global] => { $ crate :: SyntaxKind :: GLOBAL_KW } ; [identifier] => { $ crate :: SyntaxKind :: IDENTIFIER } ; [variable] => { $ crate :: SyntaxKind :: VARIABLE } ; [string_lit] => { $ crate :: SyntaxKind :: STRING_LIT } ; [int_lit] => { $ crate :: SyntaxKind :: INT_LIT } ; [float_lit] => { $ crate :: SyntaxKind :: FLOAT_LIT } ; }
pub use T;
1 change: 1 addition & 0 deletions src/syntax/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ pub use self::{
generated::{nodes::*, tokens::*},
operators::*,
traits::HasComments,
traits::HasModifier,
};

/// Zero runtime cost conversion to AST layer
Expand Down
93 changes: 93 additions & 0 deletions src/syntax/ast/generated/nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ impl SourceFile {
pub struct Rule {
pub(crate) syntax: SyntaxNode,
}
impl ast::HasModifier for Rule {}
impl ast::HasComments for Rule {}
impl Rule {
pub fn rule_token(&self) -> Option<SyntaxToken> {
Expand All @@ -30,11 +31,40 @@ impl Rule {
pub fn identifier_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![identifier])
}
pub fn colon_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![:])
}
pub fn tags(&self) -> AstChildren<Tag> {
support::children(&self.syntax)
}
pub fn body(&self) -> Option<BlockExpr> {
support::child(&self.syntax)
}
}

/// Typed AST wrapper around a syntax node representing a rule modifier.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Modifier {
// underlying untyped syntax node
pub(crate) syntax: SyntaxNode,
}
impl Modifier {
/// Looks up the `private` keyword token in this node via `support::token`;
/// `None` when there is none.
pub fn private_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![private])
}
/// Looks up the `global` keyword token in this node via `support::token`;
/// `None` when there is none.
pub fn global_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![global])
}
}

/// Typed AST wrapper around a syntax node representing a single rule tag.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Tag {
// underlying untyped syntax node
pub(crate) syntax: SyntaxNode,
}
impl Tag {
/// Looks up the identifier token in this node via `support::token`;
/// `None` when there is none.
pub fn identifier_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![identifier])
}
}

#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct BlockExpr {
pub(crate) syntax: SyntaxNode,
Expand Down Expand Up @@ -209,6 +239,12 @@ pub struct AnyHasComments {
pub(crate) syntax: SyntaxNode,
}
impl ast::HasComments for AnyHasComments {}

/// Wrapper around a bare syntax node that exposes only the `HasModifier` trait,
/// independent of the concrete AST node type it was created from.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct AnyHasModifier {
// underlying untyped syntax node
pub(crate) syntax: SyntaxNode,
}
// No overrides: the trait's default methods are used as-is.
impl ast::HasModifier for AnyHasModifier {}
impl AstNode for SourceFile {
fn can_cast(kind: SyntaxKind) -> bool {
kind == SOURCE_FILE
Expand Down Expand Up @@ -239,6 +275,36 @@ impl AstNode for Rule {
&self.syntax
}
}
impl AstNode for Modifier {
    /// Only `MODIFIER` nodes can be viewed as a [`Modifier`].
    fn can_cast(kind: SyntaxKind) -> bool {
        matches!(kind, MODIFIER)
    }
    /// Wraps `syntax` when its kind is accepted by [`Self::can_cast`], otherwise `None`.
    fn cast(syntax: SyntaxNode) -> Option<Self> {
        Self::can_cast(syntax.kind()).then_some(Self { syntax })
    }
    /// Borrows the wrapped syntax node.
    fn syntax(&self) -> &SyntaxNode {
        &self.syntax
    }
}
impl AstNode for Tag {
    /// Only `TAG` nodes can be viewed as a [`Tag`].
    fn can_cast(kind: SyntaxKind) -> bool {
        matches!(kind, TAG)
    }
    /// Wraps `syntax` when its kind is accepted by [`Self::can_cast`], otherwise `None`.
    fn cast(syntax: SyntaxNode) -> Option<Self> {
        Self::can_cast(syntax.kind()).then_some(Self { syntax })
    }
    /// Borrows the wrapped syntax node.
    fn syntax(&self) -> &SyntaxNode {
        &self.syntax
    }
}
impl AstNode for BlockExpr {
fn can_cast(kind: SyntaxKind) -> bool {
kind == BLOCK_EXPR
Expand Down Expand Up @@ -457,6 +523,23 @@ impl AstNode for AnyHasComments {
&self.syntax
}
}
impl AnyHasModifier {
    /// Builds the wrapper from any node implementing `HasModifier` by cloning
    /// that node's underlying syntax node.
    #[inline]
    pub fn new<T: ast::HasModifier>(node: T) -> AnyHasModifier {
        let syntax = node.syntax().clone();
        Self { syntax }
    }
}
impl AstNode for AnyHasModifier {
fn can_cast(kind: SyntaxKind) -> bool {
matches!(kind, RULE)
}
fn cast(syntax: SyntaxNode) -> Option<Self> {
Self::can_cast(syntax.kind()).then_some(AnyHasModifier { syntax })
}
fn syntax(&self) -> &SyntaxNode {
&self.syntax
}
}
impl std::fmt::Display for Expr {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(self.syntax(), f)
Expand All @@ -472,6 +555,16 @@ impl std::fmt::Display for Rule {
std::fmt::Display::fmt(self.syntax(), f)
}
}
impl std::fmt::Display for Modifier {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(self.syntax(), f)
}
}
impl std::fmt::Display for Tag {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(self.syntax(), f)
}
}
impl std::fmt::Display for BlockExpr {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(self.syntax(), f)
Expand Down
9 changes: 8 additions & 1 deletion src/syntax/ast/traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,18 @@
//! to iterate over comments in the syntax tree
//! This can be easily extended to support other traits
use crate::syntax::ast::{self, AstNode};
use crate::syntax::ast::{self, support, AstNode};
use crate::syntax::syntax_node::SyntaxElementChildren;

use super::AstToken;

/// Implemented by AST nodes that can carry modifiers.
pub trait HasModifier: AstNode {
    /// Collects the source text of every `Modifier` child of this node, in the
    /// order `support::children` yields them. Empty when there are none.
    fn modifier(&self) -> Vec<String> {
        let mut texts = Vec::new();
        for child in support::children::<ast::Modifier>(self.syntax()) {
            texts.push(child.syntax().text().to_string());
        }
        texts
    }
}
pub trait HasComments: AstNode {
fn comments(&self) -> CommentIter {
CommentIter { iter: self.syntax().children_with_tokens() }
Expand Down
16 changes: 15 additions & 1 deletion src/syntax/tests/ast_src.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,25 @@ pub(crate) const KINDS_SRC: KindsSrc = KindsSrc {
(",", "COMMA"),
("=", "ASSIGN"),
],
keywords: &["and", "or", "not", "true", "false", "rule", "strings", "condition", "meta"],
keywords: &[
"and",
"or",
"not",
"true",
"false",
"rule",
"strings",
"condition",
"meta",
"private",
"global",
],
literals: &["STRING_LIT", "INT_LIT", "FLOAT_LIT"],
tokens: &["IDENTIFIER", "VARIABLE", "WHITESPACE", "COMMENT", "ERROR"],
nodes: &[
"RULE",
"MODIFIER",
"TAG",
"STRINGS",
"META",
"CONDITION",
Expand Down
Loading

0 comments on commit 9ee13b5

Please sign in to comment.