Skip to content

Commit

Permalink
Add support for multi-char keywords
Browse files Browse the repository at this point in the history
  • Loading branch information
kubukoz committed Oct 2, 2023
1 parent d117fc2 commit 1ff0060
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -64,38 +64,59 @@ object Scanner {
) = tokens ::= tok

/** Builds a matcher for one fixed piece of source text.
  *
  * The resulting partial function is defined only while `remaining` starts with `token`. When applied, it
  * records a `tok` token carrying that text and removes the matched characters from the front of `remaining`.
  *
  * @param token the exact text to match; may be longer than one character
  * @param tok   the kind of token to record for a match
  */
def readSimple(
  token: String,
  tok: TokenKind,
): PartialFunction[Unit, Unit] = {
  // An empty token would match without consuming anything, so it is never treated as a match.
  case _ if token.nonEmpty && remaining.startsWith(token) =>
    add(tok(token))
    // Consume the matched text exactly once.
    remaining = remaining.drop(token.length)
}

/** Combines several fixed-text matchers into one.
  *
  * Each pairing is turned into a matcher with `readSimple`. The matchers are tried in the order given and the
  * first one that is defined wins. With no pairings the result is defined nowhere.
  *
  * @param pairings source text paired with the kind of token it produces
  */
def simpleTokens(
  pairings: (
    String,
    TokenKind,
  )*
): PartialFunction[Unit, Unit] =
  pairings
    .map(readSimple.tupled)
    // Folding from the empty partial function keeps an empty argument list from throwing, unlike `reduce`.
    .foldLeft(PartialFunction.empty[Unit, Unit])(_.orElse(_))

// Reserved words and the token kind each one produces. Both boolean literals share KW_BOOLEAN; the token text
// tells them apart.
val keywords: Map[String, TokenKind] = Map(
  "use" -> TokenKind.KW_USE,
  "service" -> TokenKind.KW_SERVICE,
  "null" -> TokenKind.KW_NULL,
  "true" -> TokenKind.KW_BOOLEAN,
  "false" -> TokenKind.KW_BOOLEAN,
)

/** Reads an identifier or keyword from the front of `remaining`.
  *
  * Defined only when `remaining` starts with a letter. The longest leading run of letters, digits and
  * underscores is consumed as one word, and exactly one token is recorded for it: the keyword token when the
  * whole word is a key of `keywords`, otherwise an `IDENT`. Because the whole word is looked up rather than a
  * prefix, words such as `falsely` or `notfalse` stay ordinary identifiers.
  */
def readIdent: PartialFunction[Unit, Unit] = {
  // headOption keeps the guard total: on empty input this is simply not defined instead of throwing.
  case _ if remaining.headOption.exists(_.isLetter) =>
    val (letters, rest) = remaining.span(ch => ch.isLetterOrDigit || ch == '_')

    // Keywords take precedence; anything else is a normal identifier.
    val kind = keywords.getOrElse(letters, TokenKind.IDENT)
    add(kind(letters))

    remaining = rest
}

/** Matcher for the fixed punctuation tokens. */
def readPunctuation: PartialFunction[Unit, Unit] = {
  // Source text paired with the token kind it produces; entries are tried from top to bottom.
  val punctuation: Seq[(String, TokenKind)] = Seq(
    (".", TokenKind.DOT),
    (",", TokenKind.COMMA),
    ("#", TokenKind.HASH),
    ("[", TokenKind.LB),
    ("]", TokenKind.RB),
    ("{", TokenKind.LBR),
    ("}", TokenKind.RBR),
    (":", TokenKind.COLON),
    ("=", TokenKind.EQ),
  )

  simpleTokens(punctuation: _*)
}

/** Reads a single token: identifiers and keywords are tried first, punctuation second. */
def readOne: PartialFunction[Unit, Unit] = {
  val wordFirst = readIdent
  wordFirst orElse readPunctuation
}

// split "whitespace" string into chains of contiguous newlines OR whitespace characters.
def whitespaceChains(
whitespace: String
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,12 @@ object ScannerTests extends SimpleIOSuite with Checkers {
// Single-token inputs: each input is paired with a one-element list of the token it is expected to yield.
// NOTE(review): scanTest is defined outside this view; presumably it scans the first argument and compares the
// result with the given list — confirm.
scanTest(":")(List(TokenKind.COLON(":")))
scanTest("=")(List(TokenKind.EQ("=")))
scanTest("a")(List(TokenKind.IDENT("a")))
// Keywords: each reserved word is expected as its keyword token, not as an IDENT.
scanTest("use")(List(TokenKind.KW_USE("use")))
scanTest("service")(List(TokenKind.KW_SERVICE("service")))
scanTest("null")(List(TokenKind.KW_NULL("null")))
// Both boolean literals map to the same kind; only the token text differs.
scanTest("true")(List(TokenKind.KW_BOOLEAN("true")))
scanTest("false")(List(TokenKind.KW_BOOLEAN("false")))
// todo: number, string

// idents
// A multi-letter word that is not a keyword is expected as a single IDENT.
scanTest("abcdef")(List(TokenKind.IDENT("abcdef")))
Expand All @@ -117,6 +123,18 @@ object ScannerTests extends SimpleIOSuite with Checkers {
)
)

// Keyword look-alikes: a word that merely contains keyword text is expected as one IDENT.
// Here the keyword text "false" is preceded by other letters.
scanTest(explicitName = "Identifier similar to a keyword - prefix", input = "notfalse")(
List(
TokenKind.IDENT("notfalse")
)
)

// Here the keyword text "false" is followed by other letters.
scanTest(explicitName = "Identifier similar to a keyword - suffix", input = "falsely")(
List(
TokenKind.IDENT("falsely")
)
)

// whitespace
// A single space and a single newline are expected as distinct token kinds.
scanTest(" ")(List(TokenKind.SPACE(" ")))
scanTest("\n")(List(TokenKind.NEWLINE("\n")))
Expand Down Expand Up @@ -199,6 +217,13 @@ object ScannerTests extends SimpleIOSuite with Checkers {
)
)

// Unrecognized characters directly before a keyword: the two dashes are expected as one Error token, and the
// keyword that follows is still expected as a keyword token.
scanTest(explicitName = "Error tokens before a multi-char keyword", input = "--false")(
List(
TokenKind.Error("--"),
TokenKind.KW_BOOLEAN("false"),
)
)

// complex cases

scanTestReverse(
Expand Down

0 comments on commit 1ff0060

Please sign in to comment.