From 1ff0060ed2c66bf271a8f67e4b8521023fa590ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Koz=C5=82owski?= Date: Mon, 2 Oct 2023 03:26:12 +0200 Subject: [PATCH] Add support for multi-char keywords --- .../smithyql/parser/v2/scanner.scala | 53 +++++++++++++------ .../smithyql/parser/v2/ScannerTests.scala | 25 +++++++++ 2 files changed, 62 insertions(+), 16 deletions(-) diff --git a/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala b/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala index 6fea2b15..e3a10929 100644 --- a/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala +++ b/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala @@ -64,38 +64,59 @@ object Scanner { ) = tokens ::= tok def readSimple( - token: Char, + token: String, tok: TokenKind, ): PartialFunction[Unit, Unit] = { - case _ if remaining.startsWith(token.toString()) => + case _ if remaining.startsWith(token) => add(tok(token.toString)) - remaining = remaining.drop(token.toString().length()) + remaining = remaining.drop(token.length()) } def simpleTokens( pairings: ( - Char, + String, TokenKind, )* ): PartialFunction[Unit, Unit] = pairings.map(readSimple.tupled).reduce(_.orElse(_)) - def readOne: PartialFunction[Unit, Unit] = simpleTokens( - '.' -> TokenKind.DOT, - ',' -> TokenKind.COMMA, - '#' -> TokenKind.HASH, - '[' -> TokenKind.LB, - ']' -> TokenKind.RB, - '{' -> TokenKind.LBR, - '}' -> TokenKind.RBR, - ':' -> TokenKind.COLON, - '=' -> TokenKind.EQ, - ).orElse { + val keywords = Map( + "use" -> TokenKind.KW_USE, + "service" -> TokenKind.KW_SERVICE, + "null" -> TokenKind.KW_NULL, + "true" -> TokenKind.KW_BOOLEAN, + "false" -> TokenKind.KW_BOOLEAN, + ) + + def readIdent: PartialFunction[Unit, Unit] = { case _ if remaining.head.isLetter => val (letters, rest) = remaining.span(ch => ch.isLetterOrDigit || ch == '_') - add(TokenKind.IDENT(letters)) + + keywords.get(letters) match { + case Some(kind) => + // we matched a keyword, return it. + add(kind(letters)) + case None => + // normal ident + add(TokenKind.IDENT(letters)) + } + remaining = rest } + def readPunctuation: PartialFunction[Unit, Unit] = simpleTokens( + "." -> TokenKind.DOT, + "," -> TokenKind.COMMA, + "#" -> TokenKind.HASH, + "[" -> TokenKind.LB, + "]" -> TokenKind.RB, + "{" -> TokenKind.LBR, + "}" -> TokenKind.RBR, + ":" -> TokenKind.COLON, + "=" -> TokenKind.EQ, + ) + + def readOne: PartialFunction[Unit, Unit] = readIdent.orElse(readPunctuation) + // split "whitespace" string into chains of contiguous newlines OR whitespace characters. def whitespaceChains( whitespace: String diff --git a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala index f15b793d..0eaa7271 100644 --- a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala +++ b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala @@ -97,6 +97,12 @@ object ScannerTests extends SimpleIOSuite with Checkers { scanTest(":")(List(TokenKind.COLON(":"))) scanTest("=")(List(TokenKind.EQ("="))) scanTest("a")(List(TokenKind.IDENT("a"))) + scanTest("use")(List(TokenKind.KW_USE("use"))) + scanTest("service")(List(TokenKind.KW_SERVICE("service"))) + scanTest("null")(List(TokenKind.KW_NULL("null"))) + scanTest("true")(List(TokenKind.KW_BOOLEAN("true"))) + scanTest("false")(List(TokenKind.KW_BOOLEAN("false"))) + // todo: number, string // idents scanTest("abcdef")(List(TokenKind.IDENT("abcdef"))) @@ -117,6 +123,18 @@ object ScannerTests extends SimpleIOSuite with Checkers { ) ) + scanTest(explicitName = "Identifier similar to a keyword - prefix", input = "notfalse")( + List( + TokenKind.IDENT("notfalse") + ) + ) + + scanTest(explicitName = "Identifier similar to a keyword - suffix", input = "falsely")( + List( + TokenKind.IDENT("falsely") + ) + ) + // whitespace scanTest(" ")(List(TokenKind.SPACE(" "))) scanTest("\n")(List(TokenKind.NEWLINE("\n"))) @@ -199,6 +217,13 @@ object ScannerTests extends SimpleIOSuite with Checkers { ) ) + scanTest(explicitName = "Error tokens before a multi-char keyword", input = "--false")( + List( + TokenKind.Error("--"), + TokenKind.KW_BOOLEAN("false"), + ) + ) + // complex cases scanTestReverse(