From 2987fa69ff9229b8870f4d54a4d78047b2400e78 Mon Sep 17 00:00:00 2001 From: vighnesh153 Date: Wed, 7 Feb 2024 18:59:23 +0530 Subject: [PATCH] feat: add double-equals lexing and fix bug in identifier lexing --- .../javalang/common/lexer/Lexer.kt | 16 +++++++-- .../javalang/common/lexer/LexerTest.kt | 34 ++++++++++++++++--- 2 files changed, 43 insertions(+), 7 deletions(-) diff --git a/jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/Lexer.kt b/jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/Lexer.kt index dec1e05c..5b3bf5f5 100644 --- a/jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/Lexer.kt +++ b/jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/Lexer.kt @@ -27,7 +27,16 @@ fun Lexer.nextToken(): Token { // todo: add row and column number in the token when (currentCharacter) { - '=' -> t = Token(tokenType = TokenType.EQUALS, tokenLiteral = TokenType.EQUALS.value) + '=' -> { + val p = peekCharacter() + t = if (p == '=') { + readNextCharacter() + Token(tokenType = TokenType.DOUBLE_EQUALS, tokenLiteral = TokenType.DOUBLE_EQUALS.value) + } else { + Token(tokenType = TokenType.EQUALS, tokenLiteral = TokenType.EQUALS.value) + } + } + ',' -> t = Token(tokenType = TokenType.COMMA, tokenLiteral = TokenType.COMMA.value) '+' -> t = Token(tokenType = TokenType.PLUS, tokenLiteral = TokenType.PLUS.value) ';' -> t = Token(tokenType = TokenType.SEMICOLON, tokenLiteral = TokenType.SEMICOLON.value) @@ -62,7 +71,9 @@ fun Lexer.nextToken(): Token { Char.MIN_VALUE -> t = Token.EOF else -> { if (currentCharacter.isAcceptableIdentifierStart()) { - t = Token( + // this return is necessary to avoid the unnecessary readNextCharacter + // call after when block + return Token( tokenType = TokenType.IDENTIFIER, tokenLiteral = readIdentifier() ) @@ -70,6 +81,7 @@ fun Lexer.nextToken(): Token { // read integer // read float // read double + // todo: return token } } } diff --git a/jvm-tools/interpreters/languages/java-lang-common/src/test/kotlin/interpreters/javalang/common/lexer/LexerTest.kt b/jvm-tools/interpreters/languages/java-lang-common/src/test/kotlin/interpreters/javalang/common/lexer/LexerTest.kt index ea6d6059..2412d1ff 100644 --- a/jvm-tools/interpreters/languages/java-lang-common/src/test/kotlin/interpreters/javalang/common/lexer/LexerTest.kt +++ b/jvm-tools/interpreters/languages/java-lang-common/src/test/kotlin/interpreters/javalang/common/lexer/LexerTest.kt @@ -9,12 +9,22 @@ class LexerTest { @Test fun lexerNextToken() { val input = """ -,;@+-*/\%!&|^?:.~'"` + +,;@+-*/\%!&|^?:.~ + +'"` + (){}[]<> + abc _aa a123 __11 _ + +a == b== c + """.trimIndent() val expectedTokens = listOf( + + // ,;@+-*/\%!&|^?:.~ ExpectedToken(id = 0, tokenType = TokenType.COMMA, tokenLiteral = TokenType.COMMA.value), ExpectedToken(id = 1, tokenType = TokenType.SEMICOLON, tokenLiteral = TokenType.SEMICOLON.value), ExpectedToken(id = 2, tokenType = TokenType.AT_SIGN, tokenLiteral = TokenType.AT_SIGN.value), @@ -32,9 +42,13 @@ abc _aa a123 __11 _ ExpectedToken(id = 14, tokenType = TokenType.COLON, tokenLiteral = TokenType.COLON.value), ExpectedToken(id = 15, tokenType = TokenType.DOT, tokenLiteral = TokenType.DOT.value), ExpectedToken(id = 16, tokenType = TokenType.TILDE, tokenLiteral = TokenType.TILDE.value), + + // '"` ExpectedToken(id = 17, tokenType = TokenType.SINGLE_QUOTE, tokenLiteral = TokenType.SINGLE_QUOTE.value), ExpectedToken(id = 18, tokenType = TokenType.DOUBLE_QUOTE, tokenLiteral = TokenType.DOUBLE_QUOTE.value), ExpectedToken(id = 19, tokenType = TokenType.BACKTICK, tokenLiteral = TokenType.BACKTICK.value), + + // (){}[]<> ExpectedToken( id = 20, tokenType = TokenType.LEFT_PARENTHESIS, @@ -75,12 +89,23 @@ abc _aa a123 __11 _ tokenType = TokenType.RIGHT_ANGLE_BRACKET, tokenLiteral = TokenType.RIGHT_ANGLE_BRACKET.value ), + + // abc _aa a123 __11 _ ExpectedToken(id = 28, tokenType = TokenType.IDENTIFIER, tokenLiteral = "abc"), ExpectedToken(id = 29, tokenType = TokenType.IDENTIFIER, tokenLiteral = "_aa"), ExpectedToken(id = 30, tokenType = TokenType.IDENTIFIER, tokenLiteral = "a123"), ExpectedToken(id = 31, tokenType = TokenType.IDENTIFIER, tokenLiteral = "__11"), ExpectedToken(id = 32, tokenType = TokenType.IDENTIFIER, tokenLiteral = "_"), - ExpectedToken(id = 33, tokenType = Token.EOF.tokenType, tokenLiteral = Token.EOF.tokenLiteral), + + // a == b== c + ExpectedToken(id = 33, tokenType = TokenType.IDENTIFIER, tokenLiteral = "a"), + ExpectedToken(id = 34, tokenType = TokenType.DOUBLE_EQUALS, tokenLiteral = "=="), + ExpectedToken(id = 35, tokenType = TokenType.IDENTIFIER, tokenLiteral = "b"), + ExpectedToken(id = 36, tokenType = TokenType.DOUBLE_EQUALS, tokenLiteral = "=="), + ExpectedToken(id = 37, tokenType = TokenType.IDENTIFIER, tokenLiteral = "c"), + + // eof + ExpectedToken(id = -1, tokenType = Token.EOF.tokenType, tokenLiteral = Token.EOF.tokenLiteral), ) // In the expectedTokens, if ids are not unique, throw error @@ -93,9 +118,8 @@ abc _aa a123 __11 _ for (expectedToken in expectedTokens) { val actualToken = lexer.nextToken() - assertEquals("${1}", "1") - assertEquals(expectedToken.tokenType.name, actualToken.tokenType.name) - assertEquals(expectedToken.tokenLiteral, actualToken.tokenLiteral) + assertEquals(expectedToken.tokenType.name, actualToken.tokenType.name, "id: ${expectedToken.id}") + assertEquals(expectedToken.tokenLiteral, actualToken.tokenLiteral, "id: ${expectedToken.id}") } } }