From 1269382858da489d09e475894ec7adc29e078258 Mon Sep 17 00:00:00 2001 From: vighnesh153 Date: Mon, 19 Feb 2024 19:38:05 +0530 Subject: [PATCH] refactor: move lexer functions to separate files --- .../javalang/common/lexer/Lexer.kt | 511 ------------------ .../javalang/common/lexer/charExtensions.kt | 7 + .../javalang/common/lexer/createLexerError.kt | 13 + .../javalang/common/lexer/createNewToken.kt | 18 + .../common/lexer/lineAndColumnNumber.kt | 20 + .../javalang/common/lexer/nextToken.kt | 290 ++++++++++ .../javalang/common/lexer/readComments.kt | 48 ++ .../javalang/common/lexer/readIdentifier.kt | 12 + .../javalang/common/lexer/readLiterals.kt | 129 +++++ .../common/lexer/LineAndColumnNumberTest.kt | 7 - 10 files changed, 537 insertions(+), 518 deletions(-) create mode 100644 jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/charExtensions.kt create mode 100644 jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/createLexerError.kt create mode 100644 jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/createNewToken.kt create mode 100644 jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/lineAndColumnNumber.kt create mode 100644 jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/nextToken.kt create mode 100644 jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/readComments.kt create mode 100644 jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/readIdentifier.kt create mode 100644 jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/readLiterals.kt delete mode 100644 jvm-tools/interpreters/languages/java-lang-common/src/test/kotlin/interpreters/javalang/common/lexer/LineAndColumnNumberTest.kt diff --git a/jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/Lexer.kt b/jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/Lexer.kt index 23db48b6..7cc9eae8 100644 --- a/jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/Lexer.kt +++ b/jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/Lexer.kt @@ -38,339 +38,6 @@ class Lexer constructor(internal val input: String) { } } -fun Lexer.lineNumber(): Int { - if (currentCharacter == EOF_CHARACTER) { - return input.split("\n").size - } - - return input.slice(0..currentIndex).count { it == '\n' } + 1 -} - -fun Lexer.columnNumber(): Int { - if (currentCharacter == EOF_CHARACTER) { - return 0 - } - - val linesUptoCurrentIndex = input.slice(0..currentIndex).split("\n").toMutableList() - linesUptoCurrentIndex.removeLast() - // currentIndex - (count of characters upto previous line) - return currentIndex + 1 - linesUptoCurrentIndex.sumOf { it.length + 1 } -} - -fun Lexer.createNewToken(tokenType: TokenType, tokenLiteral: String): Token { - val lineNumber = tokenStartLineNumber - val columnNumber = tokenStartColumnNumber - if (lineNumber == null || columnNumber == null) { - throw Error("lineNumber and columnNumber are not initialized!!") - } - return Token( - tokenType = tokenType, - tokenLiteral = tokenLiteral, - lineNumber = lineNumber, - columnNumber = columnNumber, - ) -} - -fun Lexer.nextToken(): Token { - lateinit var t: Token - - skipWhitespace() - - tokenStartLineNumber = lineNumber() - tokenStartColumnNumber = columnNumber() - - when (currentCharacter) { - '=' -> { - t = if (peekCharacter() == '=') { - readNextCharacter() - createNewToken(tokenType = TokenType.DOUBLE_EQUALS, tokenLiteral = TokenType.DOUBLE_EQUALS.value) - } else { - createNewToken(tokenType = TokenType.EQUALS, tokenLiteral = TokenType.EQUALS.value) - } - } - - ',' -> t = createNewToken(tokenType = TokenType.COMMA, tokenLiteral = TokenType.COMMA.value) - '+' -> { - val peek = peekCharacter() - t = when (peek) { - '+' -> { - readNextCharacter() - createNewToken(tokenType = TokenType.INCREMENT, tokenLiteral = TokenType.INCREMENT.value) - } - - '=' -> { - readNextCharacter() - createNewToken(tokenType = TokenType.PLUS_EQUALS, tokenLiteral = TokenType.PLUS_EQUALS.value) - } - - else -> createNewToken(tokenType = TokenType.PLUS, tokenLiteral = TokenType.PLUS.value) - } - } - - ';' -> t = createNewToken(tokenType = TokenType.SEMICOLON, tokenLiteral = TokenType.SEMICOLON.value) - '@' -> t = createNewToken(tokenType = TokenType.AT_SIGN, tokenLiteral = TokenType.AT_SIGN.value) - - '-' -> { - val peek = peekCharacter() - t = when (peek) { - '-' -> { - readNextCharacter() - createNewToken(tokenType = TokenType.DECREMENT, tokenLiteral = TokenType.DECREMENT.value) - } - - '=' -> { - readNextCharacter() - createNewToken(tokenType = TokenType.MINUS_EQUALS, tokenLiteral = TokenType.MINUS_EQUALS.value) - } - - else -> createNewToken(tokenType = TokenType.MINUS, tokenLiteral = TokenType.MINUS.value) - } - } - - '*' -> { - val peek = peekCharacter() - t = when (peek) { - '=' -> { - readNextCharacter() - createNewToken( - tokenType = TokenType.ASTERISK_EQUALS, - tokenLiteral = TokenType.ASTERISK_EQUALS.value - ) - } - - else -> createNewToken(tokenType = TokenType.ASTERISK, tokenLiteral = TokenType.ASTERISK.value) - } - } - - '/' -> { - val peek = peekCharacter() - t = when (peek) { - '=' -> { - readNextCharacter() - createNewToken( - tokenType = TokenType.FORWARD_SLASH_EQUALS, - tokenLiteral = TokenType.FORWARD_SLASH_EQUALS.value - ) - } - - '/' -> createNewToken(tokenType = TokenType.SINGLE_LINE_COMMENT, tokenLiteral = readSingleLineComment()) - '*' -> createNewToken(tokenType = TokenType.MULTI_LINE_COMMENT, tokenLiteral = readMultilineComment()) - - else -> createNewToken( - tokenType = TokenType.FORWARD_SLASH, - tokenLiteral = TokenType.FORWARD_SLASH.value - ) - } - } - - '\\' -> t = createNewToken(tokenType = TokenType.BACK_SLASH, tokenLiteral = TokenType.BACK_SLASH.value) - '%' -> { - val peek = peekCharacter() - t = when (peek) { - '=' -> { - readNextCharacter() - createNewToken(tokenType = TokenType.MODULUS_EQUALS, tokenLiteral = TokenType.MODULUS_EQUALS.value) - } - - else -> createNewToken(tokenType = TokenType.MODULUS, tokenLiteral = TokenType.MODULUS.value) - } - } - - '!' -> { - val peek = peekCharacter() - t = when (peek) { - '=' -> { - readNextCharacter() - createNewToken(tokenType = TokenType.BANG_EQUALS, tokenLiteral = TokenType.BANG_EQUALS.value) - } - - else -> createNewToken(tokenType = TokenType.BANG, tokenLiteral = TokenType.BANG.value) - } - } - - '&' -> { - val peek = peekCharacter() - t = when (peek) { - '&' -> { - readNextCharacter() - createNewToken( - tokenType = TokenType.DOUBLE_AMPERSAND, - tokenLiteral = TokenType.DOUBLE_AMPERSAND.value - ) - } - - else -> createNewToken(tokenType = TokenType.AMPERSAND, tokenLiteral = TokenType.AMPERSAND.value) - } - } - - '|' -> { - val peek = peekCharacter() - t = when (peek) { - '|' -> { - readNextCharacter() - createNewToken( - tokenType = TokenType.DOUBLE_VERTICAL_BAR, - tokenLiteral = TokenType.DOUBLE_VERTICAL_BAR.value - ) - } - - else -> createNewToken(tokenType = TokenType.VERTICAL_BAR, tokenLiteral = TokenType.VERTICAL_BAR.value) - } - } - - '^' -> { - val peek = peekCharacter() - t = when (peek) { - '=' -> { - readNextCharacter() - createNewToken(tokenType = TokenType.CARET_EQUALS, tokenLiteral = TokenType.CARET_EQUALS.value) - } - - else -> createNewToken(tokenType = TokenType.CARET, tokenLiteral = TokenType.CARET.value) - } - } - - '?' -> t = createNewToken(tokenType = TokenType.QUESTION, tokenLiteral = TokenType.QUESTION.value) - ':' -> t = createNewToken(tokenType = TokenType.COLON, tokenLiteral = TokenType.COLON.value) - '.' -> { - t = if (peekCharacter().isDigit()) { - readNumberLiteral() - } else { - createNewToken(tokenType = TokenType.DOT, tokenLiteral = TokenType.DOT.value) - } - } - - '~' -> t = createNewToken(tokenType = TokenType.TILDE, tokenLiteral = TokenType.TILDE.value) - '\'' -> t = createNewToken(tokenType = TokenType.CHARACTER_LITERAL, tokenLiteral = readCharacterLiteral()) - '"' -> t = createNewToken(tokenType = TokenType.STRING_LITERAL, tokenLiteral = readStringLiteral()) - '(' -> t = - createNewToken(tokenType = TokenType.LEFT_PARENTHESIS, tokenLiteral = TokenType.LEFT_PARENTHESIS.value) - - ')' -> t = - createNewToken(tokenType = TokenType.RIGHT_PARENTHESIS, tokenLiteral = TokenType.RIGHT_PARENTHESIS.value) - - '{' -> t = - createNewToken(tokenType = TokenType.LEFT_CURLY_BRACE, tokenLiteral = TokenType.LEFT_CURLY_BRACE.value) - - '}' -> t = - createNewToken(tokenType = TokenType.RIGHT_CURLY_BRACE, tokenLiteral = TokenType.RIGHT_CURLY_BRACE.value) - - '[' -> t = createNewToken( - tokenType = TokenType.LEFT_SQUARE_BRACKET, - tokenLiteral = TokenType.LEFT_SQUARE_BRACKET.value - ) - - ']' -> t = - createNewToken( - tokenType = TokenType.RIGHT_SQUARE_BRACKET, - tokenLiteral = TokenType.RIGHT_SQUARE_BRACKET.value - ) - - '<' -> { - val peek = peekCharacter() - t = when (peek) { - '<' -> { - readNextCharacter() - createNewToken( - tokenType = TokenType.DOUBLE_LEFT_ANGLE_BRACKET, - tokenLiteral = TokenType.DOUBLE_LEFT_ANGLE_BRACKET.value - ) - } - - '=' -> { - readNextCharacter() - createNewToken( - tokenType = TokenType.LEFT_ANGLE_BRACKET_EQUALS, - tokenLiteral = TokenType.LEFT_ANGLE_BRACKET_EQUALS.value - ) - } - - else -> createNewToken( - tokenType = TokenType.LEFT_ANGLE_BRACKET, - tokenLiteral = TokenType.LEFT_ANGLE_BRACKET.value - ) - } - } - - '>' -> { - val peek = peekCharacter() - t = when (peek) { - '>' -> { - readNextCharacter() - if (peekCharacter() == '>') { - readNextCharacter() - createNewToken( - tokenType = TokenType.TRIPLE_RIGHT_ANGLE_BRACKET, - tokenLiteral = TokenType.TRIPLE_RIGHT_ANGLE_BRACKET.value - ) - } else { - createNewToken( - tokenType = TokenType.DOUBLE_RIGHT_ANGLE_BRACKET, - tokenLiteral = TokenType.DOUBLE_RIGHT_ANGLE_BRACKET.value - ) - } - } - - '=' -> { - readNextCharacter() - createNewToken( - tokenType = TokenType.RIGHT_ANGLE_BRACKET_EQUALS, - tokenLiteral = TokenType.RIGHT_ANGLE_BRACKET_EQUALS.value - ) - } - - else -> createNewToken( - tokenType = TokenType.RIGHT_ANGLE_BRACKET, - tokenLiteral = TokenType.RIGHT_ANGLE_BRACKET.value - ) - } - } - - EOF_CHARACTER -> t = Token.buildEOF( - lineNumber = tokenStartLineNumber!!, - columnNumber = tokenStartColumnNumber!!, - ) - - else -> { - t = if (currentCharacter.isAcceptableIdentifierStart()) { - val identifier = readIdentifier() - // this return is necessary to avoid the unnecessary readNextCharacter - // call after when block - return createNewToken( - tokenType = lookupIdentifier(identifier), - tokenLiteral = identifier, - ) - } else if (currentCharacter.isDigit()) { - readNumberLiteral() - } else { - createNewToken(tokenType = TokenType.ILLEGAL, tokenLiteral = "$currentCharacter") - } - } - } - - readNextCharacter() - - tokenStartLineNumber = null - tokenStartColumnNumber = null - - return t -} - -internal fun Char.isUnderscore(): Boolean = this == '_' -internal fun Char.isAcceptableIdentifierStart(): Boolean = isUnderscore() || isLetter() -internal fun Char.isAcceptableIdentifierNonStart(): Boolean = isAcceptableIdentifierStart() || isDigit() - -internal fun Lexer.readIdentifier(): String { - val startIndex = currentIndex - if (!currentCharacter.isAcceptableIdentifierStart()) { - throw Error("You should not attempt to read an identifier which doesn't start with '_' or a letter") - } - while (currentCharacter.isAcceptableIdentifierNonStart()) { - readNextCharacter() - } - return input.slice(startIndex..= input.length) { return EOF_CHARACTER @@ -385,181 +52,3 @@ internal fun Lexer.skipWhitespace() { } } -internal fun Lexer.createLexerError(errorMessage: String): InterpreterError { - return InterpreterError( - errorMessage = errorMessage, - errorType = InterpreterErrorType.LEXER_ERROR, - lineNumber = lineNumber(), - columnNumber = columnNumber(), - ) -} - -internal fun Lexer.readCharacterLiteral(): String { - if (currentCharacter != SINGLE_QUOTE) { - throw Error("You should not attempt to read a character literal if it doesn't start with \"'\"") - } - readNextCharacter() - val startIndex = currentIndex - - while (currentCharacter != SINGLE_QUOTE && currentCharacter != EOF_CHARACTER) { - if (currentCharacter == '\\') { - readEscapeSequence() - } - readNextCharacter() - } - if (currentCharacter == EOF_CHARACTER) { - addError( - createLexerError("Unclosed character literal") - ) - return " Unclosed character literal" - } - // current character is ending single quote - val character = input.slice(startIndex.. Invalid character sequence" - } - return character -} - -internal fun Lexer.readStringLiteral(): String { - if (currentCharacter != DOUBLE_QUOTE) { - throw Error("You should not attempt to read a string literal if it doesn't start with '\"'") - } - readNextCharacter() - val startIndex = currentIndex - - while (currentCharacter != DOUBLE_QUOTE && currentCharacter != EOF_CHARACTER) { - if (currentCharacter == '\\') { - readEscapeSequence() - } - readNextCharacter() - } - if (currentCharacter == EOF_CHARACTER) { - addError( - createLexerError("Unclosed string literal") - ) - return " Unclosed string literal" - } - // current character is ending double quote - return input.slice(startIndex.. Unclosed multiline comment" - } - val endIndex = currentIndex - readNextCharacter() - return input.slice(startIndex.. createNewToken( - tokenType = TokenType.FLOAT_LITERAL, - tokenLiteral = input.slice(startIndex..currentIndex), - ) - - else -> createNewToken( - tokenType = TokenType.DOUBLE_LITERAL, - tokenLiteral = input.slice(startIndex..currentIndex), - ) - } - } - // In Kotlin, check if this is decimal point or object access point - containsDecimalPoint = true - } else if (peek.lowercase() == "f") { - readNextCharacter() - return createNewToken( - tokenType = TokenType.FLOAT_LITERAL, - tokenLiteral = input.slice(startIndex..currentIndex), - ) - } else if (peek.lowercase() == "l") { - readNextCharacter() - if (containsDecimalPoint) { - addError( - createLexerError("A floating point number cannot be long at the same time") - ) - return createNewToken( - tokenType = TokenType.ILLEGAL, - tokenLiteral = "$currentCharacter" - ) - } - return createNewToken( - tokenType = TokenType.LONG_LITERAL, - tokenLiteral = input.slice(startIndex..currentIndex) - ) - } else if (peek.isDigit()) { - // continue with the loop - } else { - break - } - - readNextCharacter() - peek = peekCharacter() - } - - if (containsDecimalPoint) { - return createNewToken( - tokenType = TokenType.DOUBLE_LITERAL, - tokenLiteral = input.slice(startIndex..currentIndex) - ) - } - return createNewToken( - tokenType = TokenType.INTEGER_LITERAL, - tokenLiteral = input.slice(startIndex..currentIndex) - ) -} diff --git a/jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/charExtensions.kt b/jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/charExtensions.kt new file mode 100644 index 00000000..82c7df2e --- /dev/null +++ b/jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/charExtensions.kt @@ -0,0 +1,7 @@ +package interpreters.javalang.common.lexer + +internal fun Char.isUnderscore(): Boolean = this == '_' + +internal fun Char.isAcceptableIdentifierStart(): Boolean = isUnderscore() || isLetter() + +internal fun Char.isAcceptableIdentifierNonStart(): Boolean = isAcceptableIdentifierStart() || isDigit() diff --git a/jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/createLexerError.kt b/jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/createLexerError.kt new file mode 100644 index 00000000..999827e0 --- /dev/null +++ b/jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/createLexerError.kt @@ -0,0 +1,13 @@ +package interpreters.javalang.common.lexer + +import interpreters.javalang.common.errors.InterpreterError +import interpreters.javalang.common.errors.InterpreterErrorType + +internal fun Lexer.createLexerError(errorMessage: String): InterpreterError { + return InterpreterError( + errorMessage = errorMessage, + errorType = InterpreterErrorType.LEXER_ERROR, + lineNumber = lineNumber(), + columnNumber = columnNumber(), + ) +} diff --git a/jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/createNewToken.kt b/jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/createNewToken.kt new file mode 100644 index 00000000..0d8d475b --- /dev/null +++ b/jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/createNewToken.kt @@ -0,0 +1,18 @@ +package interpreters.javalang.common.lexer + +import interpreters.javalang.common.tokens.Token +import interpreters.javalang.common.tokens.TokenType + +fun Lexer.createNewToken(tokenType: TokenType, tokenLiteral: String): Token { + val lineNumber = tokenStartLineNumber + val columnNumber = tokenStartColumnNumber + if (lineNumber == null || columnNumber == null) { + throw Error("lineNumber and columnNumber are not initialized!!") + } + return Token( + tokenType = tokenType, + tokenLiteral = tokenLiteral, + lineNumber = lineNumber, + columnNumber = columnNumber, + ) +} diff --git a/jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/lineAndColumnNumber.kt b/jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/lineAndColumnNumber.kt new file mode 100644 index 00000000..eeab4375 --- /dev/null +++ b/jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/lineAndColumnNumber.kt @@ -0,0 +1,20 @@ +package interpreters.javalang.common.lexer + +fun Lexer.lineNumber(): Int { + if (currentCharacter == EOF_CHARACTER) { + return input.split("\n").size + } + + return input.slice(0..currentIndex).count { it == '\n' } + 1 +} + +fun Lexer.columnNumber(): Int { + if (currentCharacter == EOF_CHARACTER) { + return 0 + } + + val linesUptoCurrentIndex = input.slice(0..currentIndex).split("\n").toMutableList() + linesUptoCurrentIndex.removeLast() + // currentIndex - (count of characters upto previous line) + return currentIndex + 1 - linesUptoCurrentIndex.sumOf { it.length + 1 } +} diff --git a/jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/nextToken.kt b/jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/nextToken.kt new file mode 100644 index 00000000..1f67ed85 --- /dev/null +++ b/jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/nextToken.kt @@ -0,0 +1,290 @@ +package interpreters.javalang.common.lexer + +import interpreters.javalang.common.tokens.Token +import interpreters.javalang.common.tokens.TokenType +import interpreters.javalang.common.tokens.lookupIdentifier + +fun Lexer.nextToken(): Token { + lateinit var t: Token + + skipWhitespace() + + tokenStartLineNumber = lineNumber() + tokenStartColumnNumber = columnNumber() + + when (currentCharacter) { + '=' -> { + t = if (peekCharacter() == '=') { + readNextCharacter() + createNewToken(tokenType = TokenType.DOUBLE_EQUALS, tokenLiteral = TokenType.DOUBLE_EQUALS.value) + } else { + createNewToken(tokenType = TokenType.EQUALS, tokenLiteral = TokenType.EQUALS.value) + } + } + + ',' -> t = createNewToken(tokenType = TokenType.COMMA, tokenLiteral = TokenType.COMMA.value) + '+' -> { + val peek = peekCharacter() + t = when (peek) { + '+' -> { + readNextCharacter() + createNewToken(tokenType = TokenType.INCREMENT, tokenLiteral = TokenType.INCREMENT.value) + } + + '=' -> { + readNextCharacter() + createNewToken(tokenType = TokenType.PLUS_EQUALS, tokenLiteral = TokenType.PLUS_EQUALS.value) + } + + else -> createNewToken(tokenType = TokenType.PLUS, tokenLiteral = TokenType.PLUS.value) + } + } + + ';' -> t = createNewToken(tokenType = TokenType.SEMICOLON, tokenLiteral = TokenType.SEMICOLON.value) + '@' -> t = createNewToken(tokenType = TokenType.AT_SIGN, tokenLiteral = TokenType.AT_SIGN.value) + + '-' -> { + val peek = peekCharacter() + t = when (peek) { + '-' -> { + readNextCharacter() + createNewToken(tokenType = TokenType.DECREMENT, tokenLiteral = TokenType.DECREMENT.value) + } + + '=' -> { + readNextCharacter() + createNewToken(tokenType = TokenType.MINUS_EQUALS, tokenLiteral = TokenType.MINUS_EQUALS.value) + } + + else -> createNewToken(tokenType = TokenType.MINUS, tokenLiteral = TokenType.MINUS.value) + } + } + + '*' -> { + val peek = peekCharacter() + t = when (peek) { + '=' -> { + readNextCharacter() + createNewToken( + tokenType = TokenType.ASTERISK_EQUALS, + tokenLiteral = TokenType.ASTERISK_EQUALS.value + ) + } + + else -> createNewToken(tokenType = TokenType.ASTERISK, tokenLiteral = TokenType.ASTERISK.value) + } + } + + '/' -> { + val peek = peekCharacter() + t = when (peek) { + '=' -> { + readNextCharacter() + createNewToken( + tokenType = TokenType.FORWARD_SLASH_EQUALS, + tokenLiteral = TokenType.FORWARD_SLASH_EQUALS.value + ) + } + + '/' -> createNewToken(tokenType = TokenType.SINGLE_LINE_COMMENT, tokenLiteral = readSingleLineComment()) + '*' -> createNewToken(tokenType = TokenType.MULTI_LINE_COMMENT, tokenLiteral = readMultilineComment()) + + else -> createNewToken( + tokenType = TokenType.FORWARD_SLASH, + tokenLiteral = TokenType.FORWARD_SLASH.value + ) + } + } + + '\\' -> t = createNewToken(tokenType = TokenType.BACK_SLASH, tokenLiteral = TokenType.BACK_SLASH.value) + '%' -> { + val peek = peekCharacter() + t = when (peek) { + '=' -> { + readNextCharacter() + createNewToken(tokenType = TokenType.MODULUS_EQUALS, tokenLiteral = TokenType.MODULUS_EQUALS.value) + } + + else -> createNewToken(tokenType = TokenType.MODULUS, tokenLiteral = TokenType.MODULUS.value) + } + } + + '!' -> { + val peek = peekCharacter() + t = when (peek) { + '=' -> { + readNextCharacter() + createNewToken(tokenType = TokenType.BANG_EQUALS, tokenLiteral = TokenType.BANG_EQUALS.value) + } + + else -> createNewToken(tokenType = TokenType.BANG, tokenLiteral = TokenType.BANG.value) + } + } + + '&' -> { + val peek = peekCharacter() + t = when (peek) { + '&' -> { + readNextCharacter() + createNewToken( + tokenType = TokenType.DOUBLE_AMPERSAND, + tokenLiteral = TokenType.DOUBLE_AMPERSAND.value + ) + } + + else -> createNewToken(tokenType = TokenType.AMPERSAND, tokenLiteral = TokenType.AMPERSAND.value) + } + } + + '|' -> { + val peek = peekCharacter() + t = when (peek) { + '|' -> { + readNextCharacter() + createNewToken( + tokenType = TokenType.DOUBLE_VERTICAL_BAR, + tokenLiteral = TokenType.DOUBLE_VERTICAL_BAR.value + ) + } + + else -> createNewToken(tokenType = TokenType.VERTICAL_BAR, tokenLiteral = TokenType.VERTICAL_BAR.value) + } + } + + '^' -> { + val peek = peekCharacter() + t = when (peek) { + '=' -> { + readNextCharacter() + createNewToken(tokenType = TokenType.CARET_EQUALS, tokenLiteral = TokenType.CARET_EQUALS.value) + } + + else -> createNewToken(tokenType = TokenType.CARET, tokenLiteral = TokenType.CARET.value) + } + } + + '?' -> t = createNewToken(tokenType = TokenType.QUESTION, tokenLiteral = TokenType.QUESTION.value) + ':' -> t = createNewToken(tokenType = TokenType.COLON, tokenLiteral = TokenType.COLON.value) + '.' -> { + t = if (peekCharacter().isDigit()) { + readNumberLiteral() + } else { + createNewToken(tokenType = TokenType.DOT, tokenLiteral = TokenType.DOT.value) + } + } + + '~' -> t = createNewToken(tokenType = TokenType.TILDE, tokenLiteral = TokenType.TILDE.value) + '\'' -> t = createNewToken(tokenType = TokenType.CHARACTER_LITERAL, tokenLiteral = readCharacterLiteral()) + '"' -> t = createNewToken(tokenType = TokenType.STRING_LITERAL, tokenLiteral = readStringLiteral()) + '(' -> t = + createNewToken(tokenType = TokenType.LEFT_PARENTHESIS, tokenLiteral = TokenType.LEFT_PARENTHESIS.value) + + ')' -> t = + createNewToken(tokenType = TokenType.RIGHT_PARENTHESIS, tokenLiteral = TokenType.RIGHT_PARENTHESIS.value) + + '{' -> t = + createNewToken(tokenType = TokenType.LEFT_CURLY_BRACE, tokenLiteral = TokenType.LEFT_CURLY_BRACE.value) + + '}' -> t = + createNewToken(tokenType = TokenType.RIGHT_CURLY_BRACE, tokenLiteral = TokenType.RIGHT_CURLY_BRACE.value) + + '[' -> t = createNewToken( + tokenType = TokenType.LEFT_SQUARE_BRACKET, + tokenLiteral = TokenType.LEFT_SQUARE_BRACKET.value + ) + + ']' -> t = + createNewToken( + tokenType = TokenType.RIGHT_SQUARE_BRACKET, + tokenLiteral = TokenType.RIGHT_SQUARE_BRACKET.value + ) + + '<' -> { + val peek = peekCharacter() + t = when (peek) { + '<' -> { + readNextCharacter() + createNewToken( + tokenType = TokenType.DOUBLE_LEFT_ANGLE_BRACKET, + tokenLiteral = TokenType.DOUBLE_LEFT_ANGLE_BRACKET.value + ) + } + + '=' -> { + readNextCharacter() + createNewToken( + tokenType = TokenType.LEFT_ANGLE_BRACKET_EQUALS, + tokenLiteral = TokenType.LEFT_ANGLE_BRACKET_EQUALS.value + ) + } + + else -> createNewToken( + tokenType = TokenType.LEFT_ANGLE_BRACKET, + tokenLiteral = TokenType.LEFT_ANGLE_BRACKET.value + ) + } + } + + '>' -> { + val peek = peekCharacter() + t = when (peek) { + '>' -> { + readNextCharacter() + if (peekCharacter() == '>') { + readNextCharacter() + createNewToken( + tokenType = TokenType.TRIPLE_RIGHT_ANGLE_BRACKET, + tokenLiteral = TokenType.TRIPLE_RIGHT_ANGLE_BRACKET.value + ) + } else { + createNewToken( + tokenType = TokenType.DOUBLE_RIGHT_ANGLE_BRACKET, + tokenLiteral = TokenType.DOUBLE_RIGHT_ANGLE_BRACKET.value + ) + } + } + + '=' -> { + readNextCharacter() + createNewToken( + tokenType = TokenType.RIGHT_ANGLE_BRACKET_EQUALS, + tokenLiteral = TokenType.RIGHT_ANGLE_BRACKET_EQUALS.value + ) + } + + else -> createNewToken( + tokenType = TokenType.RIGHT_ANGLE_BRACKET, + tokenLiteral = TokenType.RIGHT_ANGLE_BRACKET.value + ) + } + } + + EOF_CHARACTER -> t = Token.buildEOF( + lineNumber = tokenStartLineNumber!!, + columnNumber = tokenStartColumnNumber!!, + ) + + else -> { + t = if (currentCharacter.isAcceptableIdentifierStart()) { + val identifier = readIdentifier() + // this return is necessary to avoid the unnecessary readNextCharacter + // call after when block + return createNewToken( + tokenType = lookupIdentifier(identifier), + tokenLiteral = identifier, + ) + } else if (currentCharacter.isDigit()) { + readNumberLiteral() + } else { + createNewToken(tokenType = TokenType.ILLEGAL, tokenLiteral = "$currentCharacter") + } + } + } + + readNextCharacter() + + tokenStartLineNumber = null + tokenStartColumnNumber = null + + return t +} diff --git a/jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/readComments.kt b/jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/readComments.kt new file mode 100644 index 00000000..6c05f0d3 --- /dev/null +++ b/jvm-tools/interpreters/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/readComments.kt @@ -0,0 +1,48 @@ +package interpreters.javalang.common.lexer + +import kotlin.math.min + +internal fun Lexer.readSingleLineComment(): String { + if (currentCharacter != '/' || peekCharacter() != '/') { + throw Error("You should not attempt to read single line comment that doesn't start with '//'") + } + + // get past second '/' + readNextCharacter() + readNextCharacter() + + val startIndex = currentIndex + while (currentCharacter != EOF_CHARACTER && currentCharacter != '\n') { + readNextCharacter() + } + return input.slice(startIndex..min(currentIndex, input.lastIndex)) +} + +internal fun Lexer.readMultilineComment(): String { + if (currentCharacter != '/' || peekCharacter() != '*') { + throw Error("You should not attempt to read multiline comment that doesn't start with '/*'") + } + + // get past '/*' + readNextCharacter() + readNextCharacter() + + val startIndex = currentIndex + while ( + // end of multiline comment + (currentCharacter != '*' || peekCharacter() != '/') && + // end of file + currentCharacter != EOF_CHARACTER + ) { + readNextCharacter() + } + if (currentCharacter == EOF_CHARACTER) { + addError( + createLexerError("Unclosed multiline comment") + ) + return " Unclosed multiline comment" + } + val endIndex = currentIndex + readNextCharacter() + return input.slice(startIndex.. Unclosed character literal" + } + // current character is ending single quote + val character = input.slice(startIndex.. Invalid character sequence" + } + return character +} + +internal fun Lexer.readStringLiteral(): String { + if (currentCharacter != DOUBLE_QUOTE) { + throw Error("You should not attempt to read a string literal if it doesn't start with '\"'") + } + readNextCharacter() + val startIndex = currentIndex + + while (currentCharacter != DOUBLE_QUOTE && currentCharacter != EOF_CHARACTER) { + if (currentCharacter == '\\') { + readEscapeSequence() + } + readNextCharacter() + } + if (currentCharacter == EOF_CHARACTER) { + addError( + createLexerError("Unclosed string literal") + ) + return " Unclosed string literal" + } + // current character is ending double quote + return input.slice(startIndex.. createNewToken( + tokenType = TokenType.FLOAT_LITERAL, + tokenLiteral = input.slice(startIndex..currentIndex), + ) + + else -> createNewToken( + tokenType = TokenType.DOUBLE_LITERAL, + tokenLiteral = input.slice(startIndex..currentIndex), + ) + } + } + // In Kotlin, check if this is decimal point or object access point + containsDecimalPoint = true + } else if (peek.lowercase() == "f") { + readNextCharacter() + return createNewToken( + tokenType = TokenType.FLOAT_LITERAL, + tokenLiteral = input.slice(startIndex..currentIndex), + ) + } else if (peek.lowercase() == "l") { + readNextCharacter() + if (containsDecimalPoint) { + addError( + createLexerError("A floating point number cannot be long at the same time") + ) + return createNewToken( + tokenType = TokenType.ILLEGAL, + tokenLiteral = "$currentCharacter" + ) + } + return createNewToken( + tokenType = TokenType.LONG_LITERAL, + tokenLiteral = input.slice(startIndex..currentIndex) + ) + } else if (peek.isDigit()) { + // continue with the loop + } else { + break + } + + readNextCharacter() + peek = peekCharacter() + } + + if (containsDecimalPoint) { + return createNewToken( + tokenType = TokenType.DOUBLE_LITERAL, + tokenLiteral = input.slice(startIndex..currentIndex) + ) + } + return createNewToken( + tokenType = TokenType.INTEGER_LITERAL, + tokenLiteral = input.slice(startIndex..currentIndex) + ) +} diff --git a/jvm-tools/interpreters/languages/java-lang-common/src/test/kotlin/interpreters/javalang/common/lexer/LineAndColumnNumberTest.kt b/jvm-tools/interpreters/languages/java-lang-common/src/test/kotlin/interpreters/javalang/common/lexer/LineAndColumnNumberTest.kt deleted file mode 100644 index 9b5bb312..00000000 --- a/jvm-tools/interpreters/languages/java-lang-common/src/test/kotlin/interpreters/javalang/common/lexer/LineAndColumnNumberTest.kt +++ /dev/null @@ -1,7 +0,0 @@ -package interpreters.javalang.common.lexer - -class LineAndColumnNumberTest { - fun lexerNextToken_shouldIdentifyLineAndColumnNumbersOfAllTokens() { - - } -} \ No newline at end of file