refactor: move lexer functions to separate files
1 parent 3a0a122 · commit 1269382
Showing 10 changed files with 537 additions and 518 deletions.
511 changes: 0 additions & 511 deletions
...rs/languages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/Lexer.kt
Large diffs are not rendered by default.
7 changes: 7 additions & 0 deletions
...ges/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/charExtensions.kt
@@ -0,0 +1,7 @@
package interpreters.javalang.common.lexer

internal fun Char.isUnderscore(): Boolean = this == '_'

internal fun Char.isAcceptableIdentifierStart(): Boolean = isUnderscore() || isLetter()

internal fun Char.isAcceptableIdentifierNonStart(): Boolean = isAcceptableIdentifierStart() || isDigit()
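These predicates encode the identifier rules that nextToken relies on: an identifier may start with an underscore or a letter, and may continue with digits as well. A quick illustrative check (hypothetical, assuming the internal extensions above are in scope within the lexer module):

fun main() {
    println('_'.isAcceptableIdentifierStart())    // true: underscore may start an identifier
    println('a'.isAcceptableIdentifierStart())    // true: letters may start an identifier
    println('1'.isAcceptableIdentifierStart())    // false: digits may not start one
    println('1'.isAcceptableIdentifierNonStart()) // true: digits are fine after the first character
}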
13 changes: 13 additions & 0 deletions
...s/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/createLexerError.kt
@@ -0,0 +1,13 @@
package interpreters.javalang.common.lexer

import interpreters.javalang.common.errors.InterpreterError
import interpreters.javalang.common.errors.InterpreterErrorType

internal fun Lexer.createLexerError(errorMessage: String): InterpreterError {
    return InterpreterError(
        errorMessage = errorMessage,
        errorType = InterpreterErrorType.LEXER_ERROR,
        lineNumber = lineNumber(),
        columnNumber = columnNumber(),
    )
}
18 changes: 18 additions & 0 deletions
...ges/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/createNewToken.kt
@@ -0,0 +1,18 @@
package interpreters.javalang.common.lexer

import interpreters.javalang.common.tokens.Token
import interpreters.javalang.common.tokens.TokenType

fun Lexer.createNewToken(tokenType: TokenType, tokenLiteral: String): Token {
    val lineNumber = tokenStartLineNumber
    val columnNumber = tokenStartColumnNumber
    if (lineNumber == null || columnNumber == null) {
        throw Error("lineNumber and columnNumber are not initialized!!")
    }
    return Token(
        tokenType = tokenType,
        tokenLiteral = tokenLiteral,
        lineNumber = lineNumber,
        columnNumber = columnNumber,
    )
}
20 changes: 20 additions & 0 deletions
...ava-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/lineAndColumnNumber.kt
@@ -0,0 +1,20 @@
package interpreters.javalang.common.lexer

fun Lexer.lineNumber(): Int {
    if (currentCharacter == EOF_CHARACTER) {
        return input.split("\n").size
    }

    return input.slice(0..currentIndex).count { it == '\n' } + 1
}

fun Lexer.columnNumber(): Int {
    if (currentCharacter == EOF_CHARACTER) {
        return 0
    }

    val linesUptoCurrentIndex = input.slice(0..currentIndex).split("\n").toMutableList()
    linesUptoCurrentIndex.removeLast()
    // currentIndex - (count of characters up to the previous line)
    return currentIndex + 1 - linesUptoCurrentIndex.sumOf { it.length + 1 }
}
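For a character inside the input, both values come out 1-based; only EOF gets column 0. To see the arithmetic land, here is a minimal standalone sketch (not part of the commit) that mirrors the same slice/split logic on a plain string and index:

fun lineNumberAt(input: String, currentIndex: Int): Int =
    input.slice(0..currentIndex).count { it == '\n' } + 1

fun columnNumberAt(input: String, currentIndex: Int): Int {
    val completedLines = input.slice(0..currentIndex).split("\n").toMutableList()
    completedLines.removeLast()
    return currentIndex + 1 - completedLines.sumOf { it.length + 1 }
}

fun main() {
    val input = "ab\ncd"
    // Index 3 points at 'c', the first character of the second line.
    println(lineNumberAt(input, 3))   // 2
    println(columnNumberAt(input, 3)) // 1
}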
290 changes: 290 additions & 0 deletions
...anguages/java-lang-common/src/main/kotlin/interpreters/javalang/common/lexer/nextToken.kt
@@ -0,0 +1,290 @@
package interpreters.javalang.common.lexer

import interpreters.javalang.common.tokens.Token
import interpreters.javalang.common.tokens.TokenType
import interpreters.javalang.common.tokens.lookupIdentifier

fun Lexer.nextToken(): Token {
    lateinit var t: Token

    skipWhitespace()

    tokenStartLineNumber = lineNumber()
    tokenStartColumnNumber = columnNumber()

    when (currentCharacter) {
        '=' -> {
            t = if (peekCharacter() == '=') {
                readNextCharacter()
                createNewToken(tokenType = TokenType.DOUBLE_EQUALS, tokenLiteral = TokenType.DOUBLE_EQUALS.value)
            } else {
                createNewToken(tokenType = TokenType.EQUALS, tokenLiteral = TokenType.EQUALS.value)
            }
        }

        ',' -> t = createNewToken(tokenType = TokenType.COMMA, tokenLiteral = TokenType.COMMA.value)

        '+' -> {
            val peek = peekCharacter()
            t = when (peek) {
                '+' -> {
                    readNextCharacter()
                    createNewToken(tokenType = TokenType.INCREMENT, tokenLiteral = TokenType.INCREMENT.value)
                }

                '=' -> {
                    readNextCharacter()
                    createNewToken(tokenType = TokenType.PLUS_EQUALS, tokenLiteral = TokenType.PLUS_EQUALS.value)
                }

                else -> createNewToken(tokenType = TokenType.PLUS, tokenLiteral = TokenType.PLUS.value)
            }
        }

        ';' -> t = createNewToken(tokenType = TokenType.SEMICOLON, tokenLiteral = TokenType.SEMICOLON.value)
        '@' -> t = createNewToken(tokenType = TokenType.AT_SIGN, tokenLiteral = TokenType.AT_SIGN.value)

        '-' -> {
            val peek = peekCharacter()
            t = when (peek) {
                '-' -> {
                    readNextCharacter()
                    createNewToken(tokenType = TokenType.DECREMENT, tokenLiteral = TokenType.DECREMENT.value)
                }

                '=' -> {
                    readNextCharacter()
                    createNewToken(tokenType = TokenType.MINUS_EQUALS, tokenLiteral = TokenType.MINUS_EQUALS.value)
                }

                else -> createNewToken(tokenType = TokenType.MINUS, tokenLiteral = TokenType.MINUS.value)
            }
        }

        '*' -> {
            val peek = peekCharacter()
            t = when (peek) {
                '=' -> {
                    readNextCharacter()
                    createNewToken(
                        tokenType = TokenType.ASTERISK_EQUALS,
                        tokenLiteral = TokenType.ASTERISK_EQUALS.value
                    )
                }

                else -> createNewToken(tokenType = TokenType.ASTERISK, tokenLiteral = TokenType.ASTERISK.value)
            }
        }

        '/' -> {
            val peek = peekCharacter()
            t = when (peek) {
                '=' -> {
                    readNextCharacter()
                    createNewToken(
                        tokenType = TokenType.FORWARD_SLASH_EQUALS,
                        tokenLiteral = TokenType.FORWARD_SLASH_EQUALS.value
                    )
                }

                '/' -> createNewToken(tokenType = TokenType.SINGLE_LINE_COMMENT, tokenLiteral = readSingleLineComment())
                '*' -> createNewToken(tokenType = TokenType.MULTI_LINE_COMMENT, tokenLiteral = readMultilineComment())

                else -> createNewToken(
                    tokenType = TokenType.FORWARD_SLASH,
                    tokenLiteral = TokenType.FORWARD_SLASH.value
                )
            }
        }

        '\\' -> t = createNewToken(tokenType = TokenType.BACK_SLASH, tokenLiteral = TokenType.BACK_SLASH.value)

        '%' -> {
            val peek = peekCharacter()
            t = when (peek) {
                '=' -> {
                    readNextCharacter()
                    createNewToken(tokenType = TokenType.MODULUS_EQUALS, tokenLiteral = TokenType.MODULUS_EQUALS.value)
                }

                else -> createNewToken(tokenType = TokenType.MODULUS, tokenLiteral = TokenType.MODULUS.value)
            }
        }

        '!' -> {
            val peek = peekCharacter()
            t = when (peek) {
                '=' -> {
                    readNextCharacter()
                    createNewToken(tokenType = TokenType.BANG_EQUALS, tokenLiteral = TokenType.BANG_EQUALS.value)
                }

                else -> createNewToken(tokenType = TokenType.BANG, tokenLiteral = TokenType.BANG.value)
            }
        }

        '&' -> {
            val peek = peekCharacter()
            t = when (peek) {
                '&' -> {
                    readNextCharacter()
                    createNewToken(
                        tokenType = TokenType.DOUBLE_AMPERSAND,
                        tokenLiteral = TokenType.DOUBLE_AMPERSAND.value
                    )
                }

                else -> createNewToken(tokenType = TokenType.AMPERSAND, tokenLiteral = TokenType.AMPERSAND.value)
            }
        }

        '|' -> {
            val peek = peekCharacter()
            t = when (peek) {
                '|' -> {
                    readNextCharacter()
                    createNewToken(
                        tokenType = TokenType.DOUBLE_VERTICAL_BAR,
                        tokenLiteral = TokenType.DOUBLE_VERTICAL_BAR.value
                    )
                }

                else -> createNewToken(tokenType = TokenType.VERTICAL_BAR, tokenLiteral = TokenType.VERTICAL_BAR.value)
            }
        }

        '^' -> {
            val peek = peekCharacter()
            t = when (peek) {
                '=' -> {
                    readNextCharacter()
                    createNewToken(tokenType = TokenType.CARET_EQUALS, tokenLiteral = TokenType.CARET_EQUALS.value)
                }

                else -> createNewToken(tokenType = TokenType.CARET, tokenLiteral = TokenType.CARET.value)
            }
        }

        '?' -> t = createNewToken(tokenType = TokenType.QUESTION, tokenLiteral = TokenType.QUESTION.value)
        ':' -> t = createNewToken(tokenType = TokenType.COLON, tokenLiteral = TokenType.COLON.value)

        '.' -> {
            t = if (peekCharacter().isDigit()) {
                readNumberLiteral()
            } else {
                createNewToken(tokenType = TokenType.DOT, tokenLiteral = TokenType.DOT.value)
            }
        }

        '~' -> t = createNewToken(tokenType = TokenType.TILDE, tokenLiteral = TokenType.TILDE.value)
        '\'' -> t = createNewToken(tokenType = TokenType.CHARACTER_LITERAL, tokenLiteral = readCharacterLiteral())
        '"' -> t = createNewToken(tokenType = TokenType.STRING_LITERAL, tokenLiteral = readStringLiteral())

        '(' -> t =
            createNewToken(tokenType = TokenType.LEFT_PARENTHESIS, tokenLiteral = TokenType.LEFT_PARENTHESIS.value)

        ')' -> t =
            createNewToken(tokenType = TokenType.RIGHT_PARENTHESIS, tokenLiteral = TokenType.RIGHT_PARENTHESIS.value)

        '{' -> t =
            createNewToken(tokenType = TokenType.LEFT_CURLY_BRACE, tokenLiteral = TokenType.LEFT_CURLY_BRACE.value)

        '}' -> t =
            createNewToken(tokenType = TokenType.RIGHT_CURLY_BRACE, tokenLiteral = TokenType.RIGHT_CURLY_BRACE.value)

        '[' -> t = createNewToken(
            tokenType = TokenType.LEFT_SQUARE_BRACKET,
            tokenLiteral = TokenType.LEFT_SQUARE_BRACKET.value
        )

        ']' -> t = createNewToken(
            tokenType = TokenType.RIGHT_SQUARE_BRACKET,
            tokenLiteral = TokenType.RIGHT_SQUARE_BRACKET.value
        )

        '<' -> {
            val peek = peekCharacter()
            t = when (peek) {
                '<' -> {
                    readNextCharacter()
                    createNewToken(
                        tokenType = TokenType.DOUBLE_LEFT_ANGLE_BRACKET,
                        tokenLiteral = TokenType.DOUBLE_LEFT_ANGLE_BRACKET.value
                    )
                }

                '=' -> {
                    readNextCharacter()
                    createNewToken(
                        tokenType = TokenType.LEFT_ANGLE_BRACKET_EQUALS,
                        tokenLiteral = TokenType.LEFT_ANGLE_BRACKET_EQUALS.value
                    )
                }

                else -> createNewToken(
                    tokenType = TokenType.LEFT_ANGLE_BRACKET,
                    tokenLiteral = TokenType.LEFT_ANGLE_BRACKET.value
                )
            }
        }

        '>' -> {
            val peek = peekCharacter()
            t = when (peek) {
                '>' -> {
                    readNextCharacter()
                    if (peekCharacter() == '>') {
                        readNextCharacter()
                        createNewToken(
                            tokenType = TokenType.TRIPLE_RIGHT_ANGLE_BRACKET,
                            tokenLiteral = TokenType.TRIPLE_RIGHT_ANGLE_BRACKET.value
                        )
                    } else {
                        createNewToken(
                            tokenType = TokenType.DOUBLE_RIGHT_ANGLE_BRACKET,
                            tokenLiteral = TokenType.DOUBLE_RIGHT_ANGLE_BRACKET.value
                        )
                    }
                }

                '=' -> {
                    readNextCharacter()
                    createNewToken(
                        tokenType = TokenType.RIGHT_ANGLE_BRACKET_EQUALS,
                        tokenLiteral = TokenType.RIGHT_ANGLE_BRACKET_EQUALS.value
                    )
                }

                else -> createNewToken(
                    tokenType = TokenType.RIGHT_ANGLE_BRACKET,
                    tokenLiteral = TokenType.RIGHT_ANGLE_BRACKET.value
                )
            }
        }

        EOF_CHARACTER -> t = Token.buildEOF(
            lineNumber = tokenStartLineNumber!!,
            columnNumber = tokenStartColumnNumber!!,
        )

        else -> {
            t = if (currentCharacter.isAcceptableIdentifierStart()) {
                val identifier = readIdentifier()
                // this return is necessary to avoid the unnecessary readNextCharacter
                // call after the when block
                return createNewToken(
                    tokenType = lookupIdentifier(identifier),
                    tokenLiteral = identifier,
                )
            } else if (currentCharacter.isDigit()) {
                readNumberLiteral()
            } else {
                createNewToken(tokenType = TokenType.ILLEGAL, tokenLiteral = "$currentCharacter")
            }
        }
    }

    readNextCharacter()

    tokenStartLineNumber = null
    tokenStartColumnNumber = null

    return t
}
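A hypothetical driver loop, to show how nextToken is meant to be pumped until end of input. The Lexer constructor and the EOF token type name are not part of this diff, so Lexer(input) and TokenType.EOF below are assumptions for illustration only:

fun main() {
    // Assumed constructor: a Lexer built directly from the source string.
    val lexer = Lexer("int count = 10;")
    var token = lexer.nextToken()
    // Assumed terminal type: Token.buildEOF implies some EOF token type exists.
    while (token.tokenType != TokenType.EOF) {
        println("${token.tokenType} '${token.tokenLiteral}' at ${token.lineNumber}:${token.columnNumber}")
        token = lexer.nextToken()
    }
}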